{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "f1", "beta1": 0.9, "beta2": 0.98, "buffer_size": 512, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "VirusEC4", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/VirusEC4/protein/multi_label/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 2560, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "matrix", "intermediate_size": 4096, "label_filepath": "../dataset/VirusEC4/protein/multi_label/label.txt", "label_size": 70, "label_type": "VirusEC4", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": null, "llm_step": "3B", "llm_task_level": "token_level,span_level,seq_level,structure_level", "llm_time_str": null, "llm_type": "esm", "llm_version": "esm2", "lmdb_path": null, "local_rank": -1, "log_dir": "../logs/VirusEC4/protein/multi_label/luca_base/matrix/20250501135241", "logging_steps": 1000, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": true, "matrix_dirpath": "../matrices/VirusEC4/protein/multi_label/luca_base/esm2/esm//3B", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "non_ignore": true, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 0, "num_hidden_layers": 0, "num_train_epochs": 50, "output_dir": "../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135241", "output_mode": "multi_label", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 1.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "null", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "multi_label", "tb_log_dir": "../tb-logs/VirusEC4/protein/multi_label/luca_base/matrix/20250501135241", 
"test_data_dir": "../dataset/VirusEC4/protein/multi_label/test/", "time_str": "20250501135250", "train_data_dir": "../dataset/VirusEC4/protein/multi_label/train/", "trunc_type": "right", "vector_dirpath": "../vectors/VirusEC4/protein/multi_label/luca_base/esm2/esm//3B", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 200, "weight": [1749.35, 1715.049019607843, 1666.047619047619, 1521.1739130434783, 1445.7438016528927, 1356.0852713178294, 1305.4850746268658, 1258.525179856115, 1240.6737588652481, 1150.8881578947369, 993.9488636363636, 920.7105263157895, 874.675, 813.6511627906976, 777.4888888888889, 774.0486725663717, 763.9082969432314, 650.3159851301115, 631.5342960288808, 629.2625899280575, 620.3368794326241, 615.9683098591549, 585.0668896321071, 567.9707792207793, 544.9688473520249, 539.9228395061729, 519.0949554896142, 510.01457725947523, 484.584487534626, 366.74004192872115, 354.1194331983806, 343.0098039215686, 331.3162878787879, 328.8251879699248, 328.2082551594747, 314.0664272890485, 312.38392857142856, 199.01592718998862, 186.49786780383795, 181.84511434511435, 172.01081612586037, 170.66829268292682, 156.05263157894737, 149.0076660988075, 134.4619523443505, 128.62867647058823, 127.68978102189782, 127.4107793153678, 127.0406681190995, 100.07723112128147, 94.0510752688172, 80.20861989912883, 78.0611334225792, 74.85451433461704, 65.00743218134522, 63.7983223924143, 44.866632469864065, 42.24462690171456, 41.661109788044776, 29.93924353927777, 27.193377895227734, 27.004476690336524, 25.790210821170575, 22.048777413662716, 21.104475811316203, 17.876047414674023, 10.421482187537233, 9.041970331317517, 17.4935, 7.06893764900796], "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,embedding_matrix ################################################## Encoder Config: {'llm_type': 'esm', 'llm_version': 'esm2', 'llm_step': '3B', 'llm_dirpath': None, 'input_type': 'matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/VirusEC4/protein/multi_label/luca_base/esm2/esm//3B', 'matrix_dirpath': '../matrices/VirusEC4/protein/multi_label/luca_base/esm2/esm//3B', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': True, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 2560, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "id2label": {}, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "label2id": {}, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4098, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "num_attention_heads": 8, "num_hidden_layers": 4, "pad_token_id": 0, "pos_weight": 1.0, "position_embedding_type": 
"absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": null, "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.29.0", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39, "weight": [ 1749.35, 1715.049019607843, 1666.047619047619, 1521.1739130434783, 1445.7438016528927, 1356.0852713178294, 1305.4850746268658, 1258.525179856115, 1240.6737588652481, 1150.8881578947369, 993.9488636363636, 920.7105263157895, 874.675, 813.6511627906976, 777.4888888888889, 774.0486725663717, 763.9082969432314, 650.3159851301115, 631.5342960288808, 629.2625899280575, 620.3368794326241, 615.9683098591549, 585.0668896321071, 567.9707792207793, 544.9688473520249, 539.9228395061729, 519.0949554896142, 510.01457725947523, 484.584487534626, 366.74004192872115, 354.1194331983806, 343.0098039215686, 331.3162878787879, 328.8251879699248, 328.2082551594747, 314.0664272890485, 312.38392857142856, 199.01592718998862, 186.49786780383795, 181.84511434511435, 172.01081612586037, 170.66829268292682, 156.05263157894737, 149.0076660988075, 134.4619523443505, 128.62867647058823, 127.68978102189782, 127.4107793153678, 127.0406681190995, 100.07723112128147, 94.0510752688172, 80.20861989912883, 78.0611334225792, 74.85451433461704, 65.00743218134522, 63.7983223924143, 44.866632469864065, 42.24462690171456, 41.661109788044776, 29.93924353927777, 27.193377895227734, 27.004476690336524, 25.790210821170575, 22.048777413662716, 21.104475811316203, 17.876047414674023, 10.421482187537233, 9.041970331317517, 17.4935, 7.06893764900796 ] } ################################################## Mode Architecture: LucaBase( (matrix_pooler): GlobalMaskValueAttentionPooling1D (2560 -> 2560) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=2560, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=128, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=70, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 20014150 ################################################## {"total_num": "19.090000M", "total_size": "76.350000MB", "param_sum": "19.090000M", "param_size": "76.350000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "19.086981M", "trainable_size": "76.347923MB"} ################################################## Train dataset len: 79578, batch size: 16, batch num: 4974 Train dataset t_total: 248700, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 79578 Train Dataset Num Epochs = 50 Logging Steps = 1000 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. 
parallel, distributed & accumulation) = 16 Train Dataset Gradient Accumulation steps = 1 Train Dataset Total optimization steps = 248700 ################################################## Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.05653681, Cur Avg Loss: 0.17099434, Log Avg loss: 0.17099434, Global Avg Loss: 0.17099434, Time: 0.3344 Steps: 1000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.03046256, Cur Avg Loss: 0.10135779, Log Avg loss: 0.03172124, Global Avg Loss: 0.10135779, Time: 0.2153 Steps: 2000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.01379222, Cur Avg Loss: 0.07373613, Log Avg loss: 0.01849281, Global Avg Loss: 0.07373613, Time: 0.3590 Steps: 3000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00233068, Cur Avg Loss: 0.05808966, Log Avg loss: 0.01115025, Global Avg Loss: 0.05808966, Time: 0.2869 Steps: 4000, Updated lr: 0.000098 ***** Running evaluation checkpoint-4974 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-4974 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1477.890478, Avg time per batch (s): 0.300000 {"eval_avg_loss": 0.005821, "eval_total_loss": 6.20482, "eval_acc": 0.998126, "eval_jaccard": 0.894979, "eval_prec": 0.897967, "eval_recall": 0.901057, "eval_f1": 0.898488, "eval_pr_auc": 0.971681, "eval_roc_auc": 0.996632, "eval_fmax": 0.966157, "eval_pmax": 0.978761, "eval_rmax": 0.953874, "eval_tmax": 0.16, "update_flag": true, "test_avg_loss": 0.005963, "test_total_loss": 6.356565, "test_acc": 0.998077, "test_jaccard": 0.890004, "test_prec": 0.892986, "test_recall": 0.896366, "test_f1": 0.893654, "test_pr_auc": 0.971523, "test_roc_auc": 0.996521, "test_fmax": 0.965291, "test_pmax": 0.977642, "test_rmax": 0.953248, "test_tmax": 0.16, "lr": 9.807887323943662e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.04821053509928408, "train_cur_epoch_loss": 239.79920158383902, "train_cur_epoch_avg_loss": 0.04821053509928408, "train_cur_epoch_time": 1477.8904783725739, "train_cur_epoch_avg_time": 0.2971231359816192, "epoch": 1, "step": 4974} ################################################## Training, Epoch: 0002, Batch: 000026, Sample Num: 416, Cur Loss: 0.00323801, Cur Avg Loss: 0.00452559, Log Avg loss: 0.00755822, Global Avg Loss: 0.04798337, Time: 0.1577 Steps: 5000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001026, Sample Num: 16416, Cur Loss: 0.00261159, Cur Avg Loss: 0.00557665, Log Avg loss: 0.00560397, Global Avg Loss: 0.04092014, Time: 0.2165 Steps: 6000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 002026, Sample Num: 32416, Cur Loss: 0.00058956, Cur Avg Loss: 0.00474138, Log Avg loss: 0.00388440, Global Avg Loss: 0.03562932, Time: 0.2263 Steps: 7000, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003026, Sample Num: 48416, Cur Loss: 0.00124521, Cur Avg Loss: 0.00424614, Log Avg loss: 0.00324280, Global Avg Loss: 0.03158100, Time: 0.2224 Steps: 8000, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004026, Sample Num: 64416, Cur Loss: 0.00021337, Cur Avg Loss: 0.00384908, Log Avg loss: 0.00264757, Global Avg Loss: 0.02836618, Time: 0.2212 Steps: 9000, Updated lr: 0.000096 ***** Running evaluation checkpoint-9948 ***** Dev Dataset Instantaneous batch 
size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-9948 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1338.096723, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.002097, "eval_total_loss": 2.235681, "eval_acc": 0.999468, "eval_jaccard": 0.979742, "eval_prec": 0.983248, "eval_recall": 0.982641, "eval_f1": 0.982132, "eval_pr_auc": 0.995365, "eval_roc_auc": 0.999098, "eval_fmax": 0.991627, "eval_pmax": 0.993646, "eval_rmax": 0.989616, "eval_tmax": 0.21, "update_flag": true, "test_avg_loss": 0.00215, "test_total_loss": 2.292159, "test_acc": 0.999471, "test_jaccard": 0.9784, "test_prec": 0.981863, "test_recall": 0.98158, "test_f1": 0.980927, "test_pr_auc": 0.995162, "test_roc_auc": 0.999039, "test_fmax": 0.990882, "test_pmax": 0.992999, "test_rmax": 0.988775, "test_tmax": 0.21, "lr": 9.607726358148895e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.025873281402417275, "train_cur_epoch_loss": 17.58820180740804, "train_cur_epoch_avg_loss": 0.0035360277055504707, "train_cur_epoch_time": 1338.0967230796814, "train_cur_epoch_avg_time": 0.2690182394611342, "epoch": 2, "step": 9948} ################################################## Training, Epoch: 0003, Batch: 000052, Sample Num: 832, Cur Loss: 0.00015347, Cur Avg Loss: 0.00192787, Log Avg loss: 0.00219205, Global Avg Loss: 0.02574877, Time: 0.4527 Steps: 10000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001052, Sample Num: 16832, Cur Loss: 0.00102574, Cur Avg Loss: 0.00196322, Log Avg loss: 0.00196506, Global Avg Loss: 0.02358661, Time: 0.6319 Steps: 11000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 002052, Sample Num: 32832, Cur Loss: 0.00064618, Cur Avg Loss: 0.00170189, Log Avg loss: 0.00142696, Global Avg Loss: 0.02173997, Time: 0.2916 Steps: 12000, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003052, Sample Num: 48832, Cur Loss: 0.00019276, Cur Avg Loss: 0.00160336, Log Avg loss: 0.00140119, Global Avg Loss: 0.02017545, Time: 0.2166 Steps: 13000, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004052, Sample Num: 64832, Cur Loss: 0.00553259, Cur Avg Loss: 0.00150148, Log Avg loss: 0.00119054, Global Avg Loss: 0.01881939, Time: 0.3272 Steps: 14000, Updated lr: 0.000094 ***** Running evaluation checkpoint-14922 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-14922 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1318.922807, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001322, "eval_total_loss": 1.409535, "eval_acc": 0.999676, "eval_jaccard": 0.988986, "eval_prec": 0.992218, "eval_recall": 0.990427, "eval_f1": 0.990695, "eval_pr_auc": 0.996939, "eval_roc_auc": 0.999458, "eval_fmax": 0.995402, "eval_pmax": 0.997915, "eval_rmax": 0.9929, "eval_tmax": 0.27, "update_flag": true, "test_avg_loss": 0.0014, "test_total_loss": 1.492561, "test_acc": 0.999682, "test_jaccard": 0.988125, "test_prec": 0.991179, "test_recall": 0.989653, "test_f1": 0.989869, "test_pr_auc": 0.99659, "test_roc_auc": 0.99938, "test_fmax": 0.994927, "test_pmax": 0.996471, "test_rmax": 0.993388, "test_tmax": 0.19, "lr": 9.407565392354125e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.01772437357896693, 
"train_cur_epoch_loss": 7.095699154097474, "train_cur_epoch_avg_loss": 0.0014265579320662392, "train_cur_epoch_time": 1318.9228067398071, "train_cur_epoch_avg_time": 0.2651634110856066, "epoch": 3, "step": 14922} ################################################## Training, Epoch: 0004, Batch: 000078, Sample Num: 1248, Cur Loss: 0.00143984, Cur Avg Loss: 0.00114347, Log Avg loss: 0.00110089, Global Avg Loss: 0.01763815, Time: 0.2182 Steps: 15000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001078, Sample Num: 17248, Cur Loss: 0.00011576, Cur Avg Loss: 0.00107165, Log Avg loss: 0.00106604, Global Avg Loss: 0.01660240, Time: 0.2687 Steps: 16000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 002078, Sample Num: 33248, Cur Loss: 0.00018997, Cur Avg Loss: 0.00090248, Log Avg loss: 0.00072012, Global Avg Loss: 0.01566814, Time: 0.2194 Steps: 17000, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003078, Sample Num: 49248, Cur Loss: 0.00004557, Cur Avg Loss: 0.00086777, Log Avg loss: 0.00079565, Global Avg Loss: 0.01484189, Time: 0.2208 Steps: 18000, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004078, Sample Num: 65248, Cur Loss: 0.00063604, Cur Avg Loss: 0.00081875, Log Avg loss: 0.00066787, Global Avg Loss: 0.01409589, Time: 0.4274 Steps: 19000, Updated lr: 0.000092 ***** Running evaluation checkpoint-19896 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-19896 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1333.868669, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001245, "eval_total_loss": 1.327134, "eval_acc": 0.999693, "eval_jaccard": 0.989681, "eval_prec": 0.993237, "eval_recall": 0.990693, "eval_f1": 0.991293, "eval_pr_auc": 0.997225, "eval_roc_auc": 0.999507, "eval_fmax": 0.99579, "eval_pmax": 0.997898, "eval_rmax": 0.993691, "eval_tmax": 0.13, "update_flag": true, "test_avg_loss": 0.00138, "test_total_loss": 1.471224, "test_acc": 0.999706, "test_jaccard": 0.989191, "test_prec": 0.99265, "test_recall": 0.990291, "test_f1": 0.990865, "test_pr_auc": 0.997211, "test_roc_auc": 0.999409, "test_fmax": 0.995809, "test_pmax": 0.997341, "test_rmax": 0.994283, "test_tmax": 0.1, "lr": 9.207404426559356e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.013493945515531751, "train_cur_epoch_loss": 3.992437431675171, "train_cur_epoch_avg_loss": 0.0008026613252262105, "train_cur_epoch_time": 1333.868668794632, "train_cur_epoch_avg_time": 0.2681682084428291, "epoch": 4, "step": 19896} ################################################## Training, Epoch: 0005, Batch: 000104, Sample Num: 1664, Cur Loss: 0.00028481, Cur Avg Loss: 0.00089050, Log Avg loss: 0.00074617, Global Avg Loss: 0.01342841, Time: 0.3352 Steps: 20000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001104, Sample Num: 17664, Cur Loss: 0.00053830, Cur Avg Loss: 0.00074531, Log Avg loss: 0.00073021, Global Avg Loss: 0.01282373, Time: 0.3163 Steps: 21000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 002104, Sample Num: 33664, Cur Loss: 0.00001090, Cur Avg Loss: 0.00065467, Log Avg loss: 0.00055461, Global Avg Loss: 0.01226604, Time: 0.4162 Steps: 22000, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003104, Sample Num: 49664, Cur Loss: 0.00373382, Cur Avg Loss: 0.00064196, Log Avg loss: 0.00061521, Global Avg Loss: 0.01175949, Time: 0.3204 Steps: 23000, Updated lr: 
0.000091 Training, Epoch: 0005, Batch: 004104, Sample Num: 65664, Cur Loss: 0.00910759, Cur Avg Loss: 0.00061537, Log Avg loss: 0.00053282, Global Avg Loss: 0.01129171, Time: 0.2150 Steps: 24000, Updated lr: 0.000090 ***** Running evaluation checkpoint-24870 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-24870 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1321.111227, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001057, "eval_total_loss": 1.12672, "eval_acc": 0.999755, "eval_jaccard": 0.991815, "eval_prec": 0.9944, "eval_recall": 0.992895, "eval_f1": 0.993149, "eval_pr_auc": 0.997682, "eval_roc_auc": 0.999516, "eval_fmax": 0.99645, "eval_pmax": 0.997544, "eval_rmax": 0.995358, "eval_tmax": 0.1, "update_flag": true, "test_avg_loss": 0.001259, "test_total_loss": 1.342308, "test_acc": 0.999752, "test_jaccard": 0.990763, "test_prec": 0.993378, "test_recall": 0.99197, "test_f1": 0.992208, "test_pr_auc": 0.997573, "test_roc_auc": 0.999438, "test_fmax": 0.996244, "test_pmax": 0.998037, "test_rmax": 0.994458, "test_tmax": 0.19, "lr": 9.007243460764588e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.010916553177487272, "train_cur_epoch_loss": 3.0191375470887465, "train_cur_epoch_avg_loss": 0.0006069838253093579, "train_cur_epoch_time": 1321.1112265586853, "train_cur_epoch_avg_time": 0.2656033829028318, "epoch": 5, "step": 24870} ################################################## Training, Epoch: 0006, Batch: 000130, Sample Num: 2080, Cur Loss: 0.00001518, Cur Avg Loss: 0.00064421, Log Avg loss: 0.00057742, Global Avg Loss: 0.01086314, Time: 0.2200 Steps: 25000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001130, Sample Num: 18080, Cur Loss: 0.00006390, Cur Avg Loss: 0.00057816, Log Avg loss: 0.00056958, Global Avg Loss: 0.01046723, Time: 0.1297 Steps: 26000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 002130, Sample Num: 34080, Cur Loss: 0.00002728, Cur Avg Loss: 0.00050541, Log Avg loss: 0.00042319, Global Avg Loss: 0.01009523, Time: 0.2184 Steps: 27000, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003130, Sample Num: 50080, Cur Loss: 0.00009737, Cur Avg Loss: 0.00050453, Log Avg loss: 0.00050266, Global Avg Loss: 0.00975264, Time: 0.2489 Steps: 28000, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004130, Sample Num: 66080, Cur Loss: 0.00001131, Cur Avg Loss: 0.00049621, Log Avg loss: 0.00047016, Global Avg Loss: 0.00943255, Time: 0.1460 Steps: 29000, Updated lr: 0.000088 ***** Running evaluation checkpoint-29844 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-29844 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1268.543715, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001232, "eval_total_loss": 1.313129, "eval_acc": 0.999728, "eval_jaccard": 0.991173, "eval_prec": 0.994532, "eval_recall": 0.992243, "eval_f1": 0.992732, "eval_pr_auc": 0.997665, "eval_roc_auc": 0.999528, "eval_fmax": 0.996306, "eval_pmax": 0.997069, "eval_rmax": 0.995543, "eval_tmax": 0.05, "update_flag": false, "test_avg_loss": 0.001391, "test_total_loss": 1.483101, "test_acc": 0.999739, "test_jaccard": 0.990902, 
"test_prec": 0.99405, "test_recall": 0.992053, "test_f1": 0.992478, "test_pr_auc": 0.997527, "test_roc_auc": 0.999423, "test_fmax": 0.996367, "test_pmax": 0.997216, "test_rmax": 0.995519, "test_tmax": 0.06, "lr": 8.80708249496982e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.009178924815310916, "train_cur_epoch_loss": 2.4411546640305417, "train_cur_epoch_avg_loss": 0.0004907830044291398, "train_cur_epoch_time": 1268.5437150001526, "train_cur_epoch_avg_time": 0.2550349246079921, "epoch": 6, "step": 29844} ################################################## Training, Epoch: 0007, Batch: 000156, Sample Num: 2496, Cur Loss: 0.00009021, Cur Avg Loss: 0.00059039, Log Avg loss: 0.00048392, Global Avg Loss: 0.00913426, Time: 0.2196 Steps: 30000, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001156, Sample Num: 18496, Cur Loss: 0.00002002, Cur Avg Loss: 0.00050231, Log Avg loss: 0.00048857, Global Avg Loss: 0.00885537, Time: 0.2203 Steps: 31000, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 002156, Sample Num: 34496, Cur Loss: 0.00001780, Cur Avg Loss: 0.00042828, Log Avg loss: 0.00034271, Global Avg Loss: 0.00858935, Time: 0.2201 Steps: 32000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003156, Sample Num: 50496, Cur Loss: 0.00526690, Cur Avg Loss: 0.00042888, Log Avg loss: 0.00043016, Global Avg Loss: 0.00834210, Time: 0.2224 Steps: 33000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004156, Sample Num: 66496, Cur Loss: 0.00004354, Cur Avg Loss: 0.00042243, Log Avg loss: 0.00040209, Global Avg Loss: 0.00810857, Time: 0.2763 Steps: 34000, Updated lr: 0.000086 ***** Running evaluation checkpoint-34818 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-34818 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1298.473625, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001011, "eval_total_loss": 1.07814, "eval_acc": 0.999767, "eval_jaccard": 0.992298, "eval_prec": 0.994861, "eval_recall": 0.993488, "eval_f1": 0.993624, "eval_pr_auc": 0.997829, "eval_roc_auc": 0.999554, "eval_fmax": 0.996738, "eval_pmax": 0.997977, "eval_rmax": 0.995502, "eval_tmax": 0.12, "update_flag": true, "test_avg_loss": 0.001198, "test_total_loss": 1.277157, "test_acc": 0.999775, "test_jaccard": 0.991977, "test_prec": 0.994236, "test_recall": 0.993285, "test_f1": 0.993277, "test_pr_auc": 0.997652, "test_roc_auc": 0.99944, "test_fmax": 0.996724, "test_pmax": 0.997608, "test_rmax": 0.995841, "test_tmax": 0.1, "lr": 8.606921529175051e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.007928192062883052, "train_cur_epoch_loss": 2.1079590573231677, "train_cur_epoch_avg_loss": 0.0004237955483158761, "train_cur_epoch_time": 1298.4736247062683, "train_cur_epoch_avg_time": 0.26105219636233784, "epoch": 7, "step": 34818} ################################################## Training, Epoch: 0008, Batch: 000182, Sample Num: 2912, Cur Loss: 0.00057698, Cur Avg Loss: 0.00045118, Log Avg loss: 0.00043444, Global Avg Loss: 0.00788931, Time: 0.2194 Steps: 35000, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001182, Sample Num: 18912, Cur Loss: 0.00001642, Cur Avg Loss: 0.00042003, Log Avg loss: 0.00041437, Global Avg Loss: 0.00768167, Time: 0.4688 Steps: 36000, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 002182, Sample Num: 34912, Cur Loss: 0.00000842, Cur Avg Loss: 
0.00035972, Log Avg loss: 0.00028842, Global Avg Loss: 0.00748186, Time: 0.2191 Steps: 37000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003182, Sample Num: 50912, Cur Loss: 0.00013467, Cur Avg Loss: 0.00036825, Log Avg loss: 0.00038686, Global Avg Loss: 0.00729515, Time: 0.2192 Steps: 38000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 004182, Sample Num: 66912, Cur Loss: 0.00000116, Cur Avg Loss: 0.00036125, Log Avg loss: 0.00033899, Global Avg Loss: 0.00711678, Time: 0.1198 Steps: 39000, Updated lr: 0.000084 ***** Running evaluation checkpoint-39792 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-39792 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1327.229903, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.000979, "eval_total_loss": 1.044105, "eval_acc": 0.999784, "eval_jaccard": 0.9925, "eval_prec": 0.994544, "eval_recall": 0.993714, "eval_f1": 0.993602, "eval_pr_auc": 0.998111, "eval_roc_auc": 0.999603, "eval_fmax": 0.996854, "eval_pmax": 0.998393, "eval_rmax": 0.995321, "eval_tmax": 0.22, "update_flag": false, "test_avg_loss": 0.001204, "test_total_loss": 1.283356, "test_acc": 0.99981, "test_jaccard": 0.992903, "test_prec": 0.994392, "test_recall": 0.994338, "test_f1": 0.99396, "test_pr_auc": 0.997821, "test_roc_auc": 0.999417, "test_fmax": 0.996643, "test_pmax": 0.997421, "test_rmax": 0.995866, "test_tmax": 0.12, "lr": 8.406760563380283e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006982870076812118, "train_cur_epoch_loss": 1.8185748510457085, "train_cur_epoch_avg_loss": 0.0003656161743155827, "train_cur_epoch_time": 1327.229903459549, "train_cur_epoch_avg_time": 0.2668335149697525, "epoch": 8, "step": 39792} ################################################## Training, Epoch: 0009, Batch: 000208, Sample Num: 3328, Cur Loss: 0.00005001, Cur Avg Loss: 0.00044517, Log Avg loss: 0.00040042, Global Avg Loss: 0.00694887, Time: 0.3952 Steps: 40000, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001208, Sample Num: 19328, Cur Loss: 0.00002475, Cur Avg Loss: 0.00039854, Log Avg loss: 0.00038884, Global Avg Loss: 0.00678887, Time: 0.2206 Steps: 41000, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 002208, Sample Num: 35328, Cur Loss: 0.00003546, Cur Avg Loss: 0.00034089, Log Avg loss: 0.00027125, Global Avg Loss: 0.00663369, Time: 0.2594 Steps: 42000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003208, Sample Num: 51328, Cur Loss: 0.00003376, Cur Avg Loss: 0.00033667, Log Avg loss: 0.00032736, Global Avg Loss: 0.00648703, Time: 0.3853 Steps: 43000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004208, Sample Num: 67328, Cur Loss: 0.00003528, Cur Avg Loss: 0.00032989, Log Avg loss: 0.00030815, Global Avg Loss: 0.00634660, Time: 0.2273 Steps: 44000, Updated lr: 0.000082 ***** Running evaluation checkpoint-44766 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-44766 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1265.282202, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001044, "eval_total_loss": 1.112768, "eval_acc": 0.99979, "eval_jaccard": 0.992851, "eval_prec": 0.994737, 
"eval_recall": 0.994126, "eval_f1": 0.993973, "eval_pr_auc": 0.998018, "eval_roc_auc": 0.999619, "eval_fmax": 0.996839, "eval_pmax": 0.998267, "eval_rmax": 0.995416, "eval_tmax": 0.19, "update_flag": true, "test_avg_loss": 0.001235, "test_total_loss": 1.316464, "test_acc": 0.999803, "test_jaccard": 0.992738, "test_prec": 0.994326, "test_recall": 0.994122, "test_f1": 0.993837, "test_pr_auc": 0.997832, "test_roc_auc": 0.999468, "test_fmax": 0.996808, "test_pmax": 0.998067, "test_rmax": 0.995553, "test_tmax": 0.19, "lr": 8.206599597585514e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006243812038558492, "train_cur_epoch_loss": 1.6481236216015027, "train_cur_epoch_avg_loss": 0.0003313477325294537, "train_cur_epoch_time": 1265.2822017669678, "train_cur_epoch_avg_time": 0.2543792122571306, "epoch": 9, "step": 44766} ################################################## Training, Epoch: 0010, Batch: 000234, Sample Num: 3744, Cur Loss: 0.00039464, Cur Avg Loss: 0.00035125, Log Avg loss: 0.00034213, Global Avg Loss: 0.00621317, Time: 0.2241 Steps: 45000, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001234, Sample Num: 19744, Cur Loss: 0.00000725, Cur Avg Loss: 0.00034209, Log Avg loss: 0.00033995, Global Avg Loss: 0.00608549, Time: 0.2934 Steps: 46000, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 002234, Sample Num: 35744, Cur Loss: 0.00014217, Cur Avg Loss: 0.00030029, Log Avg loss: 0.00024870, Global Avg Loss: 0.00596131, Time: 0.2199 Steps: 47000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003234, Sample Num: 51744, Cur Loss: 0.00005393, Cur Avg Loss: 0.00030588, Log Avg loss: 0.00031838, Global Avg Loss: 0.00584374, Time: 0.4149 Steps: 48000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004234, Sample Num: 67744, Cur Loss: 0.00006635, Cur Avg Loss: 0.00030489, Log Avg loss: 0.00030165, Global Avg Loss: 0.00573064, Time: 0.2247 Steps: 49000, Updated lr: 0.000080 ***** Running evaluation checkpoint-49740 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-49740 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1302.609490, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001001, "eval_total_loss": 1.067296, "eval_acc": 0.999804, "eval_jaccard": 0.992953, "eval_prec": 0.994502, "eval_recall": 0.994111, "eval_f1": 0.993894, "eval_pr_auc": 0.998158, "eval_roc_auc": 0.999586, "eval_fmax": 0.996827, "eval_pmax": 0.997406, "eval_rmax": 0.996248, "eval_tmax": 0.06, "update_flag": false, "test_avg_loss": 0.001256, "test_total_loss": 1.339241, "test_acc": 0.999814, "test_jaccard": 0.9929, "test_prec": 0.994185, "test_recall": 0.994298, "test_f1": 0.993886, "test_pr_auc": 0.997676, "test_roc_auc": 0.999423, "test_fmax": 0.996806, "test_pmax": 0.997954, "test_rmax": 0.995661, "test_tmax": 0.16, "lr": 8.006438631790744e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005650013215668994, "train_cur_epoch_loss": 1.52116762926636, "train_cur_epoch_avg_loss": 0.0003058238096635223, "train_cur_epoch_time": 1302.6094899177551, "train_cur_epoch_avg_time": 0.2618836931881293, "epoch": 10, "step": 49740} ################################################## Training, Epoch: 0011, Batch: 000260, Sample Num: 4160, Cur Loss: 0.00047908, Cur Avg Loss: 0.00032268, Log Avg loss: 0.00031418, Global Avg Loss: 0.00562231, Time: 0.3237 Steps: 50000, Updated lr: 
0.000080 Training, Epoch: 0011, Batch: 001260, Sample Num: 20160, Cur Loss: 0.00000053, Cur Avg Loss: 0.00030757, Log Avg loss: 0.00030365, Global Avg Loss: 0.00551802, Time: 0.2199 Steps: 51000, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 002260, Sample Num: 36160, Cur Loss: 0.00501105, Cur Avg Loss: 0.00027404, Log Avg loss: 0.00023179, Global Avg Loss: 0.00541637, Time: 0.4203 Steps: 52000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003260, Sample Num: 52160, Cur Loss: 0.00001541, Cur Avg Loss: 0.00028042, Log Avg loss: 0.00029484, Global Avg Loss: 0.00531973, Time: 0.3055 Steps: 53000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004260, Sample Num: 68160, Cur Loss: 0.00000815, Cur Avg Loss: 0.00028005, Log Avg loss: 0.00027883, Global Avg Loss: 0.00522638, Time: 0.3954 Steps: 54000, Updated lr: 0.000078 ***** Running evaluation checkpoint-54714 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-54714 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1281.132601, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.000993, "eval_total_loss": 1.058577, "eval_acc": 0.999804, "eval_jaccard": 0.99308, "eval_prec": 0.994595, "eval_recall": 0.994321, "eval_f1": 0.994032, "eval_pr_auc": 0.998346, "eval_roc_auc": 0.999612, "eval_fmax": 0.996863, "eval_pmax": 0.998192, "eval_rmax": 0.995538, "eval_tmax": 0.16, "update_flag": true, "test_avg_loss": 0.001213, "test_total_loss": 1.292955, "test_acc": 0.999823, "test_jaccard": 0.993336, "test_prec": 0.994539, "test_recall": 0.994683, "test_f1": 0.994268, "test_pr_auc": 0.997805, "test_roc_auc": 0.999474, "test_fmax": 0.996903, "test_pmax": 0.997711, "test_rmax": 0.996096, "test_tmax": 0.09, "lr": 7.806277665995976e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005162049910718352, "train_cur_epoch_loss": 1.4047414676680994, "train_cur_epoch_avg_loss": 0.0002824168612119219, "train_cur_epoch_time": 1281.1326014995575, "train_cur_epoch_avg_time": 0.25756586278640076, "epoch": 11, "step": 54714} ################################################## Training, Epoch: 0012, Batch: 000286, Sample Num: 4576, Cur Loss: 0.00001343, Cur Avg Loss: 0.00032930, Log Avg loss: 0.00030592, Global Avg Loss: 0.00513692, Time: 0.3556 Steps: 55000, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001286, Sample Num: 20576, Cur Loss: 0.00001533, Cur Avg Loss: 0.00030273, Log Avg loss: 0.00029513, Global Avg Loss: 0.00505046, Time: 0.2808 Steps: 56000, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 002286, Sample Num: 36576, Cur Loss: 0.00000808, Cur Avg Loss: 0.00025893, Log Avg loss: 0.00020262, Global Avg Loss: 0.00496541, Time: 0.3212 Steps: 57000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003286, Sample Num: 52576, Cur Loss: 0.00000661, Cur Avg Loss: 0.00025816, Log Avg loss: 0.00025639, Global Avg Loss: 0.00488422, Time: 0.5088 Steps: 58000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004286, Sample Num: 68576, Cur Loss: 0.00010021, Cur Avg Loss: 0.00025438, Log Avg loss: 0.00024196, Global Avg Loss: 0.00480554, Time: 0.2208 Steps: 59000, Updated lr: 0.000076 ***** Running evaluation checkpoint-59688 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-59688 ***** Test Dataset 
Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1338.928270, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001003, "eval_total_loss": 1.068895, "eval_acc": 0.999807, "eval_jaccard": 0.993176, "eval_prec": 0.994531, "eval_recall": 0.994424, "eval_f1": 0.994066, "eval_pr_auc": 0.998132, "eval_roc_auc": 0.999603, "eval_fmax": 0.997005, "eval_pmax": 0.998228, "eval_rmax": 0.995785, "eval_tmax": 0.18, "update_flag": true, "test_avg_loss": 0.001242, "test_total_loss": 1.323893, "test_acc": 0.999827, "test_jaccard": 0.993232, "test_prec": 0.994294, "test_recall": 0.994632, "test_f1": 0.994122, "test_pr_auc": 0.997778, "test_roc_auc": 0.999467, "test_fmax": 0.996894, "test_pmax": 0.997998, "test_rmax": 0.995793, "test_tmax": 0.17, "lr": 7.606116700201208e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00475351251654476, "train_cur_epoch_loss": 1.2912562724797176, "train_cur_epoch_avg_loss": 0.0002596011806352468, "train_cur_epoch_time": 1338.92826962471, "train_cur_epoch_avg_time": 0.2691854180990571, "epoch": 12, "step": 59688} ################################################## Training, Epoch: 0013, Batch: 000312, Sample Num: 4992, Cur Loss: 0.00007609, Cur Avg Loss: 0.00029355, Log Avg loss: 0.00029257, Global Avg Loss: 0.00473032, Time: 0.2246 Steps: 60000, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001312, Sample Num: 20992, Cur Loss: 0.00000559, Cur Avg Loss: 0.00026235, Log Avg loss: 0.00025261, Global Avg Loss: 0.00465692, Time: 0.4184 Steps: 61000, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 002312, Sample Num: 36992, Cur Loss: 0.00001424, Cur Avg Loss: 0.00024095, Log Avg loss: 0.00021287, Global Avg Loss: 0.00458524, Time: 0.2162 Steps: 62000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003312, Sample Num: 52992, Cur Loss: 0.00006356, Cur Avg Loss: 0.00024556, Log Avg loss: 0.00025622, Global Avg Loss: 0.00451652, Time: 0.2741 Steps: 63000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 004312, Sample Num: 68992, Cur Loss: 0.00000223, Cur Avg Loss: 0.00024355, Log Avg loss: 0.00023691, Global Avg Loss: 0.00444965, Time: 0.2203 Steps: 64000, Updated lr: 0.000074 ***** Running evaluation checkpoint-64662 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-64662 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1322.034581, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001031, "eval_total_loss": 1.099561, "eval_acc": 0.999817, "eval_jaccard": 0.993392, "eval_prec": 0.994728, "eval_recall": 0.994559, "eval_f1": 0.994255, "eval_pr_auc": 0.998128, "eval_roc_auc": 0.999584, "eval_fmax": 0.996942, "eval_pmax": 0.998124, "eval_rmax": 0.995764, "eval_tmax": 0.13, "update_flag": true, "test_avg_loss": 0.00125, "test_total_loss": 1.33285, "test_acc": 0.999822, "test_jaccard": 0.993354, "test_prec": 0.994387, "test_recall": 0.994891, "test_f1": 0.994286, "test_pr_auc": 0.997723, "test_roc_auc": 0.999429, "test_fmax": 0.996902, "test_pmax": 0.997719, "test_rmax": 0.996086, "test_tmax": 0.15, "lr": 7.405955734406439e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004406876715003096, "train_cur_epoch_loss": 1.229807058006576, "train_cur_epoch_avg_loss": 0.0002472470965031315, "train_cur_epoch_time": 1322.0345809459686, 
"train_cur_epoch_avg_time": 0.2657890190884537, "epoch": 13, "step": 64662} ################################################## Training, Epoch: 0014, Batch: 000338, Sample Num: 5408, Cur Loss: 0.00001321, Cur Avg Loss: 0.00021561, Log Avg loss: 0.00025248, Global Avg Loss: 0.00438508, Time: 0.2192 Steps: 65000, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001338, Sample Num: 21408, Cur Loss: 0.00137237, Cur Avg Loss: 0.00023641, Log Avg loss: 0.00024344, Global Avg Loss: 0.00432233, Time: 0.2263 Steps: 66000, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 002338, Sample Num: 37408, Cur Loss: 0.00001737, Cur Avg Loss: 0.00022401, Log Avg loss: 0.00020742, Global Avg Loss: 0.00426091, Time: 0.2445 Steps: 67000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003338, Sample Num: 53408, Cur Loss: 0.00000739, Cur Avg Loss: 0.00023405, Log Avg loss: 0.00025753, Global Avg Loss: 0.00420204, Time: 0.3119 Steps: 68000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 004338, Sample Num: 69408, Cur Loss: 0.00002347, Cur Avg Loss: 0.00022989, Log Avg loss: 0.00021598, Global Avg Loss: 0.00414427, Time: 0.2181 Steps: 69000, Updated lr: 0.000072 ***** Running evaluation checkpoint-69636 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-69636 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1348.014638, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001041, "eval_total_loss": 1.110066, "eval_acc": 0.999812, "eval_jaccard": 0.993443, "eval_prec": 0.994928, "eval_recall": 0.994586, "eval_f1": 0.994349, "eval_pr_auc": 0.998118, "eval_roc_auc": 0.999621, "eval_fmax": 0.997044, "eval_pmax": 0.998364, "eval_rmax": 0.995728, "eval_tmax": 0.17, "update_flag": true, "test_avg_loss": 0.001232, "test_total_loss": 1.313008, "test_acc": 0.999827, "test_jaccard": 0.99324, "test_prec": 0.994345, "test_recall": 0.994526, "test_f1": 0.994102, "test_pr_auc": 0.997709, "test_roc_auc": 0.99946, "test_fmax": 0.997036, "test_pmax": 0.998184, "test_rmax": 0.99589, "test_tmax": 0.16, "lr": 7.20579476861167e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004108707719114561, "train_cur_epoch_loss": 1.1565085827311856, "train_cur_epoch_avg_loss": 0.00023251077256356768, "train_cur_epoch_time": 1348.014638185501, "train_cur_epoch_avg_time": 0.2710121910304586, "epoch": 14, "step": 69636} ################################################## Training, Epoch: 0015, Batch: 000364, Sample Num: 5824, Cur Loss: 0.00000629, Cur Avg Loss: 0.00021483, Log Avg loss: 0.00023746, Global Avg Loss: 0.00408846, Time: 0.2159 Steps: 70000, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001364, Sample Num: 21824, Cur Loss: 0.00001671, Cur Avg Loss: 0.00021903, Log Avg loss: 0.00022056, Global Avg Loss: 0.00403398, Time: 0.2159 Steps: 71000, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 002364, Sample Num: 37824, Cur Loss: 0.00007809, Cur Avg Loss: 0.00020760, Log Avg loss: 0.00019200, Global Avg Loss: 0.00398062, Time: 0.2151 Steps: 72000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003364, Sample Num: 53824, Cur Loss: 0.00001940, Cur Avg Loss: 0.00022086, Log Avg loss: 0.00025222, Global Avg Loss: 0.00392955, Time: 0.2924 Steps: 73000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 004364, Sample Num: 69824, Cur Loss: 0.00000628, Cur Avg Loss: 0.00021580, Log Avg loss: 
0.00019875, Global Avg Loss: 0.00387913, Time: 0.2177 Steps: 74000, Updated lr: 0.000070 ***** Running evaluation checkpoint-74610 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-74610 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1284.043533, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001042, "eval_total_loss": 1.111212, "eval_acc": 0.999804, "eval_jaccard": 0.993277, "eval_prec": 0.994497, "eval_recall": 0.994721, "eval_f1": 0.99421, "eval_pr_auc": 0.998032, "eval_roc_auc": 0.99962, "eval_fmax": 0.996766, "eval_pmax": 0.997854, "eval_rmax": 0.99568, "eval_tmax": 0.13, "update_flag": false, "test_avg_loss": 0.001269, "test_total_loss": 1.352487, "test_acc": 0.99983, "test_jaccard": 0.993501, "test_prec": 0.994318, "test_recall": 0.995043, "test_f1": 0.994352, "test_pr_auc": 0.997739, "test_roc_auc": 0.999458, "test_fmax": 0.996982, "test_pmax": 0.997831, "test_rmax": 0.996135, "test_tmax": 0.14, "lr": 7.005633802816902e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0038494329798986313, "train_cur_epoch_loss": 1.0922239019753004, "train_cur_epoch_avg_loss": 0.00021958663087561327, "train_cur_epoch_time": 1284.0435328483582, "train_cur_epoch_avg_time": 0.25815109224936833, "epoch": 15, "step": 74610} ################################################## Training, Epoch: 0016, Batch: 000390, Sample Num: 6240, Cur Loss: 0.00006459, Cur Avg Loss: 0.00021154, Log Avg loss: 0.00023299, Global Avg Loss: 0.00383052, Time: 0.4537 Steps: 75000, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001390, Sample Num: 22240, Cur Loss: 0.00015462, Cur Avg Loss: 0.00020843, Log Avg loss: 0.00020722, Global Avg Loss: 0.00378284, Time: 0.4571 Steps: 76000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002390, Sample Num: 38240, Cur Loss: 0.00000855, Cur Avg Loss: 0.00019250, Log Avg loss: 0.00017034, Global Avg Loss: 0.00373593, Time: 0.3024 Steps: 77000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003390, Sample Num: 54240, Cur Loss: 0.00000154, Cur Avg Loss: 0.00020671, Log Avg loss: 0.00024068, Global Avg Loss: 0.00369111, Time: 0.2723 Steps: 78000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 004390, Sample Num: 70240, Cur Loss: 0.00001689, Cur Avg Loss: 0.00020116, Log Avg loss: 0.00018237, Global Avg Loss: 0.00364670, Time: 0.2707 Steps: 79000, Updated lr: 0.000068 ***** Running evaluation checkpoint-79584 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-79584 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1304.590922, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001116, "eval_total_loss": 1.189329, "eval_acc": 0.999804, "eval_jaccard": 0.993304, "eval_prec": 0.9946, "eval_recall": 0.99454, "eval_f1": 0.994148, "eval_pr_auc": 0.998018, "eval_roc_auc": 0.999589, "eval_fmax": 0.996794, "eval_pmax": 0.997844, "eval_rmax": 0.995746, "eval_tmax": 0.11, "update_flag": false, "test_avg_loss": 0.001281, "test_total_loss": 1.365381, "test_acc": 0.999834, "test_jaccard": 0.99353, "test_prec": 0.994325, "test_recall": 0.994856, "test_f1": 0.994284, "test_pr_auc": 0.997694, "test_roc_auc": 0.999443, "test_fmax": 
0.99696, "test_pmax": 0.998336, "test_rmax": 0.995587, "test_tmax": 0.32, "lr": 6.805472837022134e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.003621599896578937, "train_cur_epoch_loss": 1.0152115391012726, "train_cur_epoch_avg_loss": 0.00020410364678352888, "train_cur_epoch_time": 1304.5909216403961, "train_cur_epoch_avg_time": 0.2622820509932441, "epoch": 16, "step": 79584} ################################################## Training, Epoch: 0017, Batch: 000416, Sample Num: 6656, Cur Loss: 0.00000764, Cur Avg Loss: 0.00019983, Log Avg loss: 0.00021523, Global Avg Loss: 0.00360381, Time: 0.3826 Steps: 80000, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001416, Sample Num: 22656, Cur Loss: 0.00000127, Cur Avg Loss: 0.00020171, Log Avg loss: 0.00020248, Global Avg Loss: 0.00356182, Time: 0.2196 Steps: 81000, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002416, Sample Num: 38656, Cur Loss: 0.00000342, Cur Avg Loss: 0.00018766, Log Avg loss: 0.00016776, Global Avg Loss: 0.00352042, Time: 0.2195 Steps: 82000, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003416, Sample Num: 54656, Cur Loss: 0.00000379, Cur Avg Loss: 0.00020336, Log Avg loss: 0.00024128, Global Avg Loss: 0.00348092, Time: 0.2207 Steps: 83000, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 004416, Sample Num: 70656, Cur Loss: 0.00000884, Cur Avg Loss: 0.00019866, Log Avg loss: 0.00018260, Global Avg Loss: 0.00344165, Time: 0.2958 Steps: 84000, Updated lr: 0.000066 ***** Running evaluation checkpoint-84558 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-84558 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1289.821191, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001073, "eval_total_loss": 1.143564, "eval_acc": 0.999812, "eval_jaccard": 0.993527, "eval_prec": 0.994707, "eval_recall": 0.994833, "eval_f1": 0.994375, "eval_pr_auc": 0.998192, "eval_roc_auc": 0.999584, "eval_fmax": 0.996917, "eval_pmax": 0.998368, "eval_rmax": 0.995469, "eval_tmax": 0.17, "update_flag": true, "test_avg_loss": 0.001279, "test_total_loss": 1.363196, "test_acc": 0.999827, "test_jaccard": 0.993424, "test_prec": 0.994294, "test_recall": 0.994888, "test_f1": 0.994258, "test_pr_auc": 0.997786, "test_roc_auc": 0.999469, "test_fmax": 0.997016, "test_pmax": 0.997796, "test_rmax": 0.996237, "test_tmax": 0.07, "lr": 6.605311871227364e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0034206158986035884, "train_cur_epoch_loss": 1.0190329847838635, "train_cur_epoch_avg_loss": 0.0002048719309979621, "train_cur_epoch_time": 1289.8211908340454, "train_cur_epoch_avg_time": 0.2593126640197116, "epoch": 17, "step": 84558} ################################################## Training, Epoch: 0018, Batch: 000442, Sample Num: 7072, Cur Loss: 0.00002093, Cur Avg Loss: 0.00020874, Log Avg loss: 0.00023403, Global Avg Loss: 0.00340391, Time: 0.2249 Steps: 85000, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001442, Sample Num: 23072, Cur Loss: 0.00002207, Cur Avg Loss: 0.00018125, Log Avg loss: 0.00016909, Global Avg Loss: 0.00336630, Time: 0.2784 Steps: 86000, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002442, Sample Num: 39072, Cur Loss: 0.00002442, Cur Avg Loss: 0.00018210, Log Avg loss: 0.00018334, Global Avg Loss: 0.00332971, Time: 0.4478 Steps: 87000, Updated lr: 0.000065 Training, 
Epoch: 0018, Batch: 003442, Sample Num: 55072, Cur Loss: 0.00000255, Cur Avg Loss: 0.00019762, Log Avg loss: 0.00023551, Global Avg Loss: 0.00329455, Time: 0.2194 Steps: 88000, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 004442, Sample Num: 71072, Cur Loss: 0.00000618, Cur Avg Loss: 0.00019380, Log Avg loss: 0.00018063, Global Avg Loss: 0.00325956, Time: 0.2213 Steps: 89000, Updated lr: 0.000064 ***** Running evaluation checkpoint-89532 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-89532 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1317.585328, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001127, "eval_total_loss": 1.201543, "eval_acc": 0.999814, "eval_jaccard": 0.993507, "eval_prec": 0.994843, "eval_recall": 0.994677, "eval_f1": 0.994364, "eval_pr_auc": 0.998081, "eval_roc_auc": 0.999588, "eval_fmax": 0.996879, "eval_pmax": 0.998578, "eval_rmax": 0.995187, "eval_tmax": 0.27, "update_flag": false, "test_avg_loss": 0.001305, "test_total_loss": 1.391503, "test_acc": 0.999835, "test_jaccard": 0.993676, "test_prec": 0.994655, "test_recall": 0.994945, "test_f1": 0.994478, "test_pr_auc": 0.997681, "test_roc_auc": 0.999434, "test_fmax": 0.997066, "test_pmax": 0.997656, "test_rmax": 0.996477, "test_tmax": 0.09, "lr": 6.405150905432596e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.003241620120866365, "train_cur_epoch_loss": 0.9882935072853081, "train_cur_epoch_avg_loss": 0.0001986918993335963, "train_cur_epoch_time": 1317.5853283405304, "train_cur_epoch_avg_time": 0.26489451715732415, "epoch": 18, "step": 89532} ################################################## Training, Epoch: 0019, Batch: 000468, Sample Num: 7488, Cur Loss: 0.00004818, Cur Avg Loss: 0.00021933, Log Avg loss: 0.00023010, Global Avg Loss: 0.00322590, Time: 0.2169 Steps: 90000, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001468, Sample Num: 23488, Cur Loss: 0.00005769, Cur Avg Loss: 0.00017478, Log Avg loss: 0.00015393, Global Avg Loss: 0.00319215, Time: 0.2177 Steps: 91000, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002468, Sample Num: 39488, Cur Loss: 0.00000130, Cur Avg Loss: 0.00017365, Log Avg loss: 0.00017199, Global Avg Loss: 0.00315932, Time: 0.2605 Steps: 92000, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003468, Sample Num: 55488, Cur Loss: 0.00002719, Cur Avg Loss: 0.00018461, Log Avg loss: 0.00021167, Global Avg Loss: 0.00312762, Time: 0.2668 Steps: 93000, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 004468, Sample Num: 71488, Cur Loss: 0.00053789, Cur Avg Loss: 0.00018217, Log Avg loss: 0.00017370, Global Avg Loss: 0.00309620, Time: 0.2807 Steps: 94000, Updated lr: 0.000062 ***** Running evaluation checkpoint-94506 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-94506 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1308.738431, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001113, "eval_total_loss": 1.186249, "eval_acc": 0.999803, "eval_jaccard": 0.993154, "eval_prec": 0.994483, "eval_recall": 0.994332, "eval_f1": 0.994003, "eval_pr_auc": 0.998023, "eval_roc_auc": 0.9996, "eval_fmax": 0.997077, 
"eval_pmax": 0.998511, "eval_rmax": 0.995648, "eval_tmax": 0.23, "update_flag": false, "test_avg_loss": 0.001312, "test_total_loss": 1.398559, "test_acc": 0.999834, "test_jaccard": 0.993603, "test_prec": 0.994515, "test_recall": 0.994924, "test_f1": 0.994403, "test_pr_auc": 0.997737, "test_roc_auc": 0.999427, "test_fmax": 0.997065, "test_pmax": 0.998189, "test_rmax": 0.995944, "test_tmax": 0.21, "lr": 6.204989939637827e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0030808810774818358, "train_cur_epoch_loss": 0.9330144470915672, "train_cur_epoch_avg_loss": 0.00018757829656042768, "train_cur_epoch_time": 1308.7384305000305, "train_cur_epoch_avg_time": 0.26311588872135716, "epoch": 19, "step": 94506} ################################################## Training, Epoch: 0020, Batch: 000494, Sample Num: 7904, Cur Loss: 0.00005858, Cur Avg Loss: 0.00020218, Log Avg loss: 0.00021896, Global Avg Loss: 0.00306591, Time: 0.3495 Steps: 95000, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001494, Sample Num: 23904, Cur Loss: 0.00000108, Cur Avg Loss: 0.00016044, Log Avg loss: 0.00013982, Global Avg Loss: 0.00303543, Time: 0.3317 Steps: 96000, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002494, Sample Num: 39904, Cur Loss: 0.00001177, Cur Avg Loss: 0.00015994, Log Avg loss: 0.00015920, Global Avg Loss: 0.00300578, Time: 0.4279 Steps: 97000, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003494, Sample Num: 55904, Cur Loss: 0.00000057, Cur Avg Loss: 0.00017622, Log Avg loss: 0.00021681, Global Avg Loss: 0.00297732, Time: 0.2147 Steps: 98000, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 004494, Sample Num: 71904, Cur Loss: 0.00089702, Cur Avg Loss: 0.00017731, Log Avg loss: 0.00018112, Global Avg Loss: 0.00294908, Time: 0.1731 Steps: 99000, Updated lr: 0.000060 ***** Running evaluation checkpoint-99480 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-99480 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1276.029819, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001133, "eval_total_loss": 1.207487, "eval_acc": 0.9998, "eval_jaccard": 0.993135, "eval_prec": 0.994434, "eval_recall": 0.994479, "eval_f1": 0.994024, "eval_pr_auc": 0.997896, "eval_roc_auc": 0.999604, "eval_fmax": 0.996964, "eval_pmax": 0.997589, "eval_rmax": 0.996339, "eval_tmax": 0.04, "update_flag": false, "test_avg_loss": 0.001305, "test_total_loss": 1.390682, "test_acc": 0.999837, "test_jaccard": 0.993764, "test_prec": 0.994608, "test_recall": 0.995227, "test_f1": 0.994586, "test_pr_auc": 0.997773, "test_roc_auc": 0.999435, "test_fmax": 0.997069, "test_pmax": 0.998217, "test_rmax": 0.995924, "test_tmax": 0.22, "lr": 6.0048289738430586e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0029358643717811057, "train_cur_epoch_loss": 0.8980405962861412, "train_cur_epoch_avg_loss": 0.00018054696346725798, "train_cur_epoch_time": 1276.0298194885254, "train_cur_epoch_avg_time": 0.2565399717508093, "epoch": 20, "step": 99480} ################################################## Training, Epoch: 0021, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00004634, Cur Avg Loss: 0.00020866, Log Avg loss: 0.00020972, Global Avg Loss: 0.00292168, Time: 0.2208 Steps: 100000, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00000555, Cur Avg Loss: 
0.00016253, Log Avg loss: 0.00013855, Global Avg Loss: 0.00289413, Time: 0.2180 Steps: 101000, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002520, Sample Num: 40320, Cur Loss: 0.00033264, Cur Avg Loss: 0.00016432, Log Avg loss: 0.00016702, Global Avg Loss: 0.00286739, Time: 0.2727 Steps: 102000, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00000669, Cur Avg Loss: 0.00017947, Log Avg loss: 0.00021767, Global Avg Loss: 0.00284167, Time: 0.2207 Steps: 103000, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00002244, Cur Avg Loss: 0.00017817, Log Avg loss: 0.00017357, Global Avg Loss: 0.00281601, Time: 0.3935 Steps: 104000, Updated lr: 0.000058 ***** Running evaluation checkpoint-104454 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-104454 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1299.664524, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001139, "eval_total_loss": 1.213952, "eval_acc": 0.999805, "eval_jaccard": 0.993263, "eval_prec": 0.99456, "eval_recall": 0.994598, "eval_f1": 0.994172, "eval_pr_auc": 0.998055, "eval_roc_auc": 0.999594, "eval_fmax": 0.996956, "eval_pmax": 0.998265, "eval_rmax": 0.995649, "eval_tmax": 0.16, "update_flag": false, "test_avg_loss": 0.001344, "test_total_loss": 1.433155, "test_acc": 0.999831, "test_jaccard": 0.99354, "test_prec": 0.994474, "test_recall": 0.994893, "test_f1": 0.994362, "test_pr_auc": 0.997778, "test_roc_auc": 0.999438, "test_fmax": 0.997149, "test_pmax": 0.998023, "test_rmax": 0.996276, "test_tmax": 0.1, "lr": 5.8046680080482895e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0028045885971188034, "train_cur_epoch_loss": 0.8907096186628131, "train_cur_epoch_avg_loss": 0.00017907310387270067, "train_cur_epoch_time": 1299.664523601532, "train_cur_epoch_avg_time": 0.26129162115028787, "epoch": 21, "step": 104454} ################################################## Training, Epoch: 0022, Batch: 000546, Sample Num: 8736, Cur Loss: 0.00000052, Cur Avg Loss: 0.00018116, Log Avg loss: 0.00018430, Global Avg Loss: 0.00279095, Time: 0.2197 Steps: 105000, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001546, Sample Num: 24736, Cur Loss: 0.00007205, Cur Avg Loss: 0.00015289, Log Avg loss: 0.00013745, Global Avg Loss: 0.00276591, Time: 0.2150 Steps: 106000, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002546, Sample Num: 40736, Cur Loss: 0.00000307, Cur Avg Loss: 0.00016079, Log Avg loss: 0.00017302, Global Avg Loss: 0.00274168, Time: 0.2176 Steps: 107000, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 003546, Sample Num: 56736, Cur Loss: 0.00000740, Cur Avg Loss: 0.00017808, Log Avg loss: 0.00022209, Global Avg Loss: 0.00271835, Time: 0.3744 Steps: 108000, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 004546, Sample Num: 72736, Cur Loss: 0.00020332, Cur Avg Loss: 0.00017168, Log Avg loss: 0.00014901, Global Avg Loss: 0.00269478, Time: 0.2153 Steps: 109000, Updated lr: 0.000056 ***** Running evaluation checkpoint-109428 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-109428 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] 
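Each training line reports the loss three ways: "Cur Avg Loss" appears to be the running mean over the current epoch, "Log Avg loss" the mean over the steps since the previous log line, and "Global Avg Loss" the cumulative mean since step 0, which is why it is still around 2e-3 here even though per-batch losses are down to ~1e-4. That reading is not stated in the log itself, so the bookkeeping sketch below is only an illustration of it, not the trainer's own code:

```python
class LossMeters:
    """Sketch of the three running means shown in each training log line.

    Assumed semantics (not confirmed by the log): the global mean runs since
    step 0, the epoch mean resets every epoch, and the window mean covers only
    the steps since the last logged line.
    """

    def __init__(self):
        self.global_sum, self.global_steps = 0.0, 0   # since step 0
        self.epoch_sum, self.epoch_steps = 0.0, 0     # since the epoch started
        self.window_sum, self.window_steps = 0.0, 0   # since the last log line

    def start_epoch(self):
        self.epoch_sum, self.epoch_steps = 0.0, 0

    def update(self, loss: float):
        self.global_sum += loss; self.global_steps += 1
        self.epoch_sum += loss;  self.epoch_steps += 1
        self.window_sum += loss; self.window_steps += 1

    def log_line(self) -> str:
        # Call only after at least one update() in the current window/epoch.
        line = ("Cur Avg Loss: %.8f, Log Avg loss: %.8f, Global Avg Loss: %.8f"
                % (self.epoch_sum / self.epoch_steps,
                   self.window_sum / self.window_steps,
                   self.global_sum / self.global_steps))
        self.window_sum, self.window_steps = 0.0, 0   # window resets after logging
        return line
```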
################################################## Epoch Time: 1341.108684, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.00109, "eval_total_loss": 1.161707, "eval_acc": 0.999821, "eval_jaccard": 0.993785, "eval_prec": 0.994786, "eval_recall": 0.995176, "eval_f1": 0.994611, "eval_pr_auc": 0.998053, "eval_roc_auc": 0.999582, "eval_fmax": 0.996985, "eval_pmax": 0.997776, "eval_rmax": 0.996195, "eval_tmax": 0.07, "update_flag": true, "test_avg_loss": 0.001307, "test_total_loss": 1.392932, "test_acc": 0.999842, "test_jaccard": 0.993934, "test_prec": 0.994596, "test_recall": 0.995403, "test_f1": 0.994685, "test_pr_auc": 0.997749, "test_roc_auc": 0.999423, "test_fmax": 0.997105, "test_pmax": 0.998058, "test_rmax": 0.996154, "test_tmax": 0.21, "lr": 5.604507042253522e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00268502488994246, "train_cur_epoch_loss": 0.8664063331766488, "train_cur_epoch_avg_loss": 0.00017418703923937452, "train_cur_epoch_time": 1341.1086835861206, "train_cur_epoch_avg_time": 0.26962378037517504, "epoch": 22, "step": 109428} ################################################## Training, Epoch: 0023, Batch: 000572, Sample Num: 9152, Cur Loss: 0.00005287, Cur Avg Loss: 0.00020480, Log Avg loss: 0.00020307, Global Avg Loss: 0.00267213, Time: 0.3820 Steps: 110000, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 001572, Sample Num: 25152, Cur Loss: 0.00000091, Cur Avg Loss: 0.00015092, Log Avg loss: 0.00012010, Global Avg Loss: 0.00264914, Time: 0.3124 Steps: 111000, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 002572, Sample Num: 41152, Cur Loss: 0.00001381, Cur Avg Loss: 0.00015484, Log Avg loss: 0.00016101, Global Avg Loss: 0.00262692, Time: 0.2883 Steps: 112000, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 003572, Sample Num: 57152, Cur Loss: 0.00005708, Cur Avg Loss: 0.00017512, Log Avg loss: 0.00022727, Global Avg Loss: 0.00260569, Time: 0.2871 Steps: 113000, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 004572, Sample Num: 73152, Cur Loss: 0.00009720, Cur Avg Loss: 0.00016931, Log Avg loss: 0.00014855, Global Avg Loss: 0.00258413, Time: 0.1997 Steps: 114000, Updated lr: 0.000054 ***** Running evaluation checkpoint-114402 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-114402 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1277.717051, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001168, "eval_total_loss": 1.245481, "eval_acc": 0.999812, "eval_jaccard": 0.993454, "eval_prec": 0.994644, "eval_recall": 0.994705, "eval_f1": 0.994269, "eval_pr_auc": 0.997986, "eval_roc_auc": 0.999573, "eval_fmax": 0.996972, "eval_pmax": 0.998197, "eval_rmax": 0.99575, "eval_tmax": 0.15, "update_flag": false, "test_avg_loss": 0.001349, "test_total_loss": 1.438322, "test_acc": 0.999837, "test_jaccard": 0.99366, "test_prec": 0.994435, "test_recall": 0.995061, "test_f1": 0.99443, "test_pr_auc": 0.997857, "test_roc_auc": 0.999441, "test_fmax": 0.997053, "test_pmax": 0.998101, "test_rmax": 0.996008, "test_tmax": 0.16, "lr": 5.4043460764587526e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00257573773922358, "train_cur_epoch_loss": 0.8526451860322346, "train_cur_epoch_avg_loss": 0.0001714204234081694, "train_cur_epoch_time": 1277.7170507907867, "train_cur_epoch_avg_time": 0.25687918190405845, "epoch": 23, "step": 114402} 
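Each checkpoint appends one JSON record combining the dev (eval_*) and test (test_*) metrics with the current lr, epoch and step; the epoch-22 record above is the only one in this stretch with "update_flag": true, presumably meaning the monitored dev metric improved and that checkpoint was kept as the new best. Because the records are embedded in free-running log text, a short script makes them easier to compare across epochs. A minimal sketch, assuming the records remain one-line JSON objects containing the key "eval_avg_loss" and using a placeholder log path:

```python
import json
import re

# Pull the per-epoch metric records out of a log like the one above.
# Assumptions: each record is a flat JSON object containing "eval_avg_loss";
# "train.log" is a placeholder path, not the run's actual log file.
RECORD_RE = re.compile(r'\{[^{}]*"eval_avg_loss"[^{}]*\}')

def load_epoch_records(log_path="train.log"):
    with open(log_path, encoding="utf-8") as fh:
        text = fh.read()
    return [json.loads(m.group(0)) for m in RECORD_RE.finditer(text)]

if __name__ == "__main__":
    for r in load_epoch_records():
        flag = "*" if r.get("update_flag") else " "
        print(f"epoch {r['epoch']:>3}  step {r['step']:>7}  "
              f"eval_f1={r['eval_f1']:.6f}  test_f1={r['test_f1']:.6f}  "
              f"lr={r['lr']:.2e} {flag}")
```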
################################################## Training, Epoch: 0024, Batch: 000598, Sample Num: 9568, Cur Loss: 0.00001220, Cur Avg Loss: 0.00018703, Log Avg loss: 0.00019042, Global Avg Loss: 0.00256332, Time: 0.2199 Steps: 115000, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001598, Sample Num: 25568, Cur Loss: 0.00000041, Cur Avg Loss: 0.00014472, Log Avg loss: 0.00011942, Global Avg Loss: 0.00254225, Time: 0.2191 Steps: 116000, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002598, Sample Num: 41568, Cur Loss: 0.00002400, Cur Avg Loss: 0.00015085, Log Avg loss: 0.00016065, Global Avg Loss: 0.00252189, Time: 0.2181 Steps: 117000, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003598, Sample Num: 57568, Cur Loss: 0.00002320, Cur Avg Loss: 0.00016877, Log Avg loss: 0.00021532, Global Avg Loss: 0.00250235, Time: 0.2271 Steps: 118000, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 004598, Sample Num: 73568, Cur Loss: 0.00005605, Cur Avg Loss: 0.00016368, Log Avg loss: 0.00014537, Global Avg Loss: 0.00248254, Time: 0.6599 Steps: 119000, Updated lr: 0.000052 ***** Running evaluation checkpoint-119376 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-119376 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1254.738135, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001162, "eval_total_loss": 1.238752, "eval_acc": 0.999815, "eval_jaccard": 0.993571, "eval_prec": 0.994641, "eval_recall": 0.994826, "eval_f1": 0.994365, "eval_pr_auc": 0.997953, "eval_roc_auc": 0.999579, "eval_fmax": 0.997026, "eval_pmax": 0.997913, "eval_rmax": 0.996141, "eval_tmax": 0.07, "update_flag": false, "test_avg_loss": 0.001367, "test_total_loss": 1.45768, "test_acc": 0.999837, "test_jaccard": 0.993765, "test_prec": 0.994571, "test_recall": 0.995196, "test_f1": 0.994567, "test_pr_auc": 0.997797, "test_roc_auc": 0.99943, "test_fmax": 0.997102, "test_pmax": 0.998116, "test_rmax": 0.996091, "test_tmax": 0.16, "lr": 5.2041851106639835e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0024753388278021425, "train_cur_epoch_loss": 0.8264990650523762, "train_cur_epoch_avg_loss": 0.00016616386510904228, "train_cur_epoch_time": 1254.7381353378296, "train_cur_epoch_avg_time": 0.2522593758218395, "epoch": 24, "step": 119376} ################################################## Training, Epoch: 0025, Batch: 000624, Sample Num: 9984, Cur Loss: 0.00003159, Cur Avg Loss: 0.00018873, Log Avg loss: 0.00019166, Global Avg Loss: 0.00246345, Time: 0.3144 Steps: 120000, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001624, Sample Num: 25984, Cur Loss: 0.00003383, Cur Avg Loss: 0.00015398, Log Avg loss: 0.00013230, Global Avg Loss: 0.00244418, Time: 0.2196 Steps: 121000, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002624, Sample Num: 41984, Cur Loss: 0.00000001, Cur Avg Loss: 0.00016313, Log Avg loss: 0.00017797, Global Avg Loss: 0.00242561, Time: 0.2511 Steps: 122000, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003624, Sample Num: 57984, Cur Loss: 0.00000992, Cur Avg Loss: 0.00016976, Log Avg loss: 0.00018715, Global Avg Loss: 0.00240741, Time: 0.2963 Steps: 123000, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 004624, Sample Num: 73984, Cur Loss: 0.00000045, Cur Avg Loss: 0.00016750, Log Avg loss: 0.00015931, Global Avg Loss: 0.00238928, Time: 0.3077 Steps: 
124000, Updated lr: 0.000050 ***** Running evaluation checkpoint-124350 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-124350 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1256.151835, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.00117, "eval_total_loss": 1.247645, "eval_acc": 0.999817, "eval_jaccard": 0.993643, "eval_prec": 0.994738, "eval_recall": 0.994918, "eval_f1": 0.994435, "eval_pr_auc": 0.99794, "eval_roc_auc": 0.999584, "eval_fmax": 0.996925, "eval_pmax": 0.998295, "eval_rmax": 0.995559, "eval_tmax": 0.22, "update_flag": false, "test_avg_loss": 0.001385, "test_total_loss": 1.476853, "test_acc": 0.999836, "test_jaccard": 0.993782, "test_prec": 0.994577, "test_recall": 0.995276, "test_f1": 0.994581, "test_pr_auc": 0.997721, "test_roc_auc": 0.999432, "test_fmax": 0.99712, "test_pmax": 0.997643, "test_rmax": 0.996599, "test_tmax": 0.05, "lr": 5.004024144869216e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0023831744682508284, "train_cur_epoch_loss": 0.8516972192816297, "train_cur_epoch_avg_loss": 0.0001712298390192259, "train_cur_epoch_time": 1256.1518354415894, "train_cur_epoch_avg_time": 0.2525435937759528, "epoch": 25, "step": 124350} ################################################## Training, Epoch: 0026, Batch: 000650, Sample Num: 10400, Cur Loss: 0.00000264, Cur Avg Loss: 0.00016899, Log Avg loss: 0.00018704, Global Avg Loss: 0.00237166, Time: 0.0842 Steps: 125000, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001650, Sample Num: 26400, Cur Loss: 0.00000740, Cur Avg Loss: 0.00014102, Log Avg loss: 0.00012283, Global Avg Loss: 0.00235381, Time: 0.3003 Steps: 126000, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002650, Sample Num: 42400, Cur Loss: 0.00143561, Cur Avg Loss: 0.00014825, Log Avg loss: 0.00016018, Global Avg Loss: 0.00233654, Time: 0.6569 Steps: 127000, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003650, Sample Num: 58400, Cur Loss: 0.00000085, Cur Avg Loss: 0.00016059, Log Avg loss: 0.00019331, Global Avg Loss: 0.00231980, Time: 0.2187 Steps: 128000, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 004650, Sample Num: 74400, Cur Loss: 0.00000692, Cur Avg Loss: 0.00015818, Log Avg loss: 0.00014938, Global Avg Loss: 0.00230297, Time: 0.2185 Steps: 129000, Updated lr: 0.000048 ***** Running evaluation checkpoint-129324 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-129324 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1206.643691, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001158, "eval_total_loss": 1.233899, "eval_acc": 0.999814, "eval_jaccard": 0.993446, "eval_prec": 0.994531, "eval_recall": 0.994752, "eval_f1": 0.99427, "eval_pr_auc": 0.997841, "eval_roc_auc": 0.999584, "eval_fmax": 0.997012, "eval_pmax": 0.998045, "eval_rmax": 0.995982, "eval_tmax": 0.09, "update_flag": false, "test_avg_loss": 0.001403, "test_total_loss": 1.49581, "test_acc": 0.999838, "test_jaccard": 0.993622, "test_prec": 0.994245, "test_recall": 0.99509, "test_f1": 0.994362, "test_pr_auc": 0.997805, "test_roc_auc": 0.999414, "test_fmax": 0.997154, "test_pmax": 0.998121, "test_rmax": 
0.996188, "test_tmax": 0.13, "lr": 4.803863179074447e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0022978101408431665, "train_cur_epoch_loss": 0.8142535274109157, "train_cur_epoch_avg_loss": 0.00016370195565157132, "train_cur_epoch_time": 1206.6436913013458, "train_cur_epoch_avg_time": 0.242590207338429, "epoch": 26, "step": 129324} ################################################## Training, Epoch: 0027, Batch: 000676, Sample Num: 10816, Cur Loss: 0.00000400, Cur Avg Loss: 0.00016229, Log Avg loss: 0.00018842, Global Avg Loss: 0.00228671, Time: 0.1181 Steps: 130000, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 001676, Sample Num: 26816, Cur Loss: 0.00000139, Cur Avg Loss: 0.00014024, Log Avg loss: 0.00012534, Global Avg Loss: 0.00227021, Time: 0.2216 Steps: 131000, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 002676, Sample Num: 42816, Cur Loss: 0.00562489, Cur Avg Loss: 0.00015508, Log Avg loss: 0.00017996, Global Avg Loss: 0.00225437, Time: 0.4109 Steps: 132000, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 003676, Sample Num: 58816, Cur Loss: 0.00003839, Cur Avg Loss: 0.00016292, Log Avg loss: 0.00018389, Global Avg Loss: 0.00223880, Time: 0.3948 Steps: 133000, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 004676, Sample Num: 74816, Cur Loss: 0.00000705, Cur Avg Loss: 0.00015816, Log Avg loss: 0.00014068, Global Avg Loss: 0.00222315, Time: 0.2196 Steps: 134000, Updated lr: 0.000046 ***** Running evaluation checkpoint-134298 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-134298 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1281.215469, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001156, "eval_total_loss": 1.232656, "eval_acc": 0.999818, "eval_jaccard": 0.99355, "eval_prec": 0.994639, "eval_recall": 0.994747, "eval_f1": 0.994332, "eval_pr_auc": 0.997986, "eval_roc_auc": 0.999615, "eval_fmax": 0.997089, "eval_pmax": 0.998295, "eval_rmax": 0.995885, "eval_tmax": 0.13, "update_flag": false, "test_avg_loss": 0.001376, "test_total_loss": 1.466446, "test_acc": 0.999836, "test_jaccard": 0.99367, "test_prec": 0.994445, "test_recall": 0.995061, "test_f1": 0.994442, "test_pr_auc": 0.997828, "test_roc_auc": 0.99945, "test_fmax": 0.997083, "test_pmax": 0.997959, "test_rmax": 0.996208, "test_tmax": 0.11, "lr": 4.603702213279678e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.002218729366816752, "train_cur_epoch_loss": 0.8089178503546273, "train_cur_epoch_avg_loss": 0.00016262924213000145, "train_cur_epoch_time": 1281.215469121933, "train_cur_epoch_avg_time": 0.2575825229436938, "epoch": 27, "step": 134298} ################################################## Training, Epoch: 0028, Batch: 000702, Sample Num: 11232, Cur Loss: 0.00000800, Cur Avg Loss: 0.00015877, Log Avg loss: 0.00018080, Global Avg Loss: 0.00220802, Time: 0.2217 Steps: 135000, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001702, Sample Num: 27232, Cur Loss: 0.00001757, Cur Avg Loss: 0.00013716, Log Avg loss: 0.00012198, Global Avg Loss: 0.00219268, Time: 0.2202 Steps: 136000, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002702, Sample Num: 43232, Cur Loss: 0.00000059, Cur Avg Loss: 0.00014804, Log Avg loss: 0.00016657, Global Avg Loss: 0.00217789, Time: 0.2174 Steps: 137000, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 003702, Sample Num: 
59232, Cur Loss: 0.00000337, Cur Avg Loss: 0.00016045, Log Avg loss: 0.00019397, Global Avg Loss: 0.00216351, Time: 0.2116 Steps: 138000, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 004702, Sample Num: 75232, Cur Loss: 0.00013519, Cur Avg Loss: 0.00015928, Log Avg loss: 0.00015496, Global Avg Loss: 0.00214906, Time: 0.3138 Steps: 139000, Updated lr: 0.000044 ***** Running evaluation checkpoint-139272 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-139272 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1267.749004, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001176, "eval_total_loss": 1.253302, "eval_acc": 0.999812, "eval_jaccard": 0.993343, "eval_prec": 0.994351, "eval_recall": 0.994711, "eval_f1": 0.994162, "eval_pr_auc": 0.997896, "eval_roc_auc": 0.999588, "eval_fmax": 0.996952, "eval_pmax": 0.998177, "eval_rmax": 0.99573, "eval_tmax": 0.13, "update_flag": false, "test_avg_loss": 0.001382, "test_total_loss": 1.472918, "test_acc": 0.999843, "test_jaccard": 0.993924, "test_prec": 0.994638, "test_recall": 0.995358, "test_f1": 0.994679, "test_pr_auc": 0.997744, "test_roc_auc": 0.999416, "test_fmax": 0.997104, "test_pmax": 0.998248, "test_rmax": 0.995964, "test_tmax": 0.16, "lr": 4.40354124748491e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0021453489054055813, "train_cur_epoch_loss": 0.8161162488901917, "train_cur_epoch_avg_loss": 0.00016407644730401924, "train_cur_epoch_time": 1267.7490038871765, "train_cur_epoch_avg_time": 0.2548751515655763, "epoch": 28, "step": 139272} ################################################## Training, Epoch: 0029, Batch: 000728, Sample Num: 11648, Cur Loss: 0.00452267, Cur Avg Loss: 0.00016520, Log Avg loss: 0.00018745, Global Avg Loss: 0.00213505, Time: 0.2207 Steps: 140000, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001728, Sample Num: 27648, Cur Loss: 0.00000937, Cur Avg Loss: 0.00013681, Log Avg loss: 0.00011615, Global Avg Loss: 0.00212073, Time: 0.2356 Steps: 141000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002728, Sample Num: 43648, Cur Loss: 0.00000060, Cur Avg Loss: 0.00014038, Log Avg loss: 0.00014655, Global Avg Loss: 0.00210683, Time: 0.2187 Steps: 142000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 003728, Sample Num: 59648, Cur Loss: 0.00000944, Cur Avg Loss: 0.00015115, Log Avg loss: 0.00018054, Global Avg Loss: 0.00209336, Time: 0.3858 Steps: 143000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 004728, Sample Num: 75648, Cur Loss: 0.00000019, Cur Avg Loss: 0.00015105, Log Avg loss: 0.00015067, Global Avg Loss: 0.00207987, Time: 0.2875 Steps: 144000, Updated lr: 0.000042 ***** Running evaluation checkpoint-144246 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-144246 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1253.160947, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001165, "eval_total_loss": 1.242123, "eval_acc": 0.999817, "eval_jaccard": 0.993464, "eval_prec": 0.994493, "eval_recall": 0.994679, "eval_f1": 0.994236, "eval_pr_auc": 0.997946, "eval_roc_auc": 0.999564, "eval_fmax": 0.996962, "eval_pmax": 0.998254, 
"eval_rmax": 0.995675, "eval_tmax": 0.19, "update_flag": false, "test_avg_loss": 0.001376, "test_total_loss": 1.467187, "test_acc": 0.999842, "test_jaccard": 0.99389, "test_prec": 0.994562, "test_recall": 0.995266, "test_f1": 0.99462, "test_pr_auc": 0.997844, "test_roc_auc": 0.999421, "test_fmax": 0.997193, "test_pmax": 0.998385, "test_rmax": 0.996003, "test_tmax": 0.19, "lr": 4.203380281690141e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.002076655847784151, "train_cur_epoch_loss": 0.7622666658256794, "train_cur_epoch_avg_loss": 0.00015325023438393233, "train_cur_epoch_time": 1253.1609468460083, "train_cur_epoch_avg_time": 0.25194228927342344, "epoch": 29, "step": 144246} ################################################## Training, Epoch: 0030, Batch: 000754, Sample Num: 12064, Cur Loss: 0.00000141, Cur Avg Loss: 0.00016926, Log Avg loss: 0.00017572, Global Avg Loss: 0.00206674, Time: 0.2184 Steps: 145000, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 001754, Sample Num: 28064, Cur Loss: 0.00000719, Cur Avg Loss: 0.00013235, Log Avg loss: 0.00010452, Global Avg Loss: 0.00205330, Time: 0.3324 Steps: 146000, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 002754, Sample Num: 44064, Cur Loss: 0.00000055, Cur Avg Loss: 0.00014498, Log Avg loss: 0.00016713, Global Avg Loss: 0.00204047, Time: 0.2196 Steps: 147000, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 003754, Sample Num: 60064, Cur Loss: 0.00000498, Cur Avg Loss: 0.00015290, Log Avg loss: 0.00017470, Global Avg Loss: 0.00202786, Time: 0.0860 Steps: 148000, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 004754, Sample Num: 76064, Cur Loss: 0.00002802, Cur Avg Loss: 0.00015271, Log Avg loss: 0.00015199, Global Avg Loss: 0.00201527, Time: 0.2199 Steps: 149000, Updated lr: 0.000040 ***** Running evaluation checkpoint-149220 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-149220 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1238.931063, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001188, "eval_total_loss": 1.266642, "eval_acc": 0.999812, "eval_jaccard": 0.993474, "eval_prec": 0.994504, "eval_recall": 0.994736, "eval_f1": 0.994258, "eval_pr_auc": 0.997813, "eval_roc_auc": 0.999574, "eval_fmax": 0.996948, "eval_pmax": 0.997971, "eval_rmax": 0.995926, "eval_tmax": 0.08, "update_flag": false, "test_avg_loss": 0.001414, "test_total_loss": 1.507196, "test_acc": 0.999843, "test_jaccard": 0.993979, "test_prec": 0.994662, "test_recall": 0.995323, "test_f1": 0.994702, "test_pr_auc": 0.997831, "test_roc_auc": 0.99941, "test_fmax": 0.997075, "test_pmax": 0.99814, "test_rmax": 0.996012, "test_tmax": 0.09, "lr": 4.003219315895372e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0020126396412535738, "train_cur_epoch_loss": 0.7767878483855691, "train_cur_epoch_avg_loss": 0.0001561696518668213, "train_cur_epoch_time": 1238.9310631752014, "train_cur_epoch_avg_time": 0.24908143610277472, "epoch": 30, "step": 149220} ################################################## Training, Epoch: 0031, Batch: 000780, Sample Num: 12480, Cur Loss: 0.00001144, Cur Avg Loss: 0.00017964, Log Avg loss: 0.00019095, Global Avg Loss: 0.00200311, Time: 0.2113 Steps: 150000, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 001780, Sample Num: 28480, Cur Loss: 0.00000155, Cur Avg Loss: 0.00013359, Log Avg loss: 
0.00009766, Global Avg Loss: 0.00199049, Time: 0.2128 Steps: 151000, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 002780, Sample Num: 44480, Cur Loss: 0.00000196, Cur Avg Loss: 0.00014677, Log Avg loss: 0.00017024, Global Avg Loss: 0.00197851, Time: 0.2195 Steps: 152000, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 003780, Sample Num: 60480, Cur Loss: 0.00000160, Cur Avg Loss: 0.00015071, Log Avg loss: 0.00016168, Global Avg Loss: 0.00196664, Time: 0.2196 Steps: 153000, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 004780, Sample Num: 76480, Cur Loss: 0.00000725, Cur Avg Loss: 0.00014948, Log Avg loss: 0.00014480, Global Avg Loss: 0.00195481, Time: 0.2216 Steps: 154000, Updated lr: 0.000038 ***** Running evaluation checkpoint-154194 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-154194 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1217.010044, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001146, "eval_total_loss": 1.22144, "eval_acc": 0.999816, "eval_jaccard": 0.993597, "eval_prec": 0.994653, "eval_recall": 0.994768, "eval_f1": 0.994356, "eval_pr_auc": 0.997997, "eval_roc_auc": 0.999588, "eval_fmax": 0.997033, "eval_pmax": 0.998161, "eval_rmax": 0.995906, "eval_tmax": 0.08, "update_flag": false, "test_avg_loss": 0.001365, "test_total_loss": 1.45533, "test_acc": 0.999847, "test_jaccard": 0.9941, "test_prec": 0.994738, "test_recall": 0.995432, "test_f1": 0.994799, "test_pr_auc": 0.997919, "test_roc_auc": 0.999414, "test_fmax": 0.997106, "test_pmax": 0.998659, "test_rmax": 0.995558, "test_tmax": 0.46, "lr": 3.803058350100604e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0019526444078084835, "train_cur_epoch_loss": 0.7599645497635439, "train_cur_epoch_avg_loss": 0.00015278740445587934, "train_cur_epoch_time": 1217.010044336319, "train_cur_epoch_avg_time": 0.24467431530685946, "epoch": 31, "step": 154194} ################################################## Training, Epoch: 0032, Batch: 000806, Sample Num: 12896, Cur Loss: 0.00000445, Cur Avg Loss: 0.00016737, Log Avg loss: 0.00018036, Global Avg Loss: 0.00194336, Time: 0.2167 Steps: 155000, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 001806, Sample Num: 28896, Cur Loss: 0.00000000, Cur Avg Loss: 0.00012426, Log Avg loss: 0.00008952, Global Avg Loss: 0.00193148, Time: 0.2136 Steps: 156000, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 002806, Sample Num: 44896, Cur Loss: 0.00000215, Cur Avg Loss: 0.00013464, Log Avg loss: 0.00015339, Global Avg Loss: 0.00192015, Time: 0.3515 Steps: 157000, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 003806, Sample Num: 60896, Cur Loss: 0.00002234, Cur Avg Loss: 0.00014066, Log Avg loss: 0.00015756, Global Avg Loss: 0.00190900, Time: 0.2185 Steps: 158000, Updated lr: 0.000036 Training, Epoch: 0032, Batch: 004806, Sample Num: 76896, Cur Loss: 0.00013535, Cur Avg Loss: 0.00014467, Log Avg loss: 0.00015991, Global Avg Loss: 0.00189800, Time: 0.2162 Steps: 159000, Updated lr: 0.000036 ***** Running evaluation checkpoint-159168 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-159168 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] 
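The "Updated lr" values and the "lr" field in each record drop by the same ~2.0e-6 every epoch (4,974 steps), so over this stretch the schedule behaves as a straight line that extrapolates back to roughly 1e-4 at step 0 and forward to zero near step 248,700. The trainer's actual scheduler is not visible in the log; the sketch below simply fits two of the logged points and spot-checks a third:

```python
# Fit of the learning-rate trend implied by the logged "lr" values.
# Two logged points are taken from the records above; treat the result as a
# fit to the log, not as the scheduler implementation.

STEP_A, LR_A = 89_532, 6.405150905432596e-05
STEP_B, LR_B = 94_506, 6.204989939637827e-05

slope = (LR_B - LR_A) / (STEP_B - STEP_A)   # ~ -4.02e-10 per step
peak_lr = LR_A - slope * STEP_A             # ~ 1.0e-4 at step 0
zero_step = STEP_A - LR_A / slope           # ~ 248,700

def fitted_lr(step: int) -> float:
    """Linear decay implied by the fit, clamped at zero."""
    return max(0.0, peak_lr + slope * step)

# Spot check against the epoch-31 record above (step 154,194, lr 3.803e-05).
print(f"peak ~ {peak_lr:.3e}, reaches zero near step {zero_step:,.0f}")
print(f"fitted lr @ 154,194 = {fitted_lr(154_194):.6e}")
```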
################################################## Epoch Time: 1240.349366, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001197, "eval_total_loss": 1.275488, "eval_acc": 0.999817, "eval_jaccard": 0.993631, "eval_prec": 0.994602, "eval_recall": 0.994854, "eval_f1": 0.994376, "eval_pr_auc": 0.997892, "eval_roc_auc": 0.999604, "eval_fmax": 0.996923, "eval_pmax": 0.998138, "eval_rmax": 0.995711, "eval_tmax": 0.13, "update_flag": false, "test_avg_loss": 0.001405, "test_total_loss": 1.498031, "test_acc": 0.999847, "test_jaccard": 0.993961, "test_prec": 0.994582, "test_recall": 0.995333, "test_f1": 0.994666, "test_pr_auc": 0.997978, "test_roc_auc": 0.999436, "test_fmax": 0.997129, "test_pmax": 0.998052, "test_rmax": 0.996208, "test_tmax": 0.11, "lr": 3.602897384305835e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.001896150307461999, "train_cur_epoch_loss": 0.7204003204899829, "train_cur_epoch_avg_loss": 0.0001448331967209455, "train_cur_epoch_time": 1240.3493661880493, "train_cur_epoch_avg_time": 0.24936657945075377, "epoch": 32, "step": 159168} ################################################## Training, Epoch: 0033, Batch: 000832, Sample Num: 13312, Cur Loss: 0.00004781, Cur Avg Loss: 0.00014852, Log Avg loss: 0.00014870, Global Avg Loss: 0.00188706, Time: 0.2579 Steps: 160000, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 001832, Sample Num: 29312, Cur Loss: 0.00000463, Cur Avg Loss: 0.00011313, Log Avg loss: 0.00008369, Global Avg Loss: 0.00187586, Time: 0.2247 Steps: 161000, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 002832, Sample Num: 45312, Cur Loss: 0.00000223, Cur Avg Loss: 0.00013063, Log Avg loss: 0.00016269, Global Avg Loss: 0.00186529, Time: 0.2229 Steps: 162000, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 003832, Sample Num: 61312, Cur Loss: 0.00000142, Cur Avg Loss: 0.00013526, Log Avg loss: 0.00014836, Global Avg Loss: 0.00185475, Time: 0.3373 Steps: 163000, Updated lr: 0.000034 Training, Epoch: 0033, Batch: 004832, Sample Num: 77312, Cur Loss: 0.00000285, Cur Avg Loss: 0.00014167, Log Avg loss: 0.00016622, Global Avg Loss: 0.00184446, Time: 0.2548 Steps: 164000, Updated lr: 0.000034 ***** Running evaluation checkpoint-164142 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-164142 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1252.769878, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001206, "eval_total_loss": 1.2853, "eval_acc": 0.999806, "eval_jaccard": 0.993402, "eval_prec": 0.994465, "eval_recall": 0.994762, "eval_f1": 0.994238, "eval_pr_auc": 0.997875, "eval_roc_auc": 0.999607, "eval_fmax": 0.996904, "eval_pmax": 0.998138, "eval_rmax": 0.995673, "eval_tmax": 0.14, "update_flag": false, "test_avg_loss": 0.00142, "test_total_loss": 1.513574, "test_acc": 0.999841, "test_jaccard": 0.993774, "test_prec": 0.994408, "test_recall": 0.995196, "test_f1": 0.994504, "test_pr_auc": 0.997926, "test_roc_auc": 0.999428, "test_fmax": 0.997109, "test_pmax": 0.998119, "test_rmax": 0.9961, "test_tmax": 0.14, "lr": 3.402736418511067e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0018430028096563518, "train_cur_epoch_loss": 0.7077150445019394, "train_cur_epoch_avg_loss": 0.00014228287987574174, "train_cur_epoch_time": 1252.7698783874512, "train_cur_epoch_avg_time": 0.25186366674456195, "epoch": 33, "step": 164142} 
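Alongside the fixed-threshold precision/recall/F1, every record carries fmax/pmax/rmax/tmax. In multi-label evaluation these conventionally come from sweeping the decision threshold over the sigmoid outputs: Fmax is the best F1 found in the sweep, pmax and rmax are the precision and recall at that point, and tmax is the threshold that achieved it (logged tmax values such as 0.13 or 0.22 are consistent with a 0.01-step grid). The sample-averaged, CAFA-style formulation below is an assumption about what the training code computes, not a statement of it:

```python
import numpy as np

def fmax_summary(y_true: np.ndarray, y_prob: np.ndarray, step: float = 0.01):
    """Threshold-sweep Fmax for multi-label outputs (assumed convention).

    y_true: (n_samples, n_labels) binary matrix; y_prob: same shape, in [0, 1].
    Returns the best F1 with the precision, recall and threshold at that point.
    """
    best = {"fmax": 0.0, "pmax": 0.0, "rmax": 0.0, "tmax": 0.0}
    for t in np.arange(step, 1.0, step):
        pred = (y_prob >= t).astype(int)
        tp = (pred * y_true).sum(axis=1)
        has_pred = pred.sum(axis=1) > 0
        if not has_pred.any():
            continue
        # precision over samples that predict at least one label; recall over all
        precision = (tp[has_pred] / pred.sum(axis=1)[has_pred]).mean()
        recall = (tp / np.clip(y_true.sum(axis=1), 1, None)).mean()
        if precision + recall == 0:
            continue
        f1 = 2 * precision * recall / (precision + recall)
        if f1 > best["fmax"]:
            best = {"fmax": f1, "pmax": precision,
                    "rmax": recall, "tmax": round(float(t), 2)}
    return best
```

Run on stacked dev predictions and labels, this returns a dict with the same four keys as each per-epoch record, under the stated assumption about how the averaging is done.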
################################################## Training, Epoch: 0034, Batch: 000858, Sample Num: 13728, Cur Loss: 0.00001096, Cur Avg Loss: 0.00016475, Log Avg loss: 0.00016453, Global Avg Loss: 0.00183428, Time: 0.3286 Steps: 165000, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 001858, Sample Num: 29728, Cur Loss: 0.00000335, Cur Avg Loss: 0.00011814, Log Avg loss: 0.00007814, Global Avg Loss: 0.00182370, Time: 0.2164 Steps: 166000, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 002858, Sample Num: 45728, Cur Loss: 0.00000004, Cur Avg Loss: 0.00013483, Log Avg loss: 0.00016585, Global Avg Loss: 0.00181377, Time: 0.2185 Steps: 167000, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 003858, Sample Num: 61728, Cur Loss: 0.00000693, Cur Avg Loss: 0.00013874, Log Avg loss: 0.00014992, Global Avg Loss: 0.00180387, Time: 0.3413 Steps: 168000, Updated lr: 0.000032 Training, Epoch: 0034, Batch: 004858, Sample Num: 77728, Cur Loss: 0.00000134, Cur Avg Loss: 0.00014488, Log Avg loss: 0.00016857, Global Avg Loss: 0.00179419, Time: 0.3466 Steps: 169000, Updated lr: 0.000032 ***** Running evaluation checkpoint-169116 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-169116 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1240.625651, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001214, "eval_total_loss": 1.294514, "eval_acc": 0.999812, "eval_jaccard": 0.993452, "eval_prec": 0.994492, "eval_recall": 0.994679, "eval_f1": 0.994227, "eval_pr_auc": 0.997909, "eval_roc_auc": 0.999585, "eval_fmax": 0.996904, "eval_pmax": 0.998236, "eval_rmax": 0.995575, "eval_tmax": 0.16, "update_flag": false, "test_avg_loss": 0.001429, "test_total_loss": 1.523807, "test_acc": 0.999842, "test_jaccard": 0.993773, "test_prec": 0.994433, "test_recall": 0.995177, "test_f1": 0.9945, "test_pr_auc": 0.997918, "test_roc_auc": 0.999426, "test_fmax": 0.997166, "test_pmax": 0.998155, "test_rmax": 0.996179, "test_tmax": 0.1, "lr": 3.202575452716298e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0017930998893872251, "train_cur_epoch_loss": 0.7277137109980392, "train_cur_epoch_avg_loss": 0.0001463035205062403, "train_cur_epoch_time": 1240.625650882721, "train_cur_epoch_avg_time": 0.2494221252277284, "epoch": 34, "step": 169116} ################################################## Training, Epoch: 0035, Batch: 000884, Sample Num: 14144, Cur Loss: 0.00001083, Cur Avg Loss: 0.00015050, Log Avg loss: 0.00015692, Global Avg Loss: 0.00178456, Time: 0.1995 Steps: 170000, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 001884, Sample Num: 30144, Cur Loss: 0.00000952, Cur Avg Loss: 0.00011720, Log Avg loss: 0.00008776, Global Avg Loss: 0.00177464, Time: 0.0905 Steps: 171000, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 002884, Sample Num: 46144, Cur Loss: 0.00000515, Cur Avg Loss: 0.00013857, Log Avg loss: 0.00017884, Global Avg Loss: 0.00176536, Time: 0.2063 Steps: 172000, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 003884, Sample Num: 62144, Cur Loss: 0.00000031, Cur Avg Loss: 0.00013396, Log Avg loss: 0.00012068, Global Avg Loss: 0.00175585, Time: 0.2185 Steps: 173000, Updated lr: 0.000030 Training, Epoch: 0035, Batch: 004884, Sample Num: 78144, Cur Loss: 0.00002523, Cur Avg Loss: 0.00013953, Log Avg loss: 0.00016116, Global Avg Loss: 0.00174669, Time: 0.2182 Steps: 
174000, Updated lr: 0.000030 ***** Running evaluation checkpoint-174090 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-174090 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1233.739642, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001224, "eval_total_loss": 1.304405, "eval_acc": 0.999809, "eval_jaccard": 0.993354, "eval_prec": 0.994388, "eval_recall": 0.994685, "eval_f1": 0.994161, "eval_pr_auc": 0.997822, "eval_roc_auc": 0.999579, "eval_fmax": 0.996947, "eval_pmax": 0.997795, "eval_rmax": 0.9961, "eval_tmax": 0.05, "update_flag": false, "test_avg_loss": 0.001432, "test_total_loss": 1.526045, "test_acc": 0.999846, "test_jaccard": 0.993949, "test_prec": 0.994589, "test_recall": 0.995372, "test_f1": 0.994678, "test_pr_auc": 0.997929, "test_roc_auc": 0.999436, "test_fmax": 0.997166, "test_pmax": 0.998018, "test_rmax": 0.996315, "test_tmax": 0.09, "lr": 3.0024144869215293e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.001745921786934198, "train_cur_epoch_loss": 0.7056429937649626, "train_cur_epoch_avg_loss": 0.00014186630353135558, "train_cur_epoch_time": 1233.7396416664124, "train_cur_epoch_avg_time": 0.24803772450068604, "epoch": 35, "step": 174090} ################################################## Training, Epoch: 0036, Batch: 000910, Sample Num: 14560, Cur Loss: 0.00001563, Cur Avg Loss: 0.00016301, Log Avg loss: 0.00017251, Global Avg Loss: 0.00173769, Time: 0.2181 Steps: 175000, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001910, Sample Num: 30560, Cur Loss: 0.00000489, Cur Avg Loss: 0.00012179, Log Avg loss: 0.00008428, Global Avg Loss: 0.00172830, Time: 0.2177 Steps: 176000, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002910, Sample Num: 46560, Cur Loss: 0.00007200, Cur Avg Loss: 0.00014280, Log Avg loss: 0.00018294, Global Avg Loss: 0.00171957, Time: 0.2189 Steps: 177000, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 003910, Sample Num: 62560, Cur Loss: 0.00000049, Cur Avg Loss: 0.00014009, Log Avg loss: 0.00013221, Global Avg Loss: 0.00171065, Time: 0.0900 Steps: 178000, Updated lr: 0.000028 Training, Epoch: 0036, Batch: 004910, Sample Num: 78560, Cur Loss: 0.00000219, Cur Avg Loss: 0.00014199, Log Avg loss: 0.00014938, Global Avg Loss: 0.00170193, Time: 0.2246 Steps: 179000, Updated lr: 0.000028 ***** Running evaluation checkpoint-179064 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-179064 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1276.413895, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001232, "eval_total_loss": 1.313496, "eval_acc": 0.999806, "eval_jaccard": 0.993233, "eval_prec": 0.994301, "eval_recall": 0.994592, "eval_f1": 0.994065, "eval_pr_auc": 0.99789, "eval_roc_auc": 0.999582, "eval_fmax": 0.996824, "eval_pmax": 0.998124, "eval_rmax": 0.995528, "eval_tmax": 0.13, "update_flag": false, "test_avg_loss": 0.001444, "test_total_loss": 1.539167, "test_acc": 0.999839, "test_jaccard": 0.993758, "test_prec": 0.994423, "test_recall": 0.995162, "test_f1": 0.994495, "test_pr_auc": 0.997909, "test_roc_auc": 0.999423, "test_fmax": 0.997072, "test_pmax": 0.997918, 
"test_rmax": 0.996227, "test_tmax": 0.07, "lr": 2.802253521126761e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0017014221844812093, "train_cur_epoch_loss": 0.7159381545689412, "train_cur_epoch_avg_loss": 0.0001439360986266468, "train_cur_epoch_time": 1276.413895368576, "train_cur_epoch_avg_time": 0.2566171884536743, "epoch": 36, "step": 179064} ################################################## Training, Epoch: 0037, Batch: 000936, Sample Num: 14976, Cur Loss: 0.00001675, Cur Avg Loss: 0.00013788, Log Avg loss: 0.00014785, Global Avg Loss: 0.00169329, Time: 0.2194 Steps: 180000, Updated lr: 0.000028 Training, Epoch: 0037, Batch: 001936, Sample Num: 30976, Cur Loss: 0.00000141, Cur Avg Loss: 0.00010827, Log Avg loss: 0.00008054, Global Avg Loss: 0.00168438, Time: 0.3395 Steps: 181000, Updated lr: 0.000027 Training, Epoch: 0037, Batch: 002936, Sample Num: 46976, Cur Loss: 0.00000225, Cur Avg Loss: 0.00012776, Log Avg loss: 0.00016551, Global Avg Loss: 0.00167604, Time: 0.3212 Steps: 182000, Updated lr: 0.000027 Training, Epoch: 0037, Batch: 003936, Sample Num: 62976, Cur Loss: 0.00000013, Cur Avg Loss: 0.00012479, Log Avg loss: 0.00011606, Global Avg Loss: 0.00166751, Time: 0.1373 Steps: 183000, Updated lr: 0.000026 Training, Epoch: 0037, Batch: 004936, Sample Num: 78976, Cur Loss: 0.00000105, Cur Avg Loss: 0.00012908, Log Avg loss: 0.00014598, Global Avg Loss: 0.00165924, Time: 0.3246 Steps: 184000, Updated lr: 0.000026 ***** Running evaluation checkpoint-184038 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-184038 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1212.650225, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001255, "eval_total_loss": 1.337353, "eval_acc": 0.999808, "eval_jaccard": 0.993349, "eval_prec": 0.994377, "eval_recall": 0.994692, "eval_f1": 0.994159, "eval_pr_auc": 0.997899, "eval_roc_auc": 0.999563, "eval_fmax": 0.996894, "eval_pmax": 0.998271, "eval_rmax": 0.995521, "eval_tmax": 0.15, "update_flag": false, "test_avg_loss": 0.001443, "test_total_loss": 1.538589, "test_acc": 0.999839, "test_jaccard": 0.993763, "test_prec": 0.994413, "test_recall": 0.995128, "test_f1": 0.994475, "test_pr_auc": 0.997865, "test_roc_auc": 0.999438, "test_fmax": 0.997232, "test_pmax": 0.998141, "test_rmax": 0.996325, "test_tmax": 0.09, "lr": 2.6020925553319918e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.001658976090485939, "train_cur_epoch_loss": 0.6511796989067533, "train_cur_epoch_avg_loss": 0.00013091670665596166, "train_cur_epoch_time": 1212.6502249240875, "train_cur_epoch_avg_time": 0.24379779351107508, "epoch": 37, "step": 184038} ################################################## Training, Epoch: 0038, Batch: 000962, Sample Num: 15392, Cur Loss: 0.00000450, Cur Avg Loss: 0.00013702, Log Avg loss: 0.00014583, Global Avg Loss: 0.00165106, Time: 0.2181 Steps: 185000, Updated lr: 0.000026 Training, Epoch: 0038, Batch: 001962, Sample Num: 31392, Cur Loss: 0.00000006, Cur Avg Loss: 0.00011050, Log Avg loss: 0.00008499, Global Avg Loss: 0.00164264, Time: 0.2189 Steps: 186000, Updated lr: 0.000025 Training, Epoch: 0038, Batch: 002962, Sample Num: 47392, Cur Loss: 0.00000536, Cur Avg Loss: 0.00014090, Log Avg loss: 0.00020054, Global Avg Loss: 0.00163493, Time: 0.2198 Steps: 187000, Updated lr: 0.000025 Training, Epoch: 0038, Batch: 
003962, Sample Num: 63392, Cur Loss: 0.00005958, Cur Avg Loss: 0.00013921, Log Avg loss: 0.00013421, Global Avg Loss: 0.00162695, Time: 0.2939 Steps: 188000, Updated lr: 0.000024 Training, Epoch: 0038, Batch: 004962, Sample Num: 79392, Cur Loss: 0.00003006, Cur Avg Loss: 0.00014248, Log Avg loss: 0.00015545, Global Avg Loss: 0.00161916, Time: 0.2189 Steps: 189000, Updated lr: 0.000024 ***** Running evaluation checkpoint-189012 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-189012 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1231.665205, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001244, "eval_total_loss": 1.325681, "eval_acc": 0.999812, "eval_jaccard": 0.993442, "eval_prec": 0.994469, "eval_recall": 0.994752, "eval_f1": 0.994243, "eval_pr_auc": 0.997783, "eval_roc_auc": 0.999584, "eval_fmax": 0.996861, "eval_pmax": 0.998304, "eval_rmax": 0.995422, "eval_tmax": 0.21, "update_flag": false, "test_avg_loss": 0.001466, "test_total_loss": 1.563186, "test_acc": 0.99984, "test_jaccard": 0.993587, "test_prec": 0.994179, "test_recall": 0.995011, "test_f1": 0.994297, "test_pr_auc": 0.997868, "test_roc_auc": 0.999429, "test_fmax": 0.997113, "test_pmax": 0.998158, "test_rmax": 0.996071, "test_tmax": 0.09, "lr": 2.4019315895372237e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00161907971689112, "train_cur_epoch_loss": 0.7108537081719803, "train_cur_epoch_avg_loss": 0.0001429138938825855, "train_cur_epoch_time": 1231.6652047634125, "train_cur_epoch_avg_time": 0.24762066842851074, "epoch": 38, "step": 189012} ################################################## Training, Epoch: 0039, Batch: 000988, Sample Num: 15808, Cur Loss: 0.00000063, Cur Avg Loss: 0.00012915, Log Avg loss: 0.00013145, Global Avg Loss: 0.00161133, Time: 0.2192 Steps: 190000, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 001988, Sample Num: 31808, Cur Loss: 0.00000238, Cur Avg Loss: 0.00010666, Log Avg loss: 0.00008445, Global Avg Loss: 0.00160334, Time: 0.1463 Steps: 191000, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 002988, Sample Num: 47808, Cur Loss: 0.00000731, Cur Avg Loss: 0.00013678, Log Avg loss: 0.00019666, Global Avg Loss: 0.00159601, Time: 0.3101 Steps: 192000, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 003988, Sample Num: 63808, Cur Loss: 0.00000151, Cur Avg Loss: 0.00013527, Log Avg loss: 0.00013075, Global Avg Loss: 0.00158842, Time: 0.2174 Steps: 193000, Updated lr: 0.000022 ***** Running evaluation checkpoint-193986 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-193986 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1257.967635, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.00124, "eval_total_loss": 1.321709, "eval_acc": 0.999813, "eval_jaccard": 0.993473, "eval_prec": 0.994439, "eval_recall": 0.994812, "eval_f1": 0.994264, "eval_pr_auc": 0.997867, "eval_roc_auc": 0.999583, "eval_fmax": 0.996895, "eval_pmax": 0.998285, "eval_rmax": 0.99551, "eval_tmax": 0.16, "update_flag": false, "test_avg_loss": 0.001463, "test_total_loss": 1.55913, "test_acc": 0.999843, "test_jaccard": 0.993802, "test_prec": 0.994413, 
"test_recall": 0.995284, "test_f1": 0.994544, "test_pr_auc": 0.997844, "test_roc_auc": 0.999441, "test_fmax": 0.997075, "test_pmax": 0.997943, "test_rmax": 0.996208, "test_tmax": 0.08, "lr": 2.201770623742455e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0015811387628739653, "train_cur_epoch_loss": 0.6932886058449865, "train_cur_epoch_avg_loss": 0.0001393825102221525, "train_cur_epoch_time": 1257.9676351547241, "train_cur_epoch_avg_time": 0.2529086520214564, "epoch": 39, "step": 193986} ################################################## Training, Epoch: 0040, Batch: 000014, Sample Num: 224, Cur Loss: 0.00001826, Cur Avg Loss: 0.00001395, Log Avg loss: 0.00015403, Global Avg Loss: 0.00158103, Time: 0.2168 Steps: 194000, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 001014, Sample Num: 16224, Cur Loss: 0.00000116, Cur Avg Loss: 0.00012411, Log Avg loss: 0.00012565, Global Avg Loss: 0.00157356, Time: 0.2488 Steps: 195000, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 002014, Sample Num: 32224, Cur Loss: 0.00000164, Cur Avg Loss: 0.00011005, Log Avg loss: 0.00009579, Global Avg Loss: 0.00156602, Time: 0.2177 Steps: 196000, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 003014, Sample Num: 48224, Cur Loss: 0.00000196, Cur Avg Loss: 0.00013344, Log Avg loss: 0.00018054, Global Avg Loss: 0.00155899, Time: 0.3439 Steps: 197000, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 004014, Sample Num: 64224, Cur Loss: 0.00000014, Cur Avg Loss: 0.00012921, Log Avg loss: 0.00011645, Global Avg Loss: 0.00155170, Time: 0.1193 Steps: 198000, Updated lr: 0.000020 ***** Running evaluation checkpoint-198960 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-198960 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1222.814029, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001256, "eval_total_loss": 1.339302, "eval_acc": 0.99981, "eval_jaccard": 0.993514, "eval_prec": 0.994543, "eval_recall": 0.994808, "eval_f1": 0.99431, "eval_pr_auc": 0.99782, "eval_roc_auc": 0.999575, "eval_fmax": 0.996887, "eval_pmax": 0.998334, "eval_rmax": 0.995446, "eval_tmax": 0.19, "update_flag": false, "test_avg_loss": 0.001479, "test_total_loss": 1.576746, "test_acc": 0.999842, "test_jaccard": 0.993675, "test_prec": 0.994286, "test_recall": 0.99505, "test_f1": 0.994379, "test_pr_auc": 0.997784, "test_roc_auc": 0.999435, "test_fmax": 0.997119, "test_pmax": 0.998111, "test_rmax": 0.99613, "test_tmax": 0.11, "lr": 2.001609657947686e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0015449463806268065, "train_cur_epoch_loss": 0.6637478346409009, "train_cur_epoch_avg_loss": 0.0001334434729877163, "train_cur_epoch_time": 1222.8140292167664, "train_cur_epoch_avg_time": 0.24584117997924534, "epoch": 40, "step": 198960} ################################################## Training, Epoch: 0041, Batch: 000040, Sample Num: 640, Cur Loss: 0.00027470, Cur Avg Loss: 0.00014056, Log Avg loss: 0.00015074, Global Avg Loss: 0.00154466, Time: 0.2187 Steps: 199000, Updated lr: 0.000020 Training, Epoch: 0041, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00000464, Cur Avg Loss: 0.00011655, Log Avg loss: 0.00011559, Global Avg Loss: 0.00153752, Time: 0.2182 Steps: 200000, Updated lr: 0.000020 Training, Epoch: 0041, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00000475, Cur Avg Loss: 0.00011152, 
Log Avg loss: 0.00010630, Global Avg Loss: 0.00153040, Time: 0.1975 Steps: 201000, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00000142, Cur Avg Loss: 0.00013019, Log Avg loss: 0.00016826, Global Avg Loss: 0.00152365, Time: 0.2400 Steps: 202000, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00000396, Cur Avg Loss: 0.00012664, Log Avg loss: 0.00011586, Global Avg Loss: 0.00151672, Time: 0.2201 Steps: 203000, Updated lr: 0.000018 ***** Running evaluation checkpoint-203934 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-203934 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1214.045251, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001254, "eval_total_loss": 1.337093, "eval_acc": 0.999814, "eval_jaccard": 0.993576, "eval_prec": 0.994641, "eval_recall": 0.994772, "eval_f1": 0.994346, "eval_pr_auc": 0.997809, "eval_roc_auc": 0.99958, "eval_fmax": 0.996903, "eval_pmax": 0.998393, "eval_rmax": 0.995418, "eval_tmax": 0.19, "update_flag": false, "test_avg_loss": 0.001468, "test_total_loss": 1.56523, "test_acc": 0.999843, "test_jaccard": 0.993753, "test_prec": 0.994442, "test_recall": 0.995089, "test_f1": 0.994469, "test_pr_auc": 0.99788, "test_roc_auc": 0.999438, "test_fmax": 0.997126, "test_pmax": 0.997968, "test_rmax": 0.996286, "test_tmax": 0.04, "lr": 1.8014486921529177e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0015104449760518278, "train_cur_epoch_loss": 0.6485538566450916, "train_cur_epoch_avg_loss": 0.00013038879305289337, "train_cur_epoch_time": 1214.0452511310577, "train_cur_epoch_avg_time": 0.24407825716346154, "epoch": 41, "step": 203934} ################################################## Training, Epoch: 0042, Batch: 000066, Sample Num: 1056, Cur Loss: 0.00000298, Cur Avg Loss: 0.00012908, Log Avg loss: 0.00014545, Global Avg Loss: 0.00151000, Time: 0.1290 Steps: 204000, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 001066, Sample Num: 17056, Cur Loss: 0.00004905, Cur Avg Loss: 0.00012247, Log Avg loss: 0.00012204, Global Avg Loss: 0.00150323, Time: 0.2194 Steps: 205000, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 002066, Sample Num: 33056, Cur Loss: 0.00500117, Cur Avg Loss: 0.00010882, Log Avg loss: 0.00009427, Global Avg Loss: 0.00149639, Time: 0.3147 Steps: 206000, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003066, Sample Num: 49056, Cur Loss: 0.00000085, Cur Avg Loss: 0.00012726, Log Avg loss: 0.00016534, Global Avg Loss: 0.00148996, Time: 0.2174 Steps: 207000, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 004066, Sample Num: 65056, Cur Loss: 0.00000611, Cur Avg Loss: 0.00012307, Log Avg loss: 0.00011023, Global Avg Loss: 0.00148332, Time: 0.2807 Steps: 208000, Updated lr: 0.000016 ***** Running evaluation checkpoint-208908 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-208908 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1226.517418, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001256, "eval_total_loss": 1.338957, "eval_acc": 0.999812, "eval_jaccard": 0.993512, "eval_prec": 
0.994601, "eval_recall": 0.994747, "eval_f1": 0.994306, "eval_pr_auc": 0.997735, "eval_roc_auc": 0.999577, "eval_fmax": 0.996884, "eval_pmax": 0.998632, "eval_rmax": 0.995143, "eval_tmax": 0.42, "update_flag": false, "test_avg_loss": 0.00146, "test_total_loss": 1.556605, "test_acc": 0.999843, "test_jaccard": 0.993792, "test_prec": 0.994462, "test_recall": 0.995167, "test_f1": 0.994516, "test_pr_auc": 0.997778, "test_roc_auc": 0.999432, "test_fmax": 0.997114, "test_pmax": 0.997865, "test_rmax": 0.996364, "test_tmax": 0.04, "lr": 1.601287726358149e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.001477540007562465, "train_cur_epoch_loss": 0.6388421537054448, "train_cur_epoch_avg_loss": 0.00012843629949848106, "train_cur_epoch_time": 1226.5174181461334, "train_cur_epoch_avg_time": 0.24658572942222223, "epoch": 42, "step": 208908} ################################################## Training, Epoch: 0043, Batch: 000092, Sample Num: 1472, Cur Loss: 0.00000221, Cur Avg Loss: 0.00021343, Log Avg loss: 0.00015808, Global Avg Loss: 0.00147698, Time: 0.2192 Steps: 209000, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 001092, Sample Num: 17472, Cur Loss: 0.00002436, Cur Avg Loss: 0.00011613, Log Avg loss: 0.00010718, Global Avg Loss: 0.00147046, Time: 0.3887 Steps: 210000, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 002092, Sample Num: 33472, Cur Loss: 0.00001009, Cur Avg Loss: 0.00010143, Log Avg loss: 0.00008537, Global Avg Loss: 0.00146390, Time: 0.6517 Steps: 211000, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 003092, Sample Num: 49472, Cur Loss: 0.00004920, Cur Avg Loss: 0.00012040, Log Avg loss: 0.00016010, Global Avg Loss: 0.00145775, Time: 0.2167 Steps: 212000, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 004092, Sample Num: 65472, Cur Loss: 0.00010663, Cur Avg Loss: 0.00011785, Log Avg loss: 0.00010998, Global Avg Loss: 0.00145142, Time: 0.2456 Steps: 213000, Updated lr: 0.000014 ***** Running evaluation checkpoint-213882 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-213882 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1247.750675, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001249, "eval_total_loss": 1.331669, "eval_acc": 0.999814, "eval_jaccard": 0.993465, "eval_prec": 0.994585, "eval_recall": 0.994658, "eval_f1": 0.994255, "eval_pr_auc": 0.997909, "eval_roc_auc": 0.999592, "eval_fmax": 0.996875, "eval_pmax": 0.998079, "eval_rmax": 0.995673, "eval_tmax": 0.08, "update_flag": false, "test_avg_loss": 0.001473, "test_total_loss": 1.570708, "test_acc": 0.99984, "test_jaccard": 0.993763, "test_prec": 0.994442, "test_recall": 0.995216, "test_f1": 0.994516, "test_pr_auc": 0.997799, "test_roc_auc": 0.999439, "test_fmax": 0.99718, "test_pmax": 0.997938, "test_rmax": 0.996423, "test_tmax": 0.04, "lr": 1.4011267605633804e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0014460646944206355, "train_cur_epoch_loss": 0.6172810722155783, "train_cur_epoch_avg_loss": 0.00012410154246392808, "train_cur_epoch_time": 1247.7506754398346, "train_cur_epoch_avg_time": 0.25085457889823776, "epoch": 43, "step": 213882} ################################################## Training, Epoch: 0044, Batch: 000118, Sample Num: 1888, Cur Loss: 0.00000011, Cur Avg Loss: 0.00012667, Log Avg loss: 0.00014997, Global Avg Loss: 0.00144534, Time: 0.3288 
Steps: 214000, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 001118, Sample Num: 17888, Cur Loss: 0.00000086, Cur Avg Loss: 0.00012101, Log Avg loss: 0.00012034, Global Avg Loss: 0.00143917, Time: 0.2194 Steps: 215000, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 002118, Sample Num: 33888, Cur Loss: 0.00000604, Cur Avg Loss: 0.00011034, Log Avg loss: 0.00009842, Global Avg Loss: 0.00143297, Time: 0.2245 Steps: 216000, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003118, Sample Num: 49888, Cur Loss: 0.00000938, Cur Avg Loss: 0.00012650, Log Avg loss: 0.00016073, Global Avg Loss: 0.00142710, Time: 0.2200 Steps: 217000, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 004118, Sample Num: 65888, Cur Loss: 0.00000031, Cur Avg Loss: 0.00012179, Log Avg loss: 0.00010707, Global Avg Loss: 0.00142105, Time: 0.2195 Steps: 218000, Updated lr: 0.000012 ***** Running evaluation checkpoint-218856 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-218856 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1232.950922, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001247, "eval_total_loss": 1.329556, "eval_acc": 0.999814, "eval_jaccard": 0.993491, "eval_prec": 0.994592, "eval_recall": 0.994736, "eval_f1": 0.994289, "eval_pr_auc": 0.99786, "eval_roc_auc": 0.999591, "eval_fmax": 0.996923, "eval_pmax": 0.99851, "eval_rmax": 0.995341, "eval_tmax": 0.22, "update_flag": false, "test_avg_loss": 0.001469, "test_total_loss": 1.565945, "test_acc": 0.999843, "test_jaccard": 0.993802, "test_prec": 0.994472, "test_recall": 0.995226, "test_f1": 0.994538, "test_pr_auc": 0.997794, "test_roc_auc": 0.999431, "test_fmax": 0.997105, "test_pmax": 0.997866, "test_rmax": 0.996345, "test_tmax": 0.05, "lr": 1.2009657947686118e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.001416095757408628, "train_cur_epoch_loss": 0.6338441113488154, "train_cur_epoch_avg_loss": 0.00012743146589240357, "train_cur_epoch_time": 1232.9509217739105, "train_cur_epoch_avg_time": 0.2478791559658043, "epoch": 44, "step": 218856} ################################################## Training, Epoch: 0045, Batch: 000144, Sample Num: 2304, Cur Loss: 0.00000009, Cur Avg Loss: 0.00011778, Log Avg loss: 0.00014929, Global Avg Loss: 0.00141524, Time: 0.2140 Steps: 219000, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 001144, Sample Num: 18304, Cur Loss: 0.00350107, Cur Avg Loss: 0.00012046, Log Avg loss: 0.00012084, Global Avg Loss: 0.00140936, Time: 0.2195 Steps: 220000, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 002144, Sample Num: 34304, Cur Loss: 0.00000328, Cur Avg Loss: 0.00010820, Log Avg loss: 0.00009417, Global Avg Loss: 0.00140341, Time: 0.3484 Steps: 221000, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 003144, Sample Num: 50304, Cur Loss: 0.00000661, Cur Avg Loss: 0.00012094, Log Avg loss: 0.00014825, Global Avg Loss: 0.00139775, Time: 0.4364 Steps: 222000, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 004144, Sample Num: 66304, Cur Loss: 0.00006308, Cur Avg Loss: 0.00011686, Log Avg loss: 0.00010404, Global Avg Loss: 0.00139195, Time: 0.5464 Steps: 223000, Updated lr: 0.000010 ***** Running evaluation checkpoint-223830 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running 
Training, Epoch: 0045, Batch: 000144, Sample Num: 2304, Cur Loss: 0.00000009, Cur Avg Loss: 0.00011778, Log Avg loss: 0.00014929, Global Avg Loss: 0.00141524, Time: 0.2140 Steps: 219000, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 001144, Sample Num: 18304, Cur Loss: 0.00350107, Cur Avg Loss: 0.00012046, Log Avg loss: 0.00012084, Global Avg Loss: 0.00140936, Time: 0.2195 Steps: 220000, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 002144, Sample Num: 34304, Cur Loss: 0.00000328, Cur Avg Loss: 0.00010820, Log Avg loss: 0.00009417, Global Avg Loss: 0.00140341, Time: 0.3484 Steps: 221000, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 003144, Sample Num: 50304, Cur Loss: 0.00000661, Cur Avg Loss: 0.00012094, Log Avg loss: 0.00014825, Global Avg Loss: 0.00139775, Time: 0.4364 Steps: 222000, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 004144, Sample Num: 66304, Cur Loss: 0.00006308, Cur Avg Loss: 0.00011686, Log Avg loss: 0.00010404, Global Avg Loss: 0.00139195, Time: 0.5464 Steps: 223000, Updated lr: 0.000010
***** Running evaluation checkpoint-223830 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-223830 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1227.140173, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.001251, "eval_total_loss": 1.333994, "eval_acc": 0.999814, "eval_jaccard": 0.993478, "eval_prec": 0.994555, "eval_recall": 0.994759, "eval_f1": 0.994285, "eval_pr_auc": 0.997838, "eval_roc_auc": 0.999584, "eval_fmax": 0.996928, "eval_pmax": 0.998353, "eval_rmax": 0.995507, "eval_tmax": 0.18, "update_flag": false, "test_avg_loss": 0.00148, "test_total_loss": 1.577707, "test_acc": 0.999841, "test_jaccard": 0.993685, "test_prec": 0.994335, "test_recall": 0.995128, "test_f1": 0.994426, "test_pr_auc": 0.997803, "test_roc_auc": 0.999426, "test_fmax": 0.997094, "test_pmax": 0.997845, "test_rmax": 0.996345, "test_tmax": 0.04, "lr": 1.000804828973843e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0013873891860784844, "train_cur_epoch_loss": 0.6182684365242097, "train_cur_epoch_avg_loss": 0.00012430004755211292, "train_cur_epoch_time": 1227.1401734352112, "train_cur_epoch_avg_time": 0.24671093153100346, "epoch": 45, "step": 223830}
##################################################
Training, Epoch: 0046, Batch: 000170, Sample Num: 2720, Cur Loss: 0.00000376, Cur Avg Loss: 0.00008954, Log Avg loss: 0.00014922, Global Avg Loss: 0.00138640, Time: 0.4237 Steps: 224000, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 001170, Sample Num: 18720, Cur Loss: 0.00000204, Cur Avg Loss: 0.00011460, Log Avg loss: 0.00011886, Global Avg Loss: 0.00138077, Time: 0.2202 Steps: 225000, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 002170, Sample Num: 34720, Cur Loss: 0.00000256, Cur Avg Loss: 0.00010324, Log Avg loss: 0.00008996, Global Avg Loss: 0.00137506, Time: 0.3200 Steps: 226000, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003170, Sample Num: 50720, Cur Loss: 0.00000879, Cur Avg Loss: 0.00011786, Log Avg loss: 0.00014956, Global Avg Loss: 0.00136966, Time: 0.2149 Steps: 227000, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 004170, Sample Num: 66720, Cur Loss: 0.00000269, Cur Avg Loss: 0.00011421, Log Avg loss: 0.00010266, Global Avg Loss: 0.00136410, Time: 0.2194 Steps: 228000, Updated lr: 0.000008
***** Running evaluation checkpoint-228804 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-228804 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1233.148472, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.001252, "eval_total_loss": 1.334836, "eval_acc": 0.999818, "eval_jaccard": 0.9936, "eval_prec": 0.994609, "eval_recall": 0.994768, "eval_f1": 0.99434, "eval_pr_auc": 0.99779, "eval_roc_auc": 0.999578, "eval_fmax": 0.996901, "eval_pmax": 0.998187, "eval_rmax": 0.995619, "eval_tmax": 0.1, "update_flag": false, "test_avg_loss": 0.001479, "test_total_loss": 1.577126, "test_acc": 0.99984, "test_jaccard": 0.993714, "test_prec": 0.994354, "test_recall": 0.995079, "test_f1": 0.994418, "test_pr_auc": 0.997816, "test_roc_auc": 0.999425, "test_fmax": 0.997124, "test_pmax": 0.997884, "test_rmax": 0.996364, "test_tmax": 0.04, "lr": 8.006438631790744e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.001359821130035991, "train_cur_epoch_loss": 0.5931923168067837, "train_cur_epoch_avg_loss": 0.00011925860812359947, "train_cur_epoch_time": 1233.148472070694, "train_cur_epoch_avg_time": 0.2479188725514061, "epoch": 46, "step": 228804}
"train_cur_epoch_time": 1233.148472070694, "train_cur_epoch_avg_time": 0.2479188725514061, "epoch": 46, "step": 228804} ################################################## Training, Epoch: 0047, Batch: 000196, Sample Num: 3136, Cur Loss: 0.00000298, Cur Avg Loss: 0.00013393, Log Avg loss: 0.00014318, Global Avg Loss: 0.00135877, Time: 0.3565 Steps: 229000, Updated lr: 0.000008 Training, Epoch: 0047, Batch: 001196, Sample Num: 19136, Cur Loss: 0.00000269, Cur Avg Loss: 0.00010618, Log Avg loss: 0.00010074, Global Avg Loss: 0.00135330, Time: 0.1925 Steps: 230000, Updated lr: 0.000008 Training, Epoch: 0047, Batch: 002196, Sample Num: 35136, Cur Loss: 0.00000040, Cur Avg Loss: 0.00010328, Log Avg loss: 0.00009981, Global Avg Loss: 0.00134788, Time: 0.2196 Steps: 231000, Updated lr: 0.000007 Training, Epoch: 0047, Batch: 003196, Sample Num: 51136, Cur Loss: 0.00000148, Cur Avg Loss: 0.00011486, Log Avg loss: 0.00014031, Global Avg Loss: 0.00134267, Time: 0.2171 Steps: 232000, Updated lr: 0.000007 Training, Epoch: 0047, Batch: 004196, Sample Num: 67136, Cur Loss: 0.00075393, Cur Avg Loss: 0.00011112, Log Avg loss: 0.00009914, Global Avg Loss: 0.00133733, Time: 0.2270 Steps: 233000, Updated lr: 0.000006 ***** Running evaluation checkpoint-233778 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-233778 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1243.990026, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001252, "eval_total_loss": 1.334962, "eval_acc": 0.999813, "eval_jaccard": 0.993559, "eval_prec": 0.994575, "eval_recall": 0.99482, "eval_f1": 0.994333, "eval_pr_auc": 0.997877, "eval_roc_auc": 0.999583, "eval_fmax": 0.996881, "eval_pmax": 0.998365, "eval_rmax": 0.995402, "eval_tmax": 0.25, "update_flag": false, "test_avg_loss": 0.001474, "test_total_loss": 1.571748, "test_acc": 0.999837, "test_jaccard": 0.993656, "test_prec": 0.994335, "test_recall": 0.99504, "test_f1": 0.994381, "test_pr_auc": 0.997809, "test_roc_auc": 0.999426, "test_fmax": 0.997036, "test_pmax": 0.997885, "test_rmax": 0.996188, "test_tmax": 0.05, "lr": 6.004828973843059e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.001333408727772938, "train_cur_epoch_loss": 0.5891117245471339, "train_cur_epoch_avg_loss": 0.00011843822367252391, "train_cur_epoch_time": 1243.9900262355804, "train_cur_epoch_avg_time": 0.2500985175383153, "epoch": 47, "step": 233778} ################################################## Training, Epoch: 0048, Batch: 000222, Sample Num: 3552, Cur Loss: 0.00000529, Cur Avg Loss: 0.00010272, Log Avg loss: 0.00014567, Global Avg Loss: 0.00133224, Time: 0.2213 Steps: 234000, Updated lr: 0.000006 Training, Epoch: 0048, Batch: 001222, Sample Num: 19552, Cur Loss: 0.00000698, Cur Avg Loss: 0.00010453, Log Avg loss: 0.00010493, Global Avg Loss: 0.00132702, Time: 0.2210 Steps: 235000, Updated lr: 0.000006 Training, Epoch: 0048, Batch: 002222, Sample Num: 35552, Cur Loss: 0.00002543, Cur Avg Loss: 0.00009507, Log Avg loss: 0.00008352, Global Avg Loss: 0.00132175, Time: 0.3692 Steps: 236000, Updated lr: 0.000005 Training, Epoch: 0048, Batch: 003222, Sample Num: 51552, Cur Loss: 0.00001112, Cur Avg Loss: 0.00011129, Log Avg loss: 0.00014733, Global Avg Loss: 0.00131679, Time: 0.2191 Steps: 237000, Updated lr: 0.000005 Training, Epoch: 0048, Batch: 004222, Sample Num: 67552, Cur Loss: 
Training, Epoch: 0048, Batch: 000222, Sample Num: 3552, Cur Loss: 0.00000529, Cur Avg Loss: 0.00010272, Log Avg loss: 0.00014567, Global Avg Loss: 0.00133224, Time: 0.2213 Steps: 234000, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 001222, Sample Num: 19552, Cur Loss: 0.00000698, Cur Avg Loss: 0.00010453, Log Avg loss: 0.00010493, Global Avg Loss: 0.00132702, Time: 0.2210 Steps: 235000, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 002222, Sample Num: 35552, Cur Loss: 0.00002543, Cur Avg Loss: 0.00009507, Log Avg loss: 0.00008352, Global Avg Loss: 0.00132175, Time: 0.3692 Steps: 236000, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003222, Sample Num: 51552, Cur Loss: 0.00001112, Cur Avg Loss: 0.00011129, Log Avg loss: 0.00014733, Global Avg Loss: 0.00131679, Time: 0.2191 Steps: 237000, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 004222, Sample Num: 67552, Cur Loss: 0.00000238, Cur Avg Loss: 0.00010908, Log Avg loss: 0.00010196, Global Avg Loss: 0.00131169, Time: 0.2169 Steps: 238000, Updated lr: 0.000004
***** Running evaluation checkpoint-238752 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-238752 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1231.343265, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.001242, "eval_total_loss": 1.323984, "eval_acc": 0.999817, "eval_jaccard": 0.99361, "eval_prec": 0.994546, "eval_recall": 0.9949, "eval_f1": 0.994369, "eval_pr_auc": 0.997872, "eval_roc_auc": 0.999591, "eval_fmax": 0.996908, "eval_pmax": 0.998505, "eval_rmax": 0.995316, "eval_tmax": 0.33, "update_flag": false, "test_avg_loss": 0.001472, "test_total_loss": 1.56963, "test_acc": 0.99984, "test_jaccard": 0.993685, "test_prec": 0.994325, "test_recall": 0.995079, "test_f1": 0.994397, "test_pr_auc": 0.997803, "test_roc_auc": 0.999425, "test_fmax": 0.997041, "test_pmax": 0.99812, "test_rmax": 0.995964, "test_tmax": 0.12, "lr": 4.003219315895372e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0013080286104456356, "train_cur_epoch_loss": 0.5728212398153458, "train_cur_epoch_avg_loss": 0.00011516309606259465, "train_cur_epoch_time": 1231.34326505661, "train_cur_epoch_avg_time": 0.24755594391970448, "epoch": 48, "step": 238752}
##################################################
Training, Epoch: 0049, Batch: 000248, Sample Num: 3968, Cur Loss: 0.00000198, Cur Avg Loss: 0.00009805, Log Avg loss: 0.00013660, Global Avg Loss: 0.00130677, Time: 0.4450 Steps: 239000, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 001248, Sample Num: 19968, Cur Loss: 0.00001160, Cur Avg Loss: 0.00010004, Log Avg loss: 0.00010054, Global Avg Loss: 0.00130175, Time: 0.2380 Steps: 240000, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 002248, Sample Num: 35968, Cur Loss: 0.00002993, Cur Avg Loss: 0.00009097, Log Avg loss: 0.00007965, Global Avg Loss: 0.00129668, Time: 0.2154 Steps: 241000, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003248, Sample Num: 51968, Cur Loss: 0.00001311, Cur Avg Loss: 0.00010757, Log Avg loss: 0.00014488, Global Avg Loss: 0.00129192, Time: 0.2593 Steps: 242000, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 004248, Sample Num: 67968, Cur Loss: 0.00000425, Cur Avg Loss: 0.00010427, Log Avg loss: 0.00009356, Global Avg Loss: 0.00128699, Time: 0.2205 Steps: 243000, Updated lr: 0.000002
***** Running evaluation checkpoint-243726 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-243726 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1234.308949, Avg time per batch (s): 0.250000
"test_pr_auc": 0.997799, "test_roc_auc": 0.999419, "test_fmax": 0.997031, "test_pmax": 0.998101, "test_rmax": 0.995964, "test_tmax": 0.11, "lr": 2.001609657947686e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0012835626260363747, "train_cur_epoch_loss": 0.543137792224752, "train_cur_epoch_avg_loss": 0.0001091953743917877, "train_cur_epoch_time": 1234.3089485168457, "train_cur_epoch_avg_time": 0.2481521810448021, "epoch": 49, "step": 243726} ################################################## Training, Epoch: 0050, Batch: 000274, Sample Num: 4384, Cur Loss: 0.00000009, Cur Avg Loss: 0.00009342, Log Avg loss: 0.00012579, Global Avg Loss: 0.00128223, Time: 0.1974 Steps: 244000, Updated lr: 0.000002 Training, Epoch: 0050, Batch: 001274, Sample Num: 20384, Cur Loss: 0.00000131, Cur Avg Loss: 0.00009649, Log Avg loss: 0.00009733, Global Avg Loss: 0.00127739, Time: 0.2200 Steps: 245000, Updated lr: 0.000001 Training, Epoch: 0050, Batch: 002274, Sample Num: 36384, Cur Loss: 0.00000131, Cur Avg Loss: 0.00009573, Log Avg loss: 0.00009477, Global Avg Loss: 0.00127258, Time: 0.3394 Steps: 246000, Updated lr: 0.000001 Training, Epoch: 0050, Batch: 003274, Sample Num: 52384, Cur Loss: 0.00000520, Cur Avg Loss: 0.00010844, Log Avg loss: 0.00013733, Global Avg Loss: 0.00126799, Time: 0.2197 Steps: 247000, Updated lr: 0.000001 Training, Epoch: 0050, Batch: 004274, Sample Num: 68384, Cur Loss: 0.00000036, Cur Avg Loss: 0.00010404, Log Avg loss: 0.00008963, Global Avg Loss: 0.00126323, Time: 0.2255 Steps: 248000, Updated lr: 0.000000 ***** Running evaluation checkpoint-248700 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-248700 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1079.816034, Avg time per batch (s): 0.220000 {"eval_avg_loss": 0.001257, "eval_total_loss": 1.339484, "eval_acc": 0.999821, "eval_jaccard": 0.993698, "eval_prec": 0.994599, "eval_recall": 0.994935, "eval_f1": 0.994428, "eval_pr_auc": 0.997903, "eval_roc_auc": 0.999587, "eval_fmax": 0.996929, "eval_pmax": 0.998632, "eval_rmax": 0.995232, "eval_tmax": 0.35, "update_flag": false, "test_avg_loss": 0.00148, "test_total_loss": 1.577877, "test_acc": 0.999839, "test_jaccard": 0.993656, "test_prec": 0.994276, "test_recall": 0.99504, "test_f1": 0.994359, "test_pr_auc": 0.997796, "test_roc_auc": 0.999417, "test_fmax": 0.997051, "test_pmax": 0.99814, "test_rmax": 0.995964, "test_tmax": 0.11, "lr": 0.0, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0012600584992392876, "train_cur_epoch_loss": 0.5389641674694667, "train_cur_epoch_avg_loss": 0.00010835628618203996, "train_cur_epoch_time": 1079.816034078598, "train_cur_epoch_avg_time": 0.21709208566115762, "epoch": 50, "step": 248700} ################################################## #########################Best Metric######################### {"epoch": 22, "global_step": 109428, "eval_avg_loss": 0.00109, "eval_total_loss": 1.161707, "eval_acc": 0.999821, "eval_jaccard": 0.993785, "eval_prec": 0.994786, "eval_recall": 0.995176, "eval_f1": 0.994611, "eval_pr_auc": 0.998053, "eval_roc_auc": 0.999582, "eval_fmax": 0.996985, "eval_pmax": 0.997776, "eval_rmax": 0.996195, "eval_tmax": 0.07, "update_flag": true, "test_avg_loss": 0.001307, "test_total_loss": 1.392932, "test_acc": 0.999842, "test_jaccard": 0.993934, "test_prec": 0.994596, "test_recall": 0.995403, 
"test_f1": 0.994685, "test_pr_auc": 0.997749, "test_roc_auc": 0.999423, "test_fmax": 0.997105, "test_pmax": 0.998058, "test_rmax": 0.996154, "test_tmax": 0.21} ################################################## Total Time: 440019.406857, Avg time per epoch(50 epochs): 8800.390000 ++++++++++++Validation+++++++++++++ best f1 global step: 109428 checkpoint path: ../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135241/checkpoint-109428 ***** Running evaluation checkpoint-109428 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## {"evaluation_avg_loss_109428": 0.00109, "evaluation_total_loss_109428": 1.161707, "evaluation_acc_109428": 0.999821, "evaluation_jaccard_109428": 0.993785, "evaluation_prec_109428": 0.994786, "evaluation_recall_109428": 0.995176, "evaluation_f1_109428": 0.994611, "evaluation_pr_auc_109428": 0.998053, "evaluation_roc_auc_109428": 0.999582, "evaluation_fmax_109428": 0.996985, "evaluation_pmax_109428": 0.997776, "evaluation_rmax_109428": 0.996195, "evaluation_tmax_109428": 0.07} ++++++++++++Testing+++++++++++++ best f1 global step: 109428 checkpoint path: ../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135241/checkpoint-109428 ***** Running testing checkpoint-109428 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## {"evaluation_avg_loss_109428": 0.001307, "evaluation_total_loss_109428": 1.392932, "evaluation_acc_109428": 0.999842, "evaluation_jaccard_109428": 0.993934, "evaluation_prec_109428": 0.994596, "evaluation_recall_109428": 0.995403, "evaluation_f1_109428": 0.994685, "evaluation_pr_auc_109428": 0.997749, "evaluation_roc_auc_109428": 0.999423, "evaluation_fmax_109428": 0.997105, "evaluation_pmax_109428": 0.998058, "evaluation_rmax_109428": 0.996154, "evaluation_tmax_109428": 0.21}