{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "f1", "beta1": 0.9, "beta2": 0.98, "buffer_size": 512, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "VirusEC4", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/VirusEC4/protein/multi_label/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 1152, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "matrix", "intermediate_size": 4096, "label_filepath": "../dataset/VirusEC4/protein/multi_label/label.txt", "label_size": 70, "label_type": "VirusEC4", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": null, "llm_step": null, "llm_task_level": "token_level,span_level,seq_level,structure_level", "llm_time_str": null, "llm_type": "esmc", "llm_version": "600M", "lmdb_path": null, "local_rank": -1, "log_dir": "../logs/VirusEC4/protein/multi_label/luca_base/matrix/20250501135319", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": true, "matrix_dirpath": "../matrices/VirusEC4/protein/multi_label/luca_base/600M/esmc//", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "non_ignore": true, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 0, "num_hidden_layers": 0, "num_train_epochs": 50, "output_dir": "../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135319", "output_mode": "multi_label", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 1.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "null", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "multi_label", "tb_log_dir": "../tb-logs/VirusEC4/protein/multi_label/luca_base/matrix/20250501135319", 
"test_data_dir": "../dataset/VirusEC4/protein/multi_label/test/", "time_str": "20250501135328", "train_data_dir": "../dataset/VirusEC4/protein/multi_label/train/", "trunc_type": "right", "vector_dirpath": "../vectors/VirusEC4/protein/multi_label/luca_base/600M/esmc//", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 1000, "weight": [1749.35, 1715.049019607843, 1666.047619047619, 1521.1739130434783, 1445.7438016528927, 1356.0852713178294, 1305.4850746268658, 1258.525179856115, 1240.6737588652481, 1150.8881578947369, 993.9488636363636, 920.7105263157895, 874.675, 813.6511627906976, 777.4888888888889, 774.0486725663717, 763.9082969432314, 650.3159851301115, 631.5342960288808, 629.2625899280575, 620.3368794326241, 615.9683098591549, 585.0668896321071, 567.9707792207793, 544.9688473520249, 539.9228395061729, 519.0949554896142, 510.01457725947523, 484.584487534626, 366.74004192872115, 354.1194331983806, 343.0098039215686, 331.3162878787879, 328.8251879699248, 328.2082551594747, 314.0664272890485, 312.38392857142856, 199.01592718998862, 186.49786780383795, 181.84511434511435, 172.01081612586037, 170.66829268292682, 156.05263157894737, 149.0076660988075, 134.4619523443505, 128.62867647058823, 127.68978102189782, 127.4107793153678, 127.0406681190995, 100.07723112128147, 94.0510752688172, 80.20861989912883, 78.0611334225792, 74.85451433461704, 65.00743218134522, 63.7983223924143, 44.866632469864065, 42.24462690171456, 41.661109788044776, 29.93924353927777, 27.193377895227734, 27.004476690336524, 25.790210821170575, 22.048777413662716, 21.104475811316203, 17.876047414674023, 10.421482187537233, 9.041970331317517, 17.4935, 7.06893764900796], "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,embedding_matrix ################################################## Encoder Config: {'llm_type': 'esmc', 'llm_version': '600M', 'llm_step': None, 'llm_dirpath': None, 'input_type': 'matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/VirusEC4/protein/multi_label/luca_base/600M/esmc//', 'matrix_dirpath': '../matrices/VirusEC4/protein/multi_label/luca_base/600M/esmc//', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': True, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "_attn_implementation_autoset": true, "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 1152, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "id2label": {}, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "label2id": {}, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4098, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "num_attention_heads": 8, "num_hidden_layers": 4, "pad_token_id": 0, 
"pos_weight": 1.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": null, "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.46.3", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39, "weight": [ 1749.35, 1715.049019607843, 1666.047619047619, 1521.1739130434783, 1445.7438016528927, 1356.0852713178294, 1305.4850746268658, 1258.525179856115, 1240.6737588652481, 1150.8881578947369, 993.9488636363636, 920.7105263157895, 874.675, 813.6511627906976, 777.4888888888889, 774.0486725663717, 763.9082969432314, 650.3159851301115, 631.5342960288808, 629.2625899280575, 620.3368794326241, 615.9683098591549, 585.0668896321071, 567.9707792207793, 544.9688473520249, 539.9228395061729, 519.0949554896142, 510.01457725947523, 484.584487534626, 366.74004192872115, 354.1194331983806, 343.0098039215686, 331.3162878787879, 328.8251879699248, 328.2082551594747, 314.0664272890485, 312.38392857142856, 199.01592718998862, 186.49786780383795, 181.84511434511435, 172.01081612586037, 170.66829268292682, 156.05263157894737, 149.0076660988075, 134.4619523443505, 128.62867647058823, 127.68978102189782, 127.4107793153678, 127.0406681190995, 100.07723112128147, 94.0510752688172, 80.20861989912883, 78.0611334225792, 74.85451433461704, 65.00743218134522, 63.7983223924143, 44.866632469864065, 42.24462690171456, 41.661109788044776, 29.93924353927777, 27.193377895227734, 27.004476690336524, 25.790210821170575, 22.048777413662716, 21.104475811316203, 17.876047414674023, 10.421482187537233, 9.041970331317517, 17.4935, 7.06893764900796 ] } ################################################## Mode Architecture: LucaBase( (matrix_pooler): GlobalMaskValueAttentionPooling1D (1152 -> 1152) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=1152, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=128, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=70, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 4154438 ################################################## {"total_num": "3.960000M", "total_size": "15.850000MB", "param_sum": "3.960000M", "param_size": "15.850000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "3.961981M", "trainable_size": "15.847923MB"} ################################################## Train dataset len: 79578, batch size: 16, batch num: 4974 Train dataset t_total: 248700, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 79578 Train Dataset Num Epochs = 50 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. 
Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.69129026, Cur Avg Loss: 0.69261657, Log Avg loss: 0.69261657, Global Avg Loss: 0.69261657, Time: 0.1215 Steps: 200, Updated lr: 0.000020 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.66125244, Cur Avg Loss: 0.68741338, Log Avg loss: 0.68221019, Global Avg Loss: 0.68741338, Time: 0.0600 Steps: 400, Updated lr: 0.000040 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.36688352, Cur Avg Loss: 0.64352080, Log Avg loss: 0.55573564, Global Avg Loss: 0.64352080, Time: 0.1761 Steps: 600, Updated lr: 0.000060 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.10357140, Cur Avg Loss: 0.52667599, Log Avg loss: 0.17614155, Global Avg Loss: 0.52667599, Time: 0.1179 Steps: 800, Updated lr: 0.000080 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.09172229, Cur Avg Loss: 0.43927529, Log Avg loss: 0.08967252, Global Avg Loss: 0.43927529, Time: 0.0608 Steps: 1000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.07536339, Cur Avg Loss: 0.38004144, Log Avg loss: 0.08387219, Global Avg Loss: 0.38004144, Time: 0.1167 Steps: 1200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.06663176, Cur Avg Loss: 0.33753610, Log Avg loss: 0.08250402, Global Avg Loss: 0.33753610, Time: 0.0332 Steps: 1400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 0.08392768, Cur Avg Loss: 0.30551894, Log Avg loss: 0.08139881, Global Avg Loss: 0.30551894, Time: 0.0689 Steps: 1600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.09732844, Cur Avg Loss: 0.28057689, Log Avg loss: 0.08104048, Global Avg Loss: 0.28057689, Time: 0.0613 Steps: 1800, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.07354093, Cur Avg Loss: 0.26048195, Log Avg loss: 0.07962747, Global Avg Loss: 0.26048195, Time: 0.0810 Steps: 2000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.08527172, Cur Avg Loss: 0.24396119, Log Avg loss: 0.07875366, Global Avg Loss: 0.24396119, Time: 0.0612 Steps: 2200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.07523076, Cur Avg Loss: 0.22980641, Log Avg loss: 0.07410383, Global Avg Loss: 0.22980641, Time: 0.1158 Steps: 2400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.06528224, Cur Avg Loss: 0.21742578, Log Avg loss: 0.06885826, Global Avg Loss: 0.21742578, Time: 0.0622 Steps: 2600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.06035922, Cur Avg Loss: 0.20658575, Log Avg loss: 0.06566527, Global Avg Loss: 0.20658575, Time: 0.0617 Steps: 2800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.06881995, Cur Avg Loss: 0.19690954, Log Avg loss: 0.06144266, Global Avg Loss: 0.19690954, Time: 0.0629 Steps: 3000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.07182793, Cur Avg Loss: 0.18822170, Log Avg loss: 0.05790402, Global Avg Loss: 0.18822170, Time: 0.0693 Steps: 3200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur
Loss: 0.05240723, Cur Avg Loss: 0.18033110, Log Avg loss: 0.05408162, Global Avg Loss: 0.18033110, Time: 0.0612 Steps: 3400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.05299497, Cur Avg Loss: 0.17309215, Log Avg loss: 0.05002989, Global Avg Loss: 0.17309215, Time: 0.0635 Steps: 3600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.04414582, Cur Avg Loss: 0.16646654, Log Avg loss: 0.04720561, Global Avg Loss: 0.16646654, Time: 0.0604 Steps: 3800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.03407840, Cur Avg Loss: 0.16042133, Log Avg loss: 0.04556242, Global Avg Loss: 0.16042133, Time: 0.0621 Steps: 4000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.03835978, Cur Avg Loss: 0.15489941, Log Avg loss: 0.04446096, Global Avg Loss: 0.15489941, Time: 0.1084 Steps: 4200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.04826340, Cur Avg Loss: 0.14978310, Log Avg loss: 0.04234045, Global Avg Loss: 0.14978310, Time: 0.0599 Steps: 4400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.05765953, Cur Avg Loss: 0.14504408, Log Avg loss: 0.04078576, Global Avg Loss: 0.14504408, Time: 0.0649 Steps: 4600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.05434040, Cur Avg Loss: 0.14062622, Log Avg loss: 0.03901553, Global Avg Loss: 0.14062622, Time: 0.1124 Steps: 4800, Updated lr: 0.000098 ***** Running evaluation checkpoint-4974 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-4974 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 398.298868, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.036899, "eval_total_loss": 39.334736, "eval_acc": 0.99066, "eval_jaccard": 0.435457, "eval_prec": 0.453557, "eval_recall": 0.442774, "eval_f1": 0.444742, "eval_pr_auc": 0.652774, "eval_roc_auc": 0.919717, "eval_fmax": 0.700862, "eval_pmax": 0.823111, "eval_rmax": 0.610229, "eval_tmax": 0.1, "update_flag": true, "test_avg_loss": 0.037457, "test_total_loss": 39.929104, "test_acc": 0.990576, "test_jaccard": 0.429223, "test_prec": 0.446808, "test_recall": 0.436624, "test_f1": 0.438423, "test_pr_auc": 0.645367, "test_roc_auc": 0.917574, "test_fmax": 0.695546, "test_pmax": 0.818773, "test_rmax": 0.604558, "test_tmax": 0.1, "lr": 9.839563988696004e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.1370539875033753, "train_cur_epoch_loss": 681.7065338417888, "train_cur_epoch_avg_loss": 0.1370539875033753, "train_cur_epoch_time": 398.29886770248413, "train_cur_epoch_avg_time": 0.080076169622534, "epoch": 1, "step": 4974} ################################################## Training, Epoch: 0002, Batch: 000026, Sample Num: 416, Cur Loss: 0.02548085, Cur Avg Loss: 0.03372472, Log Avg loss: 0.03788749, Global Avg Loss: 0.13651668, Time: 0.0216 Steps: 5000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000226, Sample Num: 3616, Cur Loss: 0.03813307, Cur Avg Loss: 0.03624469, Log Avg loss: 0.03657228, Global Avg Loss: 0.13267266, Time: 0.0675 Steps: 5200, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000426, Sample Num: 6816, Cur Loss: 0.03631847, Cur Avg Loss: 0.03623055, Log Avg loss: 
0.03621457, Global Avg Loss: 0.12910014, Time: 0.1757 Steps: 5400, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000626, Sample Num: 10016, Cur Loss: 0.03085681, Cur Avg Loss: 0.03565191, Log Avg loss: 0.03441941, Global Avg Loss: 0.12571868, Time: 0.0608 Steps: 5600, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000826, Sample Num: 13216, Cur Loss: 0.03219007, Cur Avg Loss: 0.03498939, Log Avg loss: 0.03291569, Global Avg Loss: 0.12251858, Time: 0.0773 Steps: 5800, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001026, Sample Num: 16416, Cur Loss: 0.02296518, Cur Avg Loss: 0.03438756, Log Avg loss: 0.03190204, Global Avg Loss: 0.11949803, Time: 0.0631 Steps: 6000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001226, Sample Num: 19616, Cur Loss: 0.03561187, Cur Avg Loss: 0.03383976, Log Avg loss: 0.03102955, Global Avg Loss: 0.11664421, Time: 0.0245 Steps: 6200, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001426, Sample Num: 22816, Cur Loss: 0.02815499, Cur Avg Loss: 0.03323785, Log Avg loss: 0.02954811, Global Avg Loss: 0.11392245, Time: 0.1623 Steps: 6400, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001626, Sample Num: 26016, Cur Loss: 0.04099884, Cur Avg Loss: 0.03276064, Log Avg loss: 0.02935814, Global Avg Loss: 0.11135990, Time: 0.0609 Steps: 6600, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001826, Sample Num: 29216, Cur Loss: 0.02782760, Cur Avg Loss: 0.03222636, Log Avg loss: 0.02788268, Global Avg Loss: 0.10890469, Time: 0.0811 Steps: 6800, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 002026, Sample Num: 32416, Cur Loss: 0.02018437, Cur Avg Loss: 0.03183350, Log Avg loss: 0.02824666, Global Avg Loss: 0.10660017, Time: 0.1269 Steps: 7000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 002226, Sample Num: 35616, Cur Loss: 0.02353626, Cur Avg Loss: 0.03141246, Log Avg loss: 0.02714739, Global Avg Loss: 0.10439315, Time: 0.0613 Steps: 7200, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 002426, Sample Num: 38816, Cur Loss: 0.02020735, Cur Avg Loss: 0.03097124, Log Avg loss: 0.02606038, Global Avg Loss: 0.10227605, Time: 0.0616 Steps: 7400, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 002626, Sample Num: 42016, Cur Loss: 0.02417888, Cur Avg Loss: 0.03050717, Log Avg loss: 0.02487806, Global Avg Loss: 0.10023926, Time: 0.1770 Steps: 7600, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 002826, Sample Num: 45216, Cur Loss: 0.02336481, Cur Avg Loss: 0.03011246, Log Avg loss: 0.02492982, Global Avg Loss: 0.09830825, Time: 0.0352 Steps: 7800, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003026, Sample Num: 48416, Cur Loss: 0.03357884, Cur Avg Loss: 0.02969289, Log Avg loss: 0.02376437, Global Avg Loss: 0.09644465, Time: 0.0614 Steps: 8000, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003226, Sample Num: 51616, Cur Loss: 0.03279926, Cur Avg Loss: 0.02928328, Log Avg loss: 0.02308589, Global Avg Loss: 0.09465541, Time: 0.1140 Steps: 8200, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003426, Sample Num: 54816, Cur Loss: 0.01325194, Cur Avg Loss: 0.02889590, Log Avg loss: 0.02264748, Global Avg Loss: 0.09294094, Time: 0.1196 Steps: 8400, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003626, Sample Num: 58016, Cur Loss: 0.04203711, Cur Avg Loss: 0.02854755, Log Avg loss: 0.02258036, Global Avg Loss: 0.09130465, Time: 0.0361 Steps: 8600, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003826, Sample Num: 61216, Cur Loss: 0.02266923, Cur Avg Loss: 0.02814551, Log Avg loss: 0.02085643, Global Avg Loss: 
0.08970355, Time: 0.0729 Steps: 8800, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004026, Sample Num: 64416, Cur Loss: 0.01471033, Cur Avg Loss: 0.02781016, Log Avg loss: 0.02139499, Global Avg Loss: 0.08818558, Time: 0.0547 Steps: 9000, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004226, Sample Num: 67616, Cur Loss: 0.01505812, Cur Avg Loss: 0.02746256, Log Avg loss: 0.02046537, Global Avg Loss: 0.08671340, Time: 0.0644 Steps: 9200, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004426, Sample Num: 70816, Cur Loss: 0.01982945, Cur Avg Loss: 0.02709781, Log Avg loss: 0.01939061, Global Avg Loss: 0.08528100, Time: 0.0605 Steps: 9400, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004626, Sample Num: 74016, Cur Loss: 0.01421995, Cur Avg Loss: 0.02676820, Log Avg loss: 0.01947391, Global Avg Loss: 0.08391002, Time: 0.0613 Steps: 9600, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004826, Sample Num: 77216, Cur Loss: 0.02162291, Cur Avg Loss: 0.02641002, Log Avg loss: 0.01812543, Global Avg Loss: 0.08256748, Time: 0.0239 Steps: 9800, Updated lr: 0.000096 ***** Running evaluation checkpoint-9948 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-9948 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 406.694804, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.017265, "eval_total_loss": 18.404006, "eval_acc": 0.994386, "eval_jaccard": 0.643532, "eval_prec": 0.653614, "eval_recall": 0.647177, "eval_f1": 0.648309, "eval_pr_auc": 0.884418, "eval_roc_auc": 0.982667, "eval_fmax": 0.875074, "eval_pmax": 0.906049, "eval_rmax": 0.846146, "eval_tmax": 0.16, "update_flag": true, "test_avg_loss": 0.017673, "test_total_loss": 18.839383, "test_acc": 0.994318, "test_jaccard": 0.636951, "test_prec": 0.64796, "test_recall": 0.640715, "test_f1": 0.642153, "test_pr_auc": 0.877138, "test_roc_auc": 0.981706, "test_fmax": 0.868798, "test_pmax": 0.904235, "test_rmax": 0.836033, "test_tmax": 0.16, "lr": 9.638756560355269e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.08161398311327506, "train_cur_epoch_loss": 130.18937016907148, "train_cur_epoch_avg_loss": 0.026173978723174805, "train_cur_epoch_time": 406.6948037147522, "train_cur_epoch_avg_time": 0.08176413424100366, "epoch": 2, "step": 9948} ################################################## Training, Epoch: 0003, Batch: 000052, Sample Num: 832, Cur Loss: 0.01202641, Cur Avg Loss: 0.01653360, Log Avg loss: 0.01797176, Global Avg Loss: 0.08127557, Time: 0.1027 Steps: 10000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000252, Sample Num: 4032, Cur Loss: 0.01541365, Cur Avg Loss: 0.01739122, Log Avg loss: 0.01761420, Global Avg Loss: 0.08002730, Time: 0.1084 Steps: 10200, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000452, Sample Num: 7232, Cur Loss: 0.02994420, Cur Avg Loss: 0.01744750, Log Avg loss: 0.01751841, Global Avg Loss: 0.07882521, Time: 0.1418 Steps: 10400, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000652, Sample Num: 10432, Cur Loss: 0.01589941, Cur Avg Loss: 0.01733470, Log Avg loss: 0.01707978, Global Avg Loss: 0.07766020, Time: 0.1117 Steps: 10600, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000852, Sample Num: 13632, Cur Loss: 0.01048554, Cur Avg Loss: 0.01703571, Log Avg loss: 0.01606102, Global Avg Loss: 0.07651948, Time: 0.1203 Steps: 10800, Updated lr: 
0.000096 Training, Epoch: 0003, Batch: 001052, Sample Num: 16832, Cur Loss: 0.02421260, Cur Avg Loss: 0.01678998, Log Avg loss: 0.01574313, Global Avg Loss: 0.07541445, Time: 0.1608 Steps: 11000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001252, Sample Num: 20032, Cur Loss: 0.00523446, Cur Avg Loss: 0.01648312, Log Avg loss: 0.01486906, Global Avg Loss: 0.07433328, Time: 0.1122 Steps: 11200, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001452, Sample Num: 23232, Cur Loss: 0.01373568, Cur Avg Loss: 0.01619979, Log Avg loss: 0.01442611, Global Avg Loss: 0.07328228, Time: 0.1007 Steps: 11400, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001652, Sample Num: 26432, Cur Loss: 0.01211041, Cur Avg Loss: 0.01599023, Log Avg loss: 0.01446883, Global Avg Loss: 0.07226826, Time: 0.0872 Steps: 11600, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001852, Sample Num: 29632, Cur Loss: 0.01749725, Cur Avg Loss: 0.01576941, Log Avg loss: 0.01394543, Global Avg Loss: 0.07127973, Time: 0.1130 Steps: 11800, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 002052, Sample Num: 32832, Cur Loss: 0.01690444, Cur Avg Loss: 0.01559012, Log Avg loss: 0.01392990, Global Avg Loss: 0.07032390, Time: 0.2273 Steps: 12000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 002252, Sample Num: 36032, Cur Loss: 0.01627786, Cur Avg Loss: 0.01542541, Log Avg loss: 0.01373550, Global Avg Loss: 0.06939622, Time: 0.1597 Steps: 12200, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 002452, Sample Num: 39232, Cur Loss: 0.01376766, Cur Avg Loss: 0.01519869, Log Avg loss: 0.01264588, Global Avg Loss: 0.06848090, Time: 0.1189 Steps: 12400, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 002652, Sample Num: 42432, Cur Loss: 0.01041331, Cur Avg Loss: 0.01499277, Log Avg loss: 0.01246812, Global Avg Loss: 0.06759180, Time: 0.1658 Steps: 12600, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 002852, Sample Num: 45632, Cur Loss: 0.01076717, Cur Avg Loss: 0.01479665, Log Avg loss: 0.01219613, Global Avg Loss: 0.06672625, Time: 0.1762 Steps: 12800, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003052, Sample Num: 48832, Cur Loss: 0.01069030, Cur Avg Loss: 0.01461423, Log Avg loss: 0.01201292, Global Avg Loss: 0.06588450, Time: 0.1152 Steps: 13000, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003252, Sample Num: 52032, Cur Loss: 0.01361081, Cur Avg Loss: 0.01445097, Log Avg loss: 0.01195964, Global Avg Loss: 0.06506746, Time: 0.0656 Steps: 13200, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003452, Sample Num: 55232, Cur Loss: 0.01963283, Cur Avg Loss: 0.01427957, Log Avg loss: 0.01149264, Global Avg Loss: 0.06426783, Time: 0.1809 Steps: 13400, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003652, Sample Num: 58432, Cur Loss: 0.00935341, Cur Avg Loss: 0.01414209, Log Avg loss: 0.01176906, Global Avg Loss: 0.06349579, Time: 0.1757 Steps: 13600, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003852, Sample Num: 61632, Cur Loss: 0.00788402, Cur Avg Loss: 0.01398741, Log Avg loss: 0.01116304, Global Avg Loss: 0.06273735, Time: 0.1208 Steps: 13800, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004052, Sample Num: 64832, Cur Loss: 0.00844050, Cur Avg Loss: 0.01388137, Log Avg loss: 0.01183897, Global Avg Loss: 0.06201023, Time: 0.1681 Steps: 14000, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004252, Sample Num: 68032, Cur Loss: 0.00769329, Cur Avg Loss: 0.01373751, Log Avg loss: 0.01082290, Global Avg Loss: 0.06128928, Time: 0.1208 Steps: 14200, Updated lr: 0.000095 Training, 
Epoch: 0003, Batch: 004452, Sample Num: 71232, Cur Loss: 0.01888794, Cur Avg Loss: 0.01360017, Log Avg loss: 0.01068044, Global Avg Loss: 0.06058638, Time: 0.1182 Steps: 14400, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004652, Sample Num: 74432, Cur Loss: 0.00343432, Cur Avg Loss: 0.01348712, Log Avg loss: 0.01097057, Global Avg Loss: 0.05990671, Time: 0.1131 Steps: 14600, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004852, Sample Num: 77632, Cur Loss: 0.01179859, Cur Avg Loss: 0.01334598, Log Avg loss: 0.01006313, Global Avg Loss: 0.05923315, Time: 0.1761 Steps: 14800, Updated lr: 0.000094 ***** Running evaluation checkpoint-14922 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-14922 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 584.987541, Avg time per batch (s): 0.120000 {"eval_avg_loss": 0.009572, "eval_total_loss": 10.203544, "eval_acc": 0.997328, "eval_jaccard": 0.844146, "eval_prec": 0.851053, "eval_recall": 0.848434, "eval_f1": 0.848183, "eval_pr_auc": 0.936385, "eval_roc_auc": 0.993509, "eval_fmax": 0.938501, "eval_pmax": 0.970629, "eval_rmax": 0.908432, "eval_tmax": 0.21, "update_flag": true, "test_avg_loss": 0.009826, "test_total_loss": 10.474762, "test_acc": 0.997267, "test_jaccard": 0.838061, "test_prec": 0.845584, "test_recall": 0.842389, "test_f1": 0.842388, "test_pr_auc": 0.93408, "test_roc_auc": 0.992985, "test_fmax": 0.935991, "test_pmax": 0.973148, "test_rmax": 0.901568, "test_tmax": 0.22, "lr": 9.437949132014535e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.058833283771333206, "train_cur_epoch_loss": 66.01435642497381, "train_cur_epoch_avg_loss": 0.0132718850874495, "train_cur_epoch_time": 584.9875409603119, "train_cur_epoch_avg_time": 0.11760907538405949, "epoch": 3, "step": 14922} ################################################## Training, Epoch: 0004, Batch: 000078, Sample Num: 1248, Cur Loss: 0.01383932, Cur Avg Loss: 0.00993981, Log Avg loss: 0.01017478, Global Avg Loss: 0.05857904, Time: 0.1163 Steps: 15000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000278, Sample Num: 4448, Cur Loss: 0.01902956, Cur Avg Loss: 0.00982174, Log Avg loss: 0.00977569, Global Avg Loss: 0.05793689, Time: 0.1431 Steps: 15200, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000478, Sample Num: 7648, Cur Loss: 0.01073497, Cur Avg Loss: 0.00988796, Log Avg loss: 0.00998000, Global Avg Loss: 0.05731407, Time: 0.1996 Steps: 15400, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000678, Sample Num: 10848, Cur Loss: 0.00228236, Cur Avg Loss: 0.00981886, Log Avg loss: 0.00965372, Global Avg Loss: 0.05670304, Time: 0.0948 Steps: 15600, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000878, Sample Num: 14048, Cur Loss: 0.01303033, Cur Avg Loss: 0.00969906, Log Avg loss: 0.00929295, Global Avg Loss: 0.05610291, Time: 0.1665 Steps: 15800, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001078, Sample Num: 17248, Cur Loss: 0.01178346, Cur Avg Loss: 0.00956281, Log Avg loss: 0.00896468, Global Avg Loss: 0.05551369, Time: 0.0616 Steps: 16000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001278, Sample Num: 20448, Cur Loss: 0.00751214, Cur Avg Loss: 0.00939283, Log Avg loss: 0.00847661, Global Avg Loss: 0.05493298, Time: 0.1671 Steps: 16200, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001478, Sample Num: 23648, 
Cur Loss: 0.01012996, Cur Avg Loss: 0.00920833, Log Avg loss: 0.00802937, Global Avg Loss: 0.05436099, Time: 0.1324 Steps: 16400, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001678, Sample Num: 26848, Cur Loss: 0.01669203, Cur Avg Loss: 0.00910420, Log Avg loss: 0.00833470, Global Avg Loss: 0.05380645, Time: 0.0357 Steps: 16600, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001878, Sample Num: 30048, Cur Loss: 0.00553750, Cur Avg Loss: 0.00894915, Log Avg loss: 0.00764823, Global Avg Loss: 0.05325695, Time: 0.0619 Steps: 16800, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 002078, Sample Num: 33248, Cur Loss: 0.00877362, Cur Avg Loss: 0.00889396, Log Avg loss: 0.00837574, Global Avg Loss: 0.05272894, Time: 0.0605 Steps: 17000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 002278, Sample Num: 36448, Cur Loss: 0.00446439, Cur Avg Loss: 0.00882589, Log Avg loss: 0.00811864, Global Avg Loss: 0.05221021, Time: 0.0627 Steps: 17200, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 002478, Sample Num: 39648, Cur Loss: 0.00745148, Cur Avg Loss: 0.00870950, Log Avg loss: 0.00738389, Global Avg Loss: 0.05169497, Time: 0.1174 Steps: 17400, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 002678, Sample Num: 42848, Cur Loss: 0.00294927, Cur Avg Loss: 0.00861271, Log Avg loss: 0.00741337, Global Avg Loss: 0.05119177, Time: 0.0717 Steps: 17600, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 002878, Sample Num: 46048, Cur Loss: 0.00450257, Cur Avg Loss: 0.00851966, Log Avg loss: 0.00727383, Global Avg Loss: 0.05069831, Time: 0.0610 Steps: 17800, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003078, Sample Num: 49248, Cur Loss: 0.00788061, Cur Avg Loss: 0.00840536, Log Avg loss: 0.00676059, Global Avg Loss: 0.05021011, Time: 0.0619 Steps: 18000, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003278, Sample Num: 52448, Cur Loss: 0.00831095, Cur Avg Loss: 0.00833867, Log Avg loss: 0.00731229, Global Avg Loss: 0.04973870, Time: 0.0593 Steps: 18200, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003478, Sample Num: 55648, Cur Loss: 0.00876562, Cur Avg Loss: 0.00826290, Log Avg loss: 0.00702103, Global Avg Loss: 0.04927438, Time: 0.1675 Steps: 18400, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003678, Sample Num: 58848, Cur Loss: 0.00867005, Cur Avg Loss: 0.00819975, Log Avg loss: 0.00710148, Global Avg Loss: 0.04882091, Time: 0.0612 Steps: 18600, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003878, Sample Num: 62048, Cur Loss: 0.00381786, Cur Avg Loss: 0.00813084, Log Avg loss: 0.00686355, Global Avg Loss: 0.04837456, Time: 0.0621 Steps: 18800, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004078, Sample Num: 65248, Cur Loss: 0.00901689, Cur Avg Loss: 0.00809017, Log Avg loss: 0.00730161, Global Avg Loss: 0.04794221, Time: 0.0617 Steps: 19000, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004278, Sample Num: 68448, Cur Loss: 0.00719431, Cur Avg Loss: 0.00800956, Log Avg loss: 0.00636608, Global Avg Loss: 0.04750912, Time: 0.1179 Steps: 19200, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004478, Sample Num: 71648, Cur Loss: 0.00468385, Cur Avg Loss: 0.00794991, Log Avg loss: 0.00667391, Global Avg Loss: 0.04708814, Time: 0.0612 Steps: 19400, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004678, Sample Num: 74848, Cur Loss: 0.00742126, Cur Avg Loss: 0.00790254, Log Avg loss: 0.00684200, Global Avg Loss: 0.04667747, Time: 0.0598 Steps: 19600, Updated lr: 0.000092 Training, Epoch: 0004, Batch: 004878, Sample Num: 78048, Cur Loss: 
0.00304103, Cur Avg Loss: 0.00784074, Log Avg loss: 0.00639516, Global Avg Loss: 0.04627058, Time: 0.0610 Steps: 19800, Updated lr: 0.000092 ***** Running evaluation checkpoint-19896 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-19896 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 429.084271, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.006024, "eval_total_loss": 6.42159, "eval_acc": 0.998308, "eval_jaccard": 0.905679, "eval_prec": 0.910588, "eval_recall": 0.908894, "eval_f1": 0.908702, "eval_pr_auc": 0.966156, "eval_roc_auc": 0.996703, "eval_fmax": 0.963199, "eval_pmax": 0.973197, "eval_rmax": 0.953403, "eval_tmax": 0.15, "update_flag": true, "test_avg_loss": 0.006196, "test_total_loss": 6.605432, "test_acc": 0.998247, "test_jaccard": 0.900663, "test_prec": 0.906155, "test_recall": 0.904059, "test_f1": 0.904007, "test_pr_auc": 0.963989, "test_roc_auc": 0.996376, "test_fmax": 0.961121, "test_pmax": 0.972356, "test_rmax": 0.950144, "test_tmax": 0.15, "lr": 9.237141703673799e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.04607910968659186, "train_cur_epoch_loss": 38.87970588859753, "train_cur_epoch_avg_loss": 0.007816587432367818, "train_cur_epoch_time": 429.0842707157135, "train_cur_epoch_avg_time": 0.08626543440203327, "epoch": 4, "step": 19896} ################################################## Training, Epoch: 0005, Batch: 000104, Sample Num: 1664, Cur Loss: 0.00841135, Cur Avg Loss: 0.00655702, Log Avg loss: 0.00657253, Global Avg Loss: 0.04587359, Time: 0.0616 Steps: 20000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000304, Sample Num: 4864, Cur Loss: 0.00417851, Cur Avg Loss: 0.00609595, Log Avg loss: 0.00585620, Global Avg Loss: 0.04547738, Time: 0.0615 Steps: 20200, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000504, Sample Num: 8064, Cur Loss: 0.00487686, Cur Avg Loss: 0.00629215, Log Avg loss: 0.00659038, Global Avg Loss: 0.04509614, Time: 0.0612 Steps: 20400, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000704, Sample Num: 11264, Cur Loss: 0.00291279, Cur Avg Loss: 0.00626019, Log Avg loss: 0.00617965, Global Avg Loss: 0.04471831, Time: 0.1117 Steps: 20600, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000904, Sample Num: 14464, Cur Loss: 0.01325177, Cur Avg Loss: 0.00619739, Log Avg loss: 0.00597633, Global Avg Loss: 0.04434579, Time: 0.0863 Steps: 20800, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001104, Sample Num: 17664, Cur Loss: 0.00427847, Cur Avg Loss: 0.00609315, Log Avg loss: 0.00562201, Global Avg Loss: 0.04397699, Time: 0.0608 Steps: 21000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001304, Sample Num: 20864, Cur Loss: 0.00859279, Cur Avg Loss: 0.00601957, Log Avg loss: 0.00561337, Global Avg Loss: 0.04361507, Time: 0.0609 Steps: 21200, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001504, Sample Num: 24064, Cur Loss: 0.00382826, Cur Avg Loss: 0.00585946, Log Avg loss: 0.00481557, Global Avg Loss: 0.04325246, Time: 0.1231 Steps: 21400, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001704, Sample Num: 27264, Cur Loss: 0.00486119, Cur Avg Loss: 0.00579135, Log Avg loss: 0.00527912, Global Avg Loss: 0.04290085, Time: 0.1232 Steps: 21600, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001904, Sample Num: 30464, Cur Loss: 0.00304984, Cur Avg Loss: 0.00569753, Log Avg 
loss: 0.00489823, Global Avg Loss: 0.04255220, Time: 0.1466 Steps: 21800, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 002104, Sample Num: 33664, Cur Loss: 0.00076358, Cur Avg Loss: 0.00570576, Log Avg loss: 0.00578405, Global Avg Loss: 0.04221795, Time: 0.0706 Steps: 22000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 002304, Sample Num: 36864, Cur Loss: 0.00475314, Cur Avg Loss: 0.00567745, Log Avg loss: 0.00537968, Global Avg Loss: 0.04188607, Time: 0.0608 Steps: 22200, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 002504, Sample Num: 40064, Cur Loss: 0.00088527, Cur Avg Loss: 0.00560950, Log Avg loss: 0.00482677, Global Avg Loss: 0.04155519, Time: 0.0608 Steps: 22400, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 002704, Sample Num: 43264, Cur Loss: 0.00411975, Cur Avg Loss: 0.00555375, Log Avg loss: 0.00485573, Global Avg Loss: 0.04123041, Time: 0.0278 Steps: 22600, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 002904, Sample Num: 46464, Cur Loss: 0.00043554, Cur Avg Loss: 0.00550174, Log Avg loss: 0.00479859, Global Avg Loss: 0.04091083, Time: 0.1186 Steps: 22800, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003104, Sample Num: 49664, Cur Loss: 0.00827492, Cur Avg Loss: 0.00543976, Log Avg loss: 0.00453973, Global Avg Loss: 0.04059456, Time: 0.0685 Steps: 23000, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003304, Sample Num: 52864, Cur Loss: 0.00765900, Cur Avg Loss: 0.00540311, Log Avg loss: 0.00483439, Global Avg Loss: 0.04028629, Time: 0.0654 Steps: 23200, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003504, Sample Num: 56064, Cur Loss: 0.00621625, Cur Avg Loss: 0.00536494, Log Avg loss: 0.00473426, Global Avg Loss: 0.03998242, Time: 0.1160 Steps: 23400, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003704, Sample Num: 59264, Cur Loss: 0.04047211, Cur Avg Loss: 0.00534961, Log Avg loss: 0.00508103, Global Avg Loss: 0.03968665, Time: 0.0394 Steps: 23600, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003904, Sample Num: 62464, Cur Loss: 0.00428853, Cur Avg Loss: 0.00530622, Log Avg loss: 0.00450265, Global Avg Loss: 0.03939098, Time: 0.0689 Steps: 23800, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004104, Sample Num: 65664, Cur Loss: 0.00611090, Cur Avg Loss: 0.00529985, Log Avg loss: 0.00517553, Global Avg Loss: 0.03910586, Time: 0.1169 Steps: 24000, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004304, Sample Num: 68864, Cur Loss: 0.01190170, Cur Avg Loss: 0.00526484, Log Avg loss: 0.00454641, Global Avg Loss: 0.03882024, Time: 0.0619 Steps: 24200, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004504, Sample Num: 72064, Cur Loss: 0.00518395, Cur Avg Loss: 0.00523671, Log Avg loss: 0.00463128, Global Avg Loss: 0.03854000, Time: 0.0600 Steps: 24400, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004704, Sample Num: 75264, Cur Loss: 0.00156718, Cur Avg Loss: 0.00521170, Log Avg loss: 0.00464863, Global Avg Loss: 0.03826446, Time: 0.0703 Steps: 24600, Updated lr: 0.000090 Training, Epoch: 0005, Batch: 004904, Sample Num: 78464, Cur Loss: 0.00066655, Cur Avg Loss: 0.00518768, Log Avg loss: 0.00462264, Global Avg Loss: 0.03799316, Time: 0.0629 Steps: 24800, Updated lr: 0.000090 ***** Running evaluation checkpoint-24870 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-24870 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] 
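Alongside the fixed-threshold precision/recall/f1, each evaluation block in this log reports fmax, pmax, rmax and tmax: the best F1 found while sweeping the decision threshold over the sigmoid scores, the precision and recall at that best threshold, and the threshold itself. The logged tmax values (0.1, 0.16, 0.21, ...) are consistent with a 0.01-step grid; this micro-averaged sketch is an assumption about the exact averaging, not the project's actual metric code:

import numpy as np

def f_max(probs: np.ndarray, targets: np.ndarray):
    """probs, targets: (num_samples, num_labels); targets are 0/1."""
    fmax = pmax = rmax = tmax = 0.0
    for t in np.arange(0.01, 1.0, 0.01):              # threshold sweep
        pred = (probs >= t).astype(np.int64)
        tp = float((pred * targets).sum())
        p = tp / max(float(pred.sum()), 1.0)          # micro precision
        r = tp / max(float(targets.sum()), 1.0)       # micro recall
        f1 = 2 * p * r / (p + r) if (p + r) > 0 else 0.0
        if f1 > fmax:
            fmax, pmax, rmax, tmax = f1, p, r, t
    return fmax, pmax, rmax, tmax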
################################################## Epoch Time: 387.391462, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.004438, "eval_total_loss": 4.73079, "eval_acc": 0.998685, "eval_jaccard": 0.92938, "eval_prec": 0.932981, "eval_recall": 0.932457, "eval_f1": 0.931887, "eval_pr_auc": 0.979282, "eval_roc_auc": 0.99788, "eval_fmax": 0.977249, "eval_pmax": 0.984579, "eval_rmax": 0.970027, "eval_tmax": 0.2, "update_flag": true, "test_avg_loss": 0.004542, "test_total_loss": 4.842141, "test_acc": 0.998663, "test_jaccard": 0.925807, "test_prec": 0.929378, "test_recall": 0.928722, "test_f1": 0.928303, "test_pr_auc": 0.977923, "test_roc_auc": 0.997645, "test_fmax": 0.975793, "test_pmax": 0.983329, "test_rmax": 0.968371, "test_tmax": 0.19, "lr": 9.036334275333064e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.037898426583390495, "train_cur_epoch_loss": 25.743902804490062, "train_cur_epoch_avg_loss": 0.0051756941705850545, "train_cur_epoch_time": 387.3914620876312, "train_cur_epoch_avg_time": 0.07788328550213736, "epoch": 5, "step": 24870} ################################################## Training, Epoch: 0006, Batch: 000130, Sample Num: 2080, Cur Loss: 0.00082394, Cur Avg Loss: 0.00460954, Log Avg loss: 0.00451385, Global Avg Loss: 0.03772532, Time: 0.0690 Steps: 25000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000330, Sample Num: 5280, Cur Loss: 0.00927533, Cur Avg Loss: 0.00434185, Log Avg loss: 0.00416785, Global Avg Loss: 0.03745900, Time: 0.0715 Steps: 25200, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000530, Sample Num: 8480, Cur Loss: 0.00923348, Cur Avg Loss: 0.00444128, Log Avg loss: 0.00460534, Global Avg Loss: 0.03720031, Time: 0.1076 Steps: 25400, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000730, Sample Num: 11680, Cur Loss: 0.00510018, Cur Avg Loss: 0.00449181, Log Avg loss: 0.00462571, Global Avg Loss: 0.03694582, Time: 0.0603 Steps: 25600, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000930, Sample Num: 14880, Cur Loss: 0.00150385, Cur Avg Loss: 0.00443311, Log Avg loss: 0.00421884, Global Avg Loss: 0.03669212, Time: 0.1783 Steps: 25800, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001130, Sample Num: 18080, Cur Loss: 0.00848727, Cur Avg Loss: 0.00437020, Log Avg loss: 0.00407769, Global Avg Loss: 0.03644124, Time: 0.0596 Steps: 26000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001330, Sample Num: 21280, Cur Loss: 0.00310898, Cur Avg Loss: 0.00431035, Log Avg loss: 0.00397217, Global Avg Loss: 0.03619338, Time: 0.0608 Steps: 26200, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001530, Sample Num: 24480, Cur Loss: 0.00305355, Cur Avg Loss: 0.00419441, Log Avg loss: 0.00342344, Global Avg Loss: 0.03594513, Time: 0.1147 Steps: 26400, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001730, Sample Num: 27680, Cur Loss: 0.00211098, Cur Avg Loss: 0.00412527, Log Avg loss: 0.00359637, Global Avg Loss: 0.03570190, Time: 0.0697 Steps: 26600, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001930, Sample Num: 30880, Cur Loss: 0.00446987, Cur Avg Loss: 0.00406867, Log Avg loss: 0.00357909, Global Avg Loss: 0.03546218, Time: 0.1383 Steps: 26800, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 002130, Sample Num: 34080, Cur Loss: 0.00052821, Cur Avg Loss: 0.00408364, Log Avg loss: 0.00422802, Global Avg Loss: 0.03523082, Time: 0.0601 Steps: 27000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 002330, Sample Num: 37280, Cur Loss: 0.00615212, Cur Avg Loss: 0.00406170, Log Avg loss: 0.00382811, Global Avg Loss: 
0.03499991, Time: 0.0604 Steps: 27200, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 002530, Sample Num: 40480, Cur Loss: 0.00140416, Cur Avg Loss: 0.00401294, Log Avg loss: 0.00344487, Global Avg Loss: 0.03476958, Time: 0.0240 Steps: 27400, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 002730, Sample Num: 43680, Cur Loss: 0.00298775, Cur Avg Loss: 0.00398382, Log Avg loss: 0.00361538, Global Avg Loss: 0.03454383, Time: 0.0612 Steps: 27600, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 002930, Sample Num: 46880, Cur Loss: 0.00320784, Cur Avg Loss: 0.00394723, Log Avg loss: 0.00344784, Global Avg Loss: 0.03432012, Time: 0.1160 Steps: 27800, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003130, Sample Num: 50080, Cur Loss: 0.00641352, Cur Avg Loss: 0.00390968, Log Avg loss: 0.00335957, Global Avg Loss: 0.03409897, Time: 0.0610 Steps: 28000, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003330, Sample Num: 53280, Cur Loss: 0.00068884, Cur Avg Loss: 0.00388355, Log Avg loss: 0.00347467, Global Avg Loss: 0.03388178, Time: 0.1245 Steps: 28200, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003530, Sample Num: 56480, Cur Loss: 0.00113428, Cur Avg Loss: 0.00385671, Log Avg loss: 0.00340969, Global Avg Loss: 0.03366718, Time: 0.0606 Steps: 28400, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003730, Sample Num: 59680, Cur Loss: 0.00312081, Cur Avg Loss: 0.00385918, Log Avg loss: 0.00390290, Global Avg Loss: 0.03345904, Time: 0.0610 Steps: 28600, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003930, Sample Num: 62880, Cur Loss: 0.00221512, Cur Avg Loss: 0.00383774, Log Avg loss: 0.00343780, Global Avg Loss: 0.03325056, Time: 0.1180 Steps: 28800, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004130, Sample Num: 66080, Cur Loss: 0.00250722, Cur Avg Loss: 0.00384164, Log Avg loss: 0.00391834, Global Avg Loss: 0.03304827, Time: 0.0765 Steps: 29000, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004330, Sample Num: 69280, Cur Loss: 0.00365531, Cur Avg Loss: 0.00381630, Log Avg loss: 0.00329301, Global Avg Loss: 0.03284447, Time: 0.0945 Steps: 29200, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004530, Sample Num: 72480, Cur Loss: 0.00073945, Cur Avg Loss: 0.00380046, Log Avg loss: 0.00345750, Global Avg Loss: 0.03264456, Time: 0.0624 Steps: 29400, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004730, Sample Num: 75680, Cur Loss: 0.00325512, Cur Avg Loss: 0.00378230, Log Avg loss: 0.00337108, Global Avg Loss: 0.03244676, Time: 0.1095 Steps: 29600, Updated lr: 0.000088 Training, Epoch: 0006, Batch: 004930, Sample Num: 78880, Cur Loss: 0.00328445, Cur Avg Loss: 0.00378033, Log Avg loss: 0.00373364, Global Avg Loss: 0.03225406, Time: 0.1129 Steps: 29800, Updated lr: 0.000088 ***** Running evaluation checkpoint-29844 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-29844 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 393.057441, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.003461, "eval_total_loss": 3.68901, "eval_acc": 0.999001, "eval_jaccard": 0.948796, "eval_prec": 0.952389, "eval_recall": 0.951002, "eval_f1": 0.950968, "eval_pr_auc": 0.985948, "eval_roc_auc": 0.998393, "eval_fmax": 0.982987, "eval_pmax": 0.991423, "eval_rmax": 0.974693, "eval_tmax": 0.24, "update_flag": true, "test_avg_loss": 0.003558, 
"test_total_loss": 3.793206, "test_acc": 0.998991, "test_jaccard": 0.94613, "test_prec": 0.949652, "test_recall": 0.948408, "test_f1": 0.948319, "test_pr_auc": 0.985262, "test_roc_auc": 0.998229, "test_fmax": 0.981594, "test_pmax": 0.985266, "test_rmax": 0.977949, "test_tmax": 0.17, "lr": 8.83552684699233e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.032210954795377146, "train_cur_epoch_loss": 18.76986578431388, "train_cur_epoch_avg_loss": 0.00377359585531039, "train_cur_epoch_time": 393.05744099617004, "train_cur_epoch_avg_time": 0.07902240470369322, "epoch": 6, "step": 29844} ################################################## Training, Epoch: 0007, Batch: 000156, Sample Num: 2496, Cur Loss: 0.00058469, Cur Avg Loss: 0.00334593, Log Avg loss: 0.00327402, Global Avg Loss: 0.03206086, Time: 0.0605 Steps: 30000, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000356, Sample Num: 5696, Cur Loss: 0.00057820, Cur Avg Loss: 0.00337287, Log Avg loss: 0.00339389, Global Avg Loss: 0.03187101, Time: 0.0609 Steps: 30200, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000556, Sample Num: 8896, Cur Loss: 0.00185522, Cur Avg Loss: 0.00347148, Log Avg loss: 0.00364701, Global Avg Loss: 0.03168532, Time: 0.0639 Steps: 30400, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000756, Sample Num: 12096, Cur Loss: 0.00374173, Cur Avg Loss: 0.00346643, Log Avg loss: 0.00345237, Global Avg Loss: 0.03150080, Time: 0.0518 Steps: 30600, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000956, Sample Num: 15296, Cur Loss: 0.00201260, Cur Avg Loss: 0.00342593, Log Avg loss: 0.00327285, Global Avg Loss: 0.03131750, Time: 0.1072 Steps: 30800, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001156, Sample Num: 18496, Cur Loss: 0.00075739, Cur Avg Loss: 0.00337133, Log Avg loss: 0.00311033, Global Avg Loss: 0.03113552, Time: 0.0670 Steps: 31000, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001356, Sample Num: 21696, Cur Loss: 0.00701364, Cur Avg Loss: 0.00331357, Log Avg loss: 0.00297975, Global Avg Loss: 0.03095503, Time: 0.0612 Steps: 31200, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001556, Sample Num: 24896, Cur Loss: 0.00096619, Cur Avg Loss: 0.00320326, Log Avg loss: 0.00245536, Global Avg Loss: 0.03077350, Time: 0.0852 Steps: 31400, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001756, Sample Num: 28096, Cur Loss: 0.00300446, Cur Avg Loss: 0.00314203, Log Avg loss: 0.00266561, Global Avg Loss: 0.03059561, Time: 0.0632 Steps: 31600, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001956, Sample Num: 31296, Cur Loss: 0.00448917, Cur Avg Loss: 0.00311299, Log Avg loss: 0.00285802, Global Avg Loss: 0.03042116, Time: 0.0701 Steps: 31800, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 002156, Sample Num: 34496, Cur Loss: 0.00201313, Cur Avg Loss: 0.00312913, Log Avg loss: 0.00328696, Global Avg Loss: 0.03025157, Time: 0.0612 Steps: 32000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 002356, Sample Num: 37696, Cur Loss: 0.00388725, Cur Avg Loss: 0.00311410, Log Avg loss: 0.00295210, Global Avg Loss: 0.03008200, Time: 0.0605 Steps: 32200, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 002556, Sample Num: 40896, Cur Loss: 0.00098061, Cur Avg Loss: 0.00308727, Log Avg loss: 0.00277119, Global Avg Loss: 0.02991342, Time: 0.1110 Steps: 32400, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 002756, Sample Num: 44096, Cur Loss: 0.00981573, Cur Avg Loss: 0.00306068, Log Avg loss: 0.00272085, Global Avg Loss: 0.02974659, Time: 0.2612 Steps: 32600, Updated lr: 0.000087 
Training, Epoch: 0007, Batch: 002956, Sample Num: 47296, Cur Loss: 0.00029012, Cur Avg Loss: 0.00303030, Log Avg loss: 0.00261177, Global Avg Loss: 0.02958114, Time: 0.0609 Steps: 32800, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003156, Sample Num: 50496, Cur Loss: 0.00295806, Cur Avg Loss: 0.00300084, Log Avg loss: 0.00256529, Global Avg Loss: 0.02941741, Time: 0.0621 Steps: 33000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003356, Sample Num: 53696, Cur Loss: 0.00362601, Cur Avg Loss: 0.00298960, Log Avg loss: 0.00281234, Global Avg Loss: 0.02925713, Time: 0.0603 Steps: 33200, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003556, Sample Num: 56896, Cur Loss: 0.00221804, Cur Avg Loss: 0.00296924, Log Avg loss: 0.00262748, Global Avg Loss: 0.02909767, Time: 0.0673 Steps: 33400, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003756, Sample Num: 60096, Cur Loss: 0.01221500, Cur Avg Loss: 0.00296831, Log Avg loss: 0.00295182, Global Avg Loss: 0.02894204, Time: 0.1470 Steps: 33600, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003956, Sample Num: 63296, Cur Loss: 0.00164327, Cur Avg Loss: 0.00296662, Log Avg loss: 0.00293493, Global Avg Loss: 0.02878816, Time: 0.0316 Steps: 33800, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004156, Sample Num: 66496, Cur Loss: 0.00829703, Cur Avg Loss: 0.00296045, Log Avg loss: 0.00283836, Global Avg Loss: 0.02863551, Time: 0.0612 Steps: 34000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004356, Sample Num: 69696, Cur Loss: 0.00113085, Cur Avg Loss: 0.00293868, Log Avg loss: 0.00248637, Global Avg Loss: 0.02848259, Time: 0.0607 Steps: 34200, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004556, Sample Num: 72896, Cur Loss: 0.00246974, Cur Avg Loss: 0.00293769, Log Avg loss: 0.00291613, Global Avg Loss: 0.02833395, Time: 0.0599 Steps: 34400, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004756, Sample Num: 76096, Cur Loss: 0.00130035, Cur Avg Loss: 0.00291790, Log Avg loss: 0.00246716, Global Avg Loss: 0.02818443, Time: 0.1161 Steps: 34600, Updated lr: 0.000086 Training, Epoch: 0007, Batch: 004956, Sample Num: 79296, Cur Loss: 0.00081922, Cur Avg Loss: 0.00292226, Log Avg loss: 0.00302580, Global Avg Loss: 0.02803984, Time: 0.0611 Steps: 34800, Updated lr: 0.000086 ***** Running evaluation checkpoint-34818 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-34818 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 368.292096, Avg time per batch (s): 0.070000 {"eval_avg_loss": 0.002812, "eval_total_loss": 2.997276, "eval_acc": 0.999238, "eval_jaccard": 0.963815, "eval_prec": 0.967122, "eval_recall": 0.965931, "eval_f1": 0.965817, "eval_pr_auc": 0.98993, "eval_roc_auc": 0.998737, "eval_fmax": 0.987244, "eval_pmax": 0.991356, "eval_rmax": 0.983166, "eval_tmax": 0.21, "update_flag": true, "test_avg_loss": 0.002889, "test_total_loss": 3.07936, "test_acc": 0.999233, "test_jaccard": 0.962297, "test_prec": 0.965436, "test_recall": 0.964471, "test_f1": 0.964296, "test_pr_auc": 0.98879, "test_roc_auc": 0.998587, "test_fmax": 0.986435, "test_pmax": 0.991133, "test_rmax": 0.981781, "test_tmax": 0.21, "lr": 8.634719418651595e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.028026349204303972, "train_cur_epoch_loss": 14.517691682220175, "train_cur_epoch_avg_loss": 0.0029187156578649328, 
"train_cur_epoch_time": 368.29209637641907, "train_cur_epoch_avg_time": 0.07404344519027324, "epoch": 7, "step": 34818} ################################################## Training, Epoch: 0008, Batch: 000182, Sample Num: 2912, Cur Loss: 0.01020056, Cur Avg Loss: 0.00268656, Log Avg loss: 0.00261965, Global Avg Loss: 0.02789458, Time: 0.1194 Steps: 35000, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000382, Sample Num: 6112, Cur Loss: 0.00572245, Cur Avg Loss: 0.00262968, Log Avg loss: 0.00257792, Global Avg Loss: 0.02775074, Time: 0.0599 Steps: 35200, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000582, Sample Num: 9312, Cur Loss: 0.00042650, Cur Avg Loss: 0.00275687, Log Avg loss: 0.00299979, Global Avg Loss: 0.02761090, Time: 0.0616 Steps: 35400, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000782, Sample Num: 12512, Cur Loss: 0.00358687, Cur Avg Loss: 0.00271251, Log Avg loss: 0.00258343, Global Avg Loss: 0.02747030, Time: 0.0304 Steps: 35600, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000982, Sample Num: 15712, Cur Loss: 0.00237874, Cur Avg Loss: 0.00268461, Log Avg loss: 0.00257551, Global Avg Loss: 0.02733122, Time: 0.0608 Steps: 35800, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001182, Sample Num: 18912, Cur Loss: 0.00075264, Cur Avg Loss: 0.00269451, Log Avg loss: 0.00274314, Global Avg Loss: 0.02719462, Time: 0.0623 Steps: 36000, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001382, Sample Num: 22112, Cur Loss: 0.00131282, Cur Avg Loss: 0.00260292, Log Avg loss: 0.00206164, Global Avg Loss: 0.02705576, Time: 0.0605 Steps: 36200, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001582, Sample Num: 25312, Cur Loss: 0.00162734, Cur Avg Loss: 0.00252520, Log Avg loss: 0.00198813, Global Avg Loss: 0.02691803, Time: 0.1650 Steps: 36400, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001782, Sample Num: 28512, Cur Loss: 0.00288000, Cur Avg Loss: 0.00249615, Log Avg loss: 0.00226632, Global Avg Loss: 0.02678332, Time: 0.1199 Steps: 36600, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001982, Sample Num: 31712, Cur Loss: 0.00203753, Cur Avg Loss: 0.00247618, Log Avg loss: 0.00229833, Global Avg Loss: 0.02665025, Time: 0.1230 Steps: 36800, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 002182, Sample Num: 34912, Cur Loss: 0.00014419, Cur Avg Loss: 0.00249141, Log Avg loss: 0.00264225, Global Avg Loss: 0.02652048, Time: 0.0613 Steps: 37000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002382, Sample Num: 38112, Cur Loss: 0.01375678, Cur Avg Loss: 0.00248081, Log Avg loss: 0.00236524, Global Avg Loss: 0.02639061, Time: 0.0618 Steps: 37200, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002582, Sample Num: 41312, Cur Loss: 0.00276771, Cur Avg Loss: 0.00246051, Log Avg loss: 0.00221864, Global Avg Loss: 0.02626135, Time: 0.1067 Steps: 37400, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002782, Sample Num: 44512, Cur Loss: 0.00330135, Cur Avg Loss: 0.00244088, Log Avg loss: 0.00218754, Global Avg Loss: 0.02613330, Time: 0.0613 Steps: 37600, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002982, Sample Num: 47712, Cur Loss: 0.00133560, Cur Avg Loss: 0.00241330, Log Avg loss: 0.00202968, Global Avg Loss: 0.02600576, Time: 0.1120 Steps: 37800, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003182, Sample Num: 50912, Cur Loss: 0.00011247, Cur Avg Loss: 0.00239347, Log Avg loss: 0.00209783, Global Avg Loss: 0.02587993, Time: 0.0629 Steps: 38000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003382, Sample Num: 54112, 
Cur Loss: 0.00060996, Cur Avg Loss: 0.00238610, Log Avg loss: 0.00226880, Global Avg Loss: 0.02575631, Time: 0.0607 Steps: 38200, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003582, Sample Num: 57312, Cur Loss: 0.00601801, Cur Avg Loss: 0.00236532, Log Avg loss: 0.00201387, Global Avg Loss: 0.02563266, Time: 0.0595 Steps: 38400, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003782, Sample Num: 60512, Cur Loss: 0.00013292, Cur Avg Loss: 0.00236916, Log Avg loss: 0.00243805, Global Avg Loss: 0.02551248, Time: 0.0683 Steps: 38600, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003982, Sample Num: 63712, Cur Loss: 0.00140781, Cur Avg Loss: 0.00236360, Log Avg loss: 0.00225832, Global Avg Loss: 0.02539261, Time: 0.0620 Steps: 38800, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 004182, Sample Num: 66912, Cur Loss: 0.00107890, Cur Avg Loss: 0.00236510, Log Avg loss: 0.00239503, Global Avg Loss: 0.02527467, Time: 0.0333 Steps: 39000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 004382, Sample Num: 70112, Cur Loss: 0.00069947, Cur Avg Loss: 0.00234932, Log Avg loss: 0.00201941, Global Avg Loss: 0.02515602, Time: 0.0606 Steps: 39200, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 004582, Sample Num: 73312, Cur Loss: 0.00060002, Cur Avg Loss: 0.00234940, Log Avg loss: 0.00235104, Global Avg Loss: 0.02504026, Time: 0.0623 Steps: 39400, Updated lr: 0.000084 Training, Epoch: 0008, Batch: 004782, Sample Num: 76512, Cur Loss: 0.00327746, Cur Avg Loss: 0.00234222, Log Avg loss: 0.00217769, Global Avg Loss: 0.02492480, Time: 0.0599 Steps: 39600, Updated lr: 0.000084 ***** Running evaluation checkpoint-39792 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-39792 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 393.930157, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.00246, "eval_total_loss": 2.622656, "eval_acc": 0.999319, "eval_jaccard": 0.970131, "eval_prec": 0.973499, "eval_recall": 0.972548, "eval_f1": 0.97226, "eval_pr_auc": 0.991346, "eval_roc_auc": 0.998933, "eval_fmax": 0.988801, "eval_pmax": 0.993218, "eval_rmax": 0.984423, "eval_tmax": 0.24, "update_flag": true, "test_avg_loss": 0.002537, "test_total_loss": 2.704184, "test_acc": 0.999321, "test_jaccard": 0.968593, "test_prec": 0.971926, "test_recall": 0.970974, "test_f1": 0.970736, "test_pr_auc": 0.990164, "test_roc_auc": 0.998786, "test_fmax": 0.987997, "test_pmax": 0.989485, "test_rmax": 0.986513, "test_tmax": 0.18, "lr": 8.43391199031086e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.02481509686694909, "train_cur_epoch_loss": 11.620907934182469, "train_cur_epoch_avg_loss": 0.0023363305054649114, "train_cur_epoch_time": 393.93015718460083, "train_cur_epoch_avg_time": 0.07919786031053495, "epoch": 8, "step": 39792} ################################################## Training, Epoch: 0009, Batch: 000008, Sample Num: 128, Cur Loss: 0.00283002, Cur Avg Loss: 0.00123619, Log Avg loss: 0.00215160, Global Avg Loss: 0.02481036, Time: 0.0589 Steps: 39800, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000208, Sample Num: 3328, Cur Loss: 0.01002828, Cur Avg Loss: 0.00225978, Log Avg loss: 0.00230072, Global Avg Loss: 0.02469781, Time: 0.1073 Steps: 40000, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000408, Sample Num: 6528, Cur Loss: 0.00080262, Cur Avg Loss: 0.00217196, 
Log Avg loss: 0.00208062, Global Avg Loss: 0.02458529, Time: 0.0606 Steps: 40200, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000608, Sample Num: 9728, Cur Loss: 0.00146803, Cur Avg Loss: 0.00229145, Log Avg loss: 0.00253521, Global Avg Loss: 0.02447613, Time: 0.0705 Steps: 40400, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000808, Sample Num: 12928, Cur Loss: 0.00102023, Cur Avg Loss: 0.00223049, Log Avg loss: 0.00204519, Global Avg Loss: 0.02436563, Time: 0.0243 Steps: 40600, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001008, Sample Num: 16128, Cur Loss: 0.00048517, Cur Avg Loss: 0.00218574, Log Avg loss: 0.00200493, Global Avg Loss: 0.02425602, Time: 0.0610 Steps: 40800, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001208, Sample Num: 19328, Cur Loss: 0.00026065, Cur Avg Loss: 0.00220510, Log Avg loss: 0.00230268, Global Avg Loss: 0.02414893, Time: 0.0668 Steps: 41000, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001408, Sample Num: 22528, Cur Loss: 0.00018629, Cur Avg Loss: 0.00212600, Log Avg loss: 0.00164824, Global Avg Loss: 0.02403970, Time: 0.0716 Steps: 41200, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001608, Sample Num: 25728, Cur Loss: 0.00051038, Cur Avg Loss: 0.00207259, Log Avg loss: 0.00169655, Global Avg Loss: 0.02393176, Time: 0.0608 Steps: 41400, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001808, Sample Num: 28928, Cur Loss: 0.00033003, Cur Avg Loss: 0.00204829, Log Avg loss: 0.00185297, Global Avg Loss: 0.02382562, Time: 0.0577 Steps: 41600, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 002008, Sample Num: 32128, Cur Loss: 0.00092896, Cur Avg Loss: 0.00202184, Log Avg loss: 0.00178273, Global Avg Loss: 0.02372015, Time: 0.0541 Steps: 41800, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 002208, Sample Num: 35328, Cur Loss: 0.00043160, Cur Avg Loss: 0.00204147, Log Avg loss: 0.00223850, Global Avg Loss: 0.02361785, Time: 0.0609 Steps: 42000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 002408, Sample Num: 38528, Cur Loss: 0.00155476, Cur Avg Loss: 0.00200772, Log Avg loss: 0.00163521, Global Avg Loss: 0.02351367, Time: 0.0635 Steps: 42200, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 002608, Sample Num: 41728, Cur Loss: 0.00215897, Cur Avg Loss: 0.00199488, Log Avg loss: 0.00184030, Global Avg Loss: 0.02341144, Time: 0.0602 Steps: 42400, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 002808, Sample Num: 44928, Cur Loss: 0.00080160, Cur Avg Loss: 0.00198088, Log Avg loss: 0.00179820, Global Avg Loss: 0.02330997, Time: 0.0583 Steps: 42600, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003008, Sample Num: 48128, Cur Loss: 0.00175538, Cur Avg Loss: 0.00197155, Log Avg loss: 0.00184056, Global Avg Loss: 0.02320964, Time: 0.0424 Steps: 42800, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003208, Sample Num: 51328, Cur Loss: 0.00042864, Cur Avg Loss: 0.00194207, Log Avg loss: 0.00149882, Global Avg Loss: 0.02310866, Time: 0.0612 Steps: 43000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003408, Sample Num: 54528, Cur Loss: 0.00129155, Cur Avg Loss: 0.00194349, Log Avg loss: 0.00196620, Global Avg Loss: 0.02301078, Time: 0.0807 Steps: 43200, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003608, Sample Num: 57728, Cur Loss: 0.00036844, Cur Avg Loss: 0.00192863, Log Avg loss: 0.00167544, Global Avg Loss: 0.02291246, Time: 0.1214 Steps: 43400, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003808, Sample Num: 60928, Cur Loss: 0.00021481, Cur Avg Loss: 0.00192837, Log Avg loss: 
0.00192375, Global Avg Loss: 0.02281618, Time: 0.1055 Steps: 43600, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004008, Sample Num: 64128, Cur Loss: 0.00411555, Cur Avg Loss: 0.00193201, Log Avg loss: 0.00200126, Global Avg Loss: 0.02272114, Time: 0.0622 Steps: 43800, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004208, Sample Num: 67328, Cur Loss: 0.00494471, Cur Avg Loss: 0.00193364, Log Avg loss: 0.00196637, Global Avg Loss: 0.02262680, Time: 0.0633 Steps: 44000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004408, Sample Num: 70528, Cur Loss: 0.00106574, Cur Avg Loss: 0.00191731, Log Avg loss: 0.00157374, Global Avg Loss: 0.02253154, Time: 0.0601 Steps: 44200, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004608, Sample Num: 73728, Cur Loss: 0.00078268, Cur Avg Loss: 0.00191734, Log Avg loss: 0.00191790, Global Avg Loss: 0.02243868, Time: 0.0696 Steps: 44400, Updated lr: 0.000082 Training, Epoch: 0009, Batch: 004808, Sample Num: 76928, Cur Loss: 0.00118218, Cur Avg Loss: 0.00191665, Log Avg loss: 0.00190084, Global Avg Loss: 0.02234658, Time: 0.0695 Steps: 44600, Updated lr: 0.000082 ***** Running evaluation checkpoint-44766 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-44766 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 390.240990, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.002132, "eval_total_loss": 2.272223, "eval_acc": 0.999417, "eval_jaccard": 0.975082, "eval_prec": 0.978161, "eval_recall": 0.97698, "eval_f1": 0.976907, "eval_pr_auc": 0.993107, "eval_roc_auc": 0.999085, "eval_fmax": 0.99061, "eval_pmax": 0.993693, "eval_rmax": 0.987547, "eval_tmax": 0.22, "update_flag": true, "test_avg_loss": 0.002215, "test_total_loss": 2.360727, "test_acc": 0.999409, "test_jaccard": 0.973652, "test_prec": 0.976832, "test_recall": 0.975716, "test_f1": 0.975615, "test_pr_auc": 0.99199, "test_roc_auc": 0.998946, "test_fmax": 0.989976, "test_pmax": 0.993007, "test_rmax": 0.986962, "test_tmax": 0.21, "lr": 8.233104561970125e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.02226999406699519, "train_cur_epoch_loss": 9.496219873468362, "train_cur_epoch_avg_loss": 0.0019091716673639652, "train_cur_epoch_time": 390.2409899234772, "train_cur_epoch_avg_time": 0.07845617006905452, "epoch": 9, "step": 44766} ################################################## Training, Epoch: 0010, Batch: 000034, Sample Num: 544, Cur Loss: 0.00047123, Cur Avg Loss: 0.00162232, Log Avg loss: 0.00168054, Global Avg Loss: 0.02225432, Time: 0.0256 Steps: 44800, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000234, Sample Num: 3744, Cur Loss: 0.00033287, Cur Avg Loss: 0.00177286, Log Avg loss: 0.00179845, Global Avg Loss: 0.02216341, Time: 0.0624 Steps: 45000, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000434, Sample Num: 6944, Cur Loss: 0.00026709, Cur Avg Loss: 0.00187820, Log Avg loss: 0.00200146, Global Avg Loss: 0.02207420, Time: 0.0596 Steps: 45200, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000634, Sample Num: 10144, Cur Loss: 0.00090805, Cur Avg Loss: 0.00191537, Log Avg loss: 0.00199603, Global Avg Loss: 0.02198575, Time: 0.0618 Steps: 45400, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000834, Sample Num: 13344, Cur Loss: 0.00192247, Cur Avg Loss: 0.00185992, Log Avg loss: 0.00168414, Global Avg Loss: 0.02189670, Time: 
0.0705 Steps: 45600, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001034, Sample Num: 16544, Cur Loss: 0.00300196, Cur Avg Loss: 0.00181402, Log Avg loss: 0.00162260, Global Avg Loss: 0.02180817, Time: 0.0607 Steps: 45800, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001234, Sample Num: 19744, Cur Loss: 0.00041375, Cur Avg Loss: 0.00184196, Log Avg loss: 0.00198644, Global Avg Loss: 0.02172199, Time: 0.0617 Steps: 46000, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001434, Sample Num: 22944, Cur Loss: 0.00137062, Cur Avg Loss: 0.00176506, Log Avg loss: 0.00129060, Global Avg Loss: 0.02163354, Time: 0.0613 Steps: 46200, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001634, Sample Num: 26144, Cur Loss: 0.00080877, Cur Avg Loss: 0.00172208, Log Avg loss: 0.00141387, Global Avg Loss: 0.02154639, Time: 0.1186 Steps: 46400, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001834, Sample Num: 29344, Cur Loss: 0.00008826, Cur Avg Loss: 0.00170098, Log Avg loss: 0.00152863, Global Avg Loss: 0.02146048, Time: 0.0611 Steps: 46600, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 002034, Sample Num: 32544, Cur Loss: 0.01096957, Cur Avg Loss: 0.00168878, Log Avg loss: 0.00157684, Global Avg Loss: 0.02137550, Time: 0.0694 Steps: 46800, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 002234, Sample Num: 35744, Cur Loss: 0.00433304, Cur Avg Loss: 0.00169669, Log Avg loss: 0.00177713, Global Avg Loss: 0.02129211, Time: 0.0608 Steps: 47000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 002434, Sample Num: 38944, Cur Loss: 0.00018688, Cur Avg Loss: 0.00167624, Log Avg loss: 0.00144789, Global Avg Loss: 0.02120802, Time: 0.0690 Steps: 47200, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 002634, Sample Num: 42144, Cur Loss: 0.00036632, Cur Avg Loss: 0.00167129, Log Avg loss: 0.00161097, Global Avg Loss: 0.02112533, Time: 0.0602 Steps: 47400, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 002834, Sample Num: 45344, Cur Loss: 0.00075995, Cur Avg Loss: 0.00165900, Log Avg loss: 0.00149719, Global Avg Loss: 0.02104286, Time: 0.0619 Steps: 47600, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003034, Sample Num: 48544, Cur Loss: 0.00423515, Cur Avg Loss: 0.00165449, Log Avg loss: 0.00159063, Global Avg Loss: 0.02096147, Time: 0.0605 Steps: 47800, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003234, Sample Num: 51744, Cur Loss: 0.00212053, Cur Avg Loss: 0.00163176, Log Avg loss: 0.00128685, Global Avg Loss: 0.02087949, Time: 0.0644 Steps: 48000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003434, Sample Num: 54944, Cur Loss: 0.00063684, Cur Avg Loss: 0.00162248, Log Avg loss: 0.00147245, Global Avg Loss: 0.02079897, Time: 0.1179 Steps: 48200, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003634, Sample Num: 58144, Cur Loss: 0.00048856, Cur Avg Loss: 0.00161440, Log Avg loss: 0.00147569, Global Avg Loss: 0.02071912, Time: 0.0612 Steps: 48400, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003834, Sample Num: 61344, Cur Loss: 0.00047708, Cur Avg Loss: 0.00161147, Log Avg loss: 0.00155828, Global Avg Loss: 0.02064027, Time: 0.0607 Steps: 48600, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004034, Sample Num: 64544, Cur Loss: 0.00040566, Cur Avg Loss: 0.00162077, Log Avg loss: 0.00179894, Global Avg Loss: 0.02056305, Time: 0.0619 Steps: 48800, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004234, Sample Num: 67744, Cur Loss: 0.00168648, Cur Avg Loss: 0.00162217, Log Avg loss: 0.00165045, Global Avg Loss: 0.02048585, Time: 0.1238 Steps: 
49000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004434, Sample Num: 70944, Cur Loss: 0.00096129, Cur Avg Loss: 0.00160398, Log Avg loss: 0.00121902, Global Avg Loss: 0.02040753, Time: 0.0602 Steps: 49200, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004634, Sample Num: 74144, Cur Loss: 0.00301875, Cur Avg Loss: 0.00160811, Log Avg loss: 0.00169957, Global Avg Loss: 0.02033179, Time: 0.1153 Steps: 49400, Updated lr: 0.000080 Training, Epoch: 0010, Batch: 004834, Sample Num: 77344, Cur Loss: 0.00076136, Cur Avg Loss: 0.00161015, Log Avg loss: 0.00165737, Global Avg Loss: 0.02025649, Time: 0.0608 Steps: 49600, Updated lr: 0.000080 ***** Running evaluation checkpoint-49740 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-49740 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 370.007833, Avg time per batch (s): 0.070000 {"eval_avg_loss": 0.001918, "eval_total_loss": 2.044692, "eval_acc": 0.999475, "eval_jaccard": 0.978519, "eval_prec": 0.981373, "eval_recall": 0.98011, "eval_f1": 0.980139, "eval_pr_auc": 0.993453, "eval_roc_auc": 0.999131, "eval_fmax": 0.991795, "eval_pmax": 0.995354, "eval_rmax": 0.988261, "eval_tmax": 0.24, "update_flag": true, "test_avg_loss": 0.001975, "test_total_loss": 2.105717, "test_acc": 0.999484, "test_jaccard": 0.977576, "test_prec": 0.980761, "test_recall": 0.97932, "test_f1": 0.979402, "test_pr_auc": 0.992786, "test_roc_auc": 0.999044, "test_fmax": 0.991162, "test_pmax": 0.994345, "test_rmax": 0.987999, "test_tmax": 0.23, "lr": 8.03229713362939e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.020203388477211284, "train_cur_epoch_loss": 7.97798845338275, "train_cur_epoch_avg_loss": 0.001603938169156162, "train_cur_epoch_time": 370.00783252716064, "train_cur_epoch_avg_time": 0.07438838611322088, "epoch": 10, "step": 49740} ################################################## Training, Epoch: 0011, Batch: 000060, Sample Num: 960, Cur Loss: 0.00067477, Cur Avg Loss: 0.00152589, Log Avg loss: 0.00143043, Global Avg Loss: 0.02018089, Time: 0.1121 Steps: 49800, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000260, Sample Num: 4160, Cur Loss: 0.00058519, Cur Avg Loss: 0.00150556, Log Avg loss: 0.00149946, Global Avg Loss: 0.02010616, Time: 0.0639 Steps: 50000, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000460, Sample Num: 7360, Cur Loss: 0.00088523, Cur Avg Loss: 0.00161394, Log Avg loss: 0.00175482, Global Avg Loss: 0.02003305, Time: 0.0240 Steps: 50200, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000660, Sample Num: 10560, Cur Loss: 0.00137383, Cur Avg Loss: 0.00159640, Log Avg loss: 0.00155605, Global Avg Loss: 0.01995973, Time: 0.0628 Steps: 50400, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000860, Sample Num: 13760, Cur Loss: 0.00154744, Cur Avg Loss: 0.00158702, Log Avg loss: 0.00155606, Global Avg Loss: 0.01988698, Time: 0.0645 Steps: 50600, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001060, Sample Num: 16960, Cur Loss: 0.00030154, Cur Avg Loss: 0.00155205, Log Avg loss: 0.00140170, Global Avg Loss: 0.01981421, Time: 0.0611 Steps: 50800, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001260, Sample Num: 20160, Cur Loss: 0.00005662, Cur Avg Loss: 0.00154225, Log Avg loss: 0.00149031, Global Avg Loss: 0.01974235, Time: 0.0603 Steps: 51000, Updated lr: 0.000080 Training, Epoch: 
0011, Batch: 001460, Sample Num: 23360, Cur Loss: 0.00072271, Cur Avg Loss: 0.00148611, Log Avg loss: 0.00113242, Global Avg Loss: 0.01966965, Time: 0.0597 Steps: 51200, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001660, Sample Num: 26560, Cur Loss: 0.00018079, Cur Avg Loss: 0.00143973, Log Avg loss: 0.00110117, Global Avg Loss: 0.01959740, Time: 0.1151 Steps: 51400, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001860, Sample Num: 29760, Cur Loss: 0.00106698, Cur Avg Loss: 0.00142398, Log Avg loss: 0.00129327, Global Avg Loss: 0.01952646, Time: 0.1207 Steps: 51600, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 002060, Sample Num: 32960, Cur Loss: 0.00036360, Cur Avg Loss: 0.00141712, Log Avg loss: 0.00135326, Global Avg Loss: 0.01945629, Time: 0.1158 Steps: 51800, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002260, Sample Num: 36160, Cur Loss: 0.00527422, Cur Avg Loss: 0.00143390, Log Avg loss: 0.00160682, Global Avg Loss: 0.01938764, Time: 0.0602 Steps: 52000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002460, Sample Num: 39360, Cur Loss: 0.00026382, Cur Avg Loss: 0.00141140, Log Avg loss: 0.00115709, Global Avg Loss: 0.01931779, Time: 0.0625 Steps: 52200, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002660, Sample Num: 42560, Cur Loss: 0.00009179, Cur Avg Loss: 0.00140722, Log Avg loss: 0.00135588, Global Avg Loss: 0.01924923, Time: 0.0617 Steps: 52400, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002860, Sample Num: 45760, Cur Loss: 0.00076463, Cur Avg Loss: 0.00139872, Log Avg loss: 0.00128557, Global Avg Loss: 0.01918093, Time: 0.0689 Steps: 52600, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003060, Sample Num: 48960, Cur Loss: 0.00266923, Cur Avg Loss: 0.00138686, Log Avg loss: 0.00121726, Global Avg Loss: 0.01911289, Time: 0.0607 Steps: 52800, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003260, Sample Num: 52160, Cur Loss: 0.00043632, Cur Avg Loss: 0.00136883, Log Avg loss: 0.00109305, Global Avg Loss: 0.01904489, Time: 0.0632 Steps: 53000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003460, Sample Num: 55360, Cur Loss: 0.00014868, Cur Avg Loss: 0.00136410, Log Avg loss: 0.00128701, Global Avg Loss: 0.01897813, Time: 0.0699 Steps: 53200, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003660, Sample Num: 58560, Cur Loss: 0.00037911, Cur Avg Loss: 0.00135943, Log Avg loss: 0.00127856, Global Avg Loss: 0.01891184, Time: 0.1158 Steps: 53400, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003860, Sample Num: 61760, Cur Loss: 0.00041742, Cur Avg Loss: 0.00135326, Log Avg loss: 0.00124036, Global Avg Loss: 0.01884590, Time: 0.1198 Steps: 53600, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004060, Sample Num: 64960, Cur Loss: 0.00471708, Cur Avg Loss: 0.00136613, Log Avg loss: 0.00161460, Global Avg Loss: 0.01878184, Time: 0.0606 Steps: 53800, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004260, Sample Num: 68160, Cur Loss: 0.00208147, Cur Avg Loss: 0.00136895, Log Avg loss: 0.00142614, Global Avg Loss: 0.01871756, Time: 0.1450 Steps: 54000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004460, Sample Num: 71360, Cur Loss: 0.00143096, Cur Avg Loss: 0.00135836, Log Avg loss: 0.00113284, Global Avg Loss: 0.01865267, Time: 0.0669 Steps: 54200, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004660, Sample Num: 74560, Cur Loss: 0.00006184, Cur Avg Loss: 0.00136303, Log Avg loss: 0.00146708, Global Avg Loss: 0.01858949, Time: 0.0609 Steps: 54400, Updated lr: 0.000078 Training, Epoch: 0011, Batch: 
004860, Sample Num: 77760, Cur Loss: 0.00127839, Cur Avg Loss: 0.00136559, Log Avg loss: 0.00142523, Global Avg Loss: 0.01852662, Time: 0.1151 Steps: 54600, Updated lr: 0.000078 ***** Running evaluation checkpoint-54714 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-54714 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 371.474913, Avg time per batch (s): 0.070000 {"eval_avg_loss": 0.001839, "eval_total_loss": 1.960113, "eval_acc": 0.999512, "eval_jaccard": 0.980812, "eval_prec": 0.983952, "eval_recall": 0.98235, "eval_f1": 0.982484, "eval_pr_auc": 0.9942, "eval_roc_auc": 0.999197, "eval_fmax": 0.992144, "eval_pmax": 0.995884, "eval_rmax": 0.988433, "eval_tmax": 0.27, "update_flag": true, "test_avg_loss": 0.001894, "test_total_loss": 2.018945, "test_acc": 0.999512, "test_jaccard": 0.97969, "test_prec": 0.983004, "test_recall": 0.98136, "test_f1": 0.981518, "test_pr_auc": 0.993336, "test_roc_auc": 0.9991, "test_fmax": 0.991614, "test_pmax": 0.994741, "test_rmax": 0.988506, "test_tmax": 0.23, "lr": 7.831489705288656e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.018490681318822998, "train_cur_epoch_loss": 6.7825948215922836, "train_cur_epoch_avg_loss": 0.0013636097349401454, "train_cur_epoch_time": 371.4749131202698, "train_cur_epoch_avg_time": 0.0746833359711037, "epoch": 11, "step": 54714} ################################################## Training, Epoch: 0012, Batch: 000086, Sample Num: 1376, Cur Loss: 0.00018973, Cur Avg Loss: 0.00133124, Log Avg loss: 0.00130162, Global Avg Loss: 0.01846375, Time: 0.0613 Steps: 54800, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000286, Sample Num: 4576, Cur Loss: 0.00012056, Cur Avg Loss: 0.00134282, Log Avg loss: 0.00134779, Global Avg Loss: 0.01840151, Time: 0.0816 Steps: 55000, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000486, Sample Num: 7776, Cur Loss: 0.00253457, Cur Avg Loss: 0.00145776, Log Avg loss: 0.00162214, Global Avg Loss: 0.01834072, Time: 0.0660 Steps: 55200, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000686, Sample Num: 10976, Cur Loss: 0.00011026, Cur Avg Loss: 0.00139019, Log Avg loss: 0.00122599, Global Avg Loss: 0.01827893, Time: 0.0587 Steps: 55400, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000886, Sample Num: 14176, Cur Loss: 0.00003875, Cur Avg Loss: 0.00139615, Log Avg loss: 0.00141657, Global Avg Loss: 0.01821828, Time: 0.1185 Steps: 55600, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001086, Sample Num: 17376, Cur Loss: 0.00140924, Cur Avg Loss: 0.00138753, Log Avg loss: 0.00134934, Global Avg Loss: 0.01815781, Time: 0.0612 Steps: 55800, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001286, Sample Num: 20576, Cur Loss: 0.00028422, Cur Avg Loss: 0.00133270, Log Avg loss: 0.00103497, Global Avg Loss: 0.01809666, Time: 0.0618 Steps: 56000, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001486, Sample Num: 23776, Cur Loss: 0.00011275, Cur Avg Loss: 0.00127513, Log Avg loss: 0.00090499, Global Avg Loss: 0.01803548, Time: 0.1157 Steps: 56200, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001686, Sample Num: 26976, Cur Loss: 0.00111824, Cur Avg Loss: 0.00124919, Log Avg loss: 0.00105642, Global Avg Loss: 0.01797527, Time: 0.0591 Steps: 56400, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001886, Sample Num: 30176, Cur Loss: 0.00005905, 
Cur Avg Loss: 0.00122198, Log Avg loss: 0.00099263, Global Avg Loss: 0.01791526, Time: 0.0601 Steps: 56600, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 002086, Sample Num: 33376, Cur Loss: 0.00103871, Cur Avg Loss: 0.00123895, Log Avg loss: 0.00139895, Global Avg Loss: 0.01785711, Time: 0.1068 Steps: 56800, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002286, Sample Num: 36576, Cur Loss: 0.00027617, Cur Avg Loss: 0.00124469, Log Avg loss: 0.00130455, Global Avg Loss: 0.01779903, Time: 0.0609 Steps: 57000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002486, Sample Num: 39776, Cur Loss: 0.00057770, Cur Avg Loss: 0.00122764, Log Avg loss: 0.00103284, Global Avg Loss: 0.01774040, Time: 0.0611 Steps: 57200, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002686, Sample Num: 42976, Cur Loss: 0.00478015, Cur Avg Loss: 0.00122771, Log Avg loss: 0.00122846, Global Avg Loss: 0.01768287, Time: 0.0936 Steps: 57400, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002886, Sample Num: 46176, Cur Loss: 0.00078427, Cur Avg Loss: 0.00122113, Log Avg loss: 0.00113279, Global Avg Loss: 0.01762540, Time: 0.0715 Steps: 57600, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003086, Sample Num: 49376, Cur Loss: 0.00047344, Cur Avg Loss: 0.00120488, Log Avg loss: 0.00097043, Global Avg Loss: 0.01756778, Time: 0.1521 Steps: 57800, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003286, Sample Num: 52576, Cur Loss: 0.00506057, Cur Avg Loss: 0.00119333, Log Avg loss: 0.00101505, Global Avg Loss: 0.01751070, Time: 0.0605 Steps: 58000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003486, Sample Num: 55776, Cur Loss: 0.00016142, Cur Avg Loss: 0.00118655, Log Avg loss: 0.00107529, Global Avg Loss: 0.01745422, Time: 0.0605 Steps: 58200, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003686, Sample Num: 58976, Cur Loss: 0.03010305, Cur Avg Loss: 0.00119343, Log Avg loss: 0.00131323, Global Avg Loss: 0.01739894, Time: 0.0625 Steps: 58400, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003886, Sample Num: 62176, Cur Loss: 0.00507416, Cur Avg Loss: 0.00117750, Log Avg loss: 0.00088386, Global Avg Loss: 0.01734257, Time: 0.0600 Steps: 58600, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004086, Sample Num: 65376, Cur Loss: 0.00023647, Cur Avg Loss: 0.00118722, Log Avg loss: 0.00137615, Global Avg Loss: 0.01728827, Time: 0.0601 Steps: 58800, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004286, Sample Num: 68576, Cur Loss: 0.00069166, Cur Avg Loss: 0.00119037, Log Avg loss: 0.00125474, Global Avg Loss: 0.01723392, Time: 0.1216 Steps: 59000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004486, Sample Num: 71776, Cur Loss: 0.00342792, Cur Avg Loss: 0.00118206, Log Avg loss: 0.00100399, Global Avg Loss: 0.01717909, Time: 0.0700 Steps: 59200, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004686, Sample Num: 74976, Cur Loss: 0.00026159, Cur Avg Loss: 0.00118546, Log Avg loss: 0.00126174, Global Avg Loss: 0.01712549, Time: 0.0612 Steps: 59400, Updated lr: 0.000076 Training, Epoch: 0012, Batch: 004886, Sample Num: 78176, Cur Loss: 0.00389813, Cur Avg Loss: 0.00118880, Log Avg loss: 0.00126694, Global Avg Loss: 0.01707228, Time: 0.1013 Steps: 59600, Updated lr: 0.000076 ***** Running evaluation checkpoint-59688 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-59688 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = 
[17053] ################################################## Epoch Time: 385.046832, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001697, "eval_total_loss": 1.809233, "eval_acc": 0.999546, "eval_jaccard": 0.982669, "eval_prec": 0.985403, "eval_recall": 0.984065, "eval_f1": 0.98417, "eval_pr_auc": 0.994519, "eval_roc_auc": 0.99924, "eval_fmax": 0.99289, "eval_pmax": 0.995021, "eval_rmax": 0.990767, "eval_tmax": 0.25, "update_flag": true, "test_avg_loss": 0.00176, "test_total_loss": 1.875873, "test_acc": 0.999538, "test_jaccard": 0.981345, "test_prec": 0.984189, "test_recall": 0.983169, "test_f1": 0.983071, "test_pr_auc": 0.993579, "test_roc_auc": 0.999132, "test_fmax": 0.992535, "test_pmax": 0.994599, "test_rmax": 0.99048, "test_tmax": 0.24, "lr": 7.630682276947921e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.017048810452031746, "train_cur_epoch_loss": 5.910260582789306, "train_cur_epoch_avg_loss": 0.0011882309173279666, "train_cur_epoch_time": 385.0468316078186, "train_cur_epoch_avg_time": 0.07741190824443478, "epoch": 12, "step": 59688} ################################################## Training, Epoch: 0013, Batch: 000112, Sample Num: 1792, Cur Loss: 0.00180334, Cur Avg Loss: 0.00128839, Log Avg loss: 0.00123052, Global Avg Loss: 0.01701929, Time: 0.1115 Steps: 59800, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000312, Sample Num: 4992, Cur Loss: 0.00823953, Cur Avg Loss: 0.00123442, Log Avg loss: 0.00120420, Global Avg Loss: 0.01696658, Time: 0.0615 Steps: 60000, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000512, Sample Num: 8192, Cur Loss: 0.00009782, Cur Avg Loss: 0.00129536, Log Avg loss: 0.00139043, Global Avg Loss: 0.01691483, Time: 0.0254 Steps: 60200, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000712, Sample Num: 11392, Cur Loss: 0.00039179, Cur Avg Loss: 0.00122247, Log Avg loss: 0.00103588, Global Avg Loss: 0.01686225, Time: 0.0238 Steps: 60400, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000912, Sample Num: 14592, Cur Loss: 0.00233890, Cur Avg Loss: 0.00121540, Log Avg loss: 0.00119023, Global Avg Loss: 0.01681053, Time: 0.0598 Steps: 60600, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001112, Sample Num: 17792, Cur Loss: 0.00052526, Cur Avg Loss: 0.00120443, Log Avg loss: 0.00115442, Global Avg Loss: 0.01675903, Time: 0.0242 Steps: 60800, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001312, Sample Num: 20992, Cur Loss: 0.00013870, Cur Avg Loss: 0.00116707, Log Avg loss: 0.00095930, Global Avg Loss: 0.01670722, Time: 0.0610 Steps: 61000, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001512, Sample Num: 24192, Cur Loss: 0.00032179, Cur Avg Loss: 0.00110876, Log Avg loss: 0.00072624, Global Avg Loss: 0.01665500, Time: 0.1195 Steps: 61200, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001712, Sample Num: 27392, Cur Loss: 0.00023140, Cur Avg Loss: 0.00108571, Log Avg loss: 0.00091148, Global Avg Loss: 0.01660372, Time: 0.0615 Steps: 61400, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001912, Sample Num: 30592, Cur Loss: 0.00062873, Cur Avg Loss: 0.00106442, Log Avg loss: 0.00088213, Global Avg Loss: 0.01655267, Time: 0.0616 Steps: 61600, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 002112, Sample Num: 33792, Cur Loss: 0.01099779, Cur Avg Loss: 0.00108246, Log Avg loss: 0.00125497, Global Avg Loss: 0.01650316, Time: 0.1279 Steps: 61800, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002312, Sample Num: 36992, Cur Loss: 0.00057087, Cur Avg Loss: 0.00108241, Log Avg loss: 0.00108192, Global Avg Loss: 
0.01645342, Time: 0.0603 Steps: 62000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002512, Sample Num: 40192, Cur Loss: 0.00036837, Cur Avg Loss: 0.00106567, Log Avg loss: 0.00087214, Global Avg Loss: 0.01640332, Time: 0.1231 Steps: 62200, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002712, Sample Num: 43392, Cur Loss: 0.00255130, Cur Avg Loss: 0.00106683, Log Avg loss: 0.00108140, Global Avg Loss: 0.01635421, Time: 0.0608 Steps: 62400, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002912, Sample Num: 46592, Cur Loss: 0.00006412, Cur Avg Loss: 0.00105973, Log Avg loss: 0.00096337, Global Avg Loss: 0.01630504, Time: 0.0688 Steps: 62600, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003112, Sample Num: 49792, Cur Loss: 0.00074476, Cur Avg Loss: 0.00104920, Log Avg loss: 0.00089591, Global Avg Loss: 0.01625596, Time: 0.0641 Steps: 62800, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003312, Sample Num: 52992, Cur Loss: 0.00013676, Cur Avg Loss: 0.00104127, Log Avg loss: 0.00091793, Global Avg Loss: 0.01620727, Time: 0.1025 Steps: 63000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003512, Sample Num: 56192, Cur Loss: 0.00035356, Cur Avg Loss: 0.00103657, Log Avg loss: 0.00095882, Global Avg Loss: 0.01615902, Time: 0.1524 Steps: 63200, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003712, Sample Num: 59392, Cur Loss: 0.00014515, Cur Avg Loss: 0.00104335, Log Avg loss: 0.00116231, Global Avg Loss: 0.01611171, Time: 0.0645 Steps: 63400, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003912, Sample Num: 62592, Cur Loss: 0.00097079, Cur Avg Loss: 0.00103300, Log Avg loss: 0.00084097, Global Avg Loss: 0.01606369, Time: 0.0598 Steps: 63600, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 004112, Sample Num: 65792, Cur Loss: 0.00125487, Cur Avg Loss: 0.00104139, Log Avg loss: 0.00120555, Global Avg Loss: 0.01601711, Time: 0.0475 Steps: 63800, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 004312, Sample Num: 68992, Cur Loss: 0.00019575, Cur Avg Loss: 0.00104170, Log Avg loss: 0.00104806, Global Avg Loss: 0.01597033, Time: 0.0864 Steps: 64000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 004512, Sample Num: 72192, Cur Loss: 0.00024515, Cur Avg Loss: 0.00103832, Log Avg loss: 0.00096531, Global Avg Loss: 0.01592359, Time: 0.1021 Steps: 64200, Updated lr: 0.000074 Training, Epoch: 0013, Batch: 004712, Sample Num: 75392, Cur Loss: 0.00009947, Cur Avg Loss: 0.00103973, Log Avg loss: 0.00107167, Global Avg Loss: 0.01587746, Time: 0.1188 Steps: 64400, Updated lr: 0.000074 Training, Epoch: 0013, Batch: 004912, Sample Num: 78592, Cur Loss: 0.00370865, Cur Avg Loss: 0.00104285, Log Avg loss: 0.00111626, Global Avg Loss: 0.01583176, Time: 0.1184 Steps: 64600, Updated lr: 0.000074 ***** Running evaluation checkpoint-64662 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-64662 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 367.250870, Avg time per batch (s): 0.070000 {"eval_avg_loss": 0.001607, "eval_total_loss": 1.712625, "eval_acc": 0.999571, "eval_jaccard": 0.983395, "eval_prec": 0.985775, "eval_recall": 0.984729, "eval_f1": 0.984741, "eval_pr_auc": 0.994754, "eval_roc_auc": 0.99928, "eval_fmax": 0.993599, "eval_pmax": 0.997016, "eval_rmax": 0.990206, "eval_tmax": 0.3, "update_flag": true, "test_avg_loss": 0.001672, 
"test_total_loss": 1.782099, "test_acc": 0.999567, "test_jaccard": 0.982413, "test_prec": 0.984895, "test_recall": 0.984013, "test_f1": 0.983914, "test_pr_auc": 0.993893, "test_roc_auc": 0.999184, "test_fmax": 0.993109, "test_pmax": 0.996554, "test_rmax": 0.989688, "test_tmax": 0.3, "lr": 7.429874848607187e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.015817651590872138, "train_cur_epoch_loss": 5.191588908103313, "train_cur_epoch_avg_loss": 0.0010437452569568382, "train_cur_epoch_time": 367.25086975097656, "train_cur_epoch_avg_time": 0.07383411132910667, "epoch": 13, "step": 64662} ################################################## Training, Epoch: 0014, Batch: 000138, Sample Num: 2208, Cur Loss: 0.00006475, Cur Avg Loss: 0.00110331, Log Avg loss: 0.00110685, Global Avg Loss: 0.01578632, Time: 0.0637 Steps: 64800, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000338, Sample Num: 5408, Cur Loss: 0.00028049, Cur Avg Loss: 0.00109512, Log Avg loss: 0.00108947, Global Avg Loss: 0.01574109, Time: 0.0608 Steps: 65000, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000538, Sample Num: 8608, Cur Loss: 0.00012983, Cur Avg Loss: 0.00114990, Log Avg loss: 0.00124247, Global Avg Loss: 0.01569662, Time: 0.0606 Steps: 65200, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000738, Sample Num: 11808, Cur Loss: 0.00086954, Cur Avg Loss: 0.00111579, Log Avg loss: 0.00102404, Global Avg Loss: 0.01565175, Time: 0.0707 Steps: 65400, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000938, Sample Num: 15008, Cur Loss: 0.00012931, Cur Avg Loss: 0.00109778, Log Avg loss: 0.00103130, Global Avg Loss: 0.01560718, Time: 0.0627 Steps: 65600, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001138, Sample Num: 18208, Cur Loss: 0.00500146, Cur Avg Loss: 0.00109298, Log Avg loss: 0.00107049, Global Avg Loss: 0.01556299, Time: 0.1115 Steps: 65800, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001338, Sample Num: 21408, Cur Loss: 0.00015417, Cur Avg Loss: 0.00105373, Log Avg loss: 0.00083039, Global Avg Loss: 0.01551835, Time: 0.0961 Steps: 66000, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001538, Sample Num: 24608, Cur Loss: 0.00081991, Cur Avg Loss: 0.00099795, Log Avg loss: 0.00062477, Global Avg Loss: 0.01547335, Time: 0.0686 Steps: 66200, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001738, Sample Num: 27808, Cur Loss: 0.00005737, Cur Avg Loss: 0.00097478, Log Avg loss: 0.00079659, Global Avg Loss: 0.01542914, Time: 0.0603 Steps: 66400, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001938, Sample Num: 31008, Cur Loss: 0.00064425, Cur Avg Loss: 0.00095255, Log Avg loss: 0.00075935, Global Avg Loss: 0.01538509, Time: 0.0952 Steps: 66600, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 002138, Sample Num: 34208, Cur Loss: 0.00009899, Cur Avg Loss: 0.00097589, Log Avg loss: 0.00120214, Global Avg Loss: 0.01534263, Time: 0.1216 Steps: 66800, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 002338, Sample Num: 37408, Cur Loss: 0.00171478, Cur Avg Loss: 0.00096962, Log Avg loss: 0.00090255, Global Avg Loss: 0.01529952, Time: 0.0598 Steps: 67000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 002538, Sample Num: 40608, Cur Loss: 0.00010031, Cur Avg Loss: 0.00094885, Log Avg loss: 0.00070606, Global Avg Loss: 0.01525609, Time: 0.0616 Steps: 67200, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 002738, Sample Num: 43808, Cur Loss: 0.00169085, Cur Avg Loss: 0.00095647, Log Avg loss: 0.00105315, Global Avg Loss: 0.01521394, Time: 0.0607 Steps: 67400, Updated lr: 
0.000073 Training, Epoch: 0014, Batch: 002938, Sample Num: 47008, Cur Loss: 0.00041942, Cur Avg Loss: 0.00095026, Log Avg loss: 0.00086527, Global Avg Loss: 0.01517149, Time: 0.0598 Steps: 67600, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003138, Sample Num: 50208, Cur Loss: 0.00013813, Cur Avg Loss: 0.00094326, Log Avg loss: 0.00084034, Global Avg Loss: 0.01512922, Time: 0.0685 Steps: 67800, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003338, Sample Num: 53408, Cur Loss: 0.00007678, Cur Avg Loss: 0.00093325, Log Avg loss: 0.00077632, Global Avg Loss: 0.01508700, Time: 0.0621 Steps: 68000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003538, Sample Num: 56608, Cur Loss: 0.00128467, Cur Avg Loss: 0.00092804, Log Avg loss: 0.00084100, Global Avg Loss: 0.01504523, Time: 0.0572 Steps: 68200, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003738, Sample Num: 59808, Cur Loss: 0.00053859, Cur Avg Loss: 0.00092905, Log Avg loss: 0.00094702, Global Avg Loss: 0.01500400, Time: 0.1000 Steps: 68400, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003938, Sample Num: 63008, Cur Loss: 0.00499366, Cur Avg Loss: 0.00092852, Log Avg loss: 0.00091851, Global Avg Loss: 0.01496294, Time: 0.0638 Steps: 68600, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 004138, Sample Num: 66208, Cur Loss: 0.00073589, Cur Avg Loss: 0.00093131, Log Avg loss: 0.00098618, Global Avg Loss: 0.01492231, Time: 0.1192 Steps: 68800, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 004338, Sample Num: 69408, Cur Loss: 0.00031929, Cur Avg Loss: 0.00092712, Log Avg loss: 0.00084057, Global Avg Loss: 0.01488149, Time: 0.0609 Steps: 69000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 004538, Sample Num: 72608, Cur Loss: 0.00094858, Cur Avg Loss: 0.00092475, Log Avg loss: 0.00087323, Global Avg Loss: 0.01484100, Time: 0.0686 Steps: 69200, Updated lr: 0.000072 Training, Epoch: 0014, Batch: 004738, Sample Num: 75808, Cur Loss: 0.00031969, Cur Avg Loss: 0.00092708, Log Avg loss: 0.00098005, Global Avg Loss: 0.01480106, Time: 0.0612 Steps: 69400, Updated lr: 0.000072 Training, Epoch: 0014, Batch: 004938, Sample Num: 79008, Cur Loss: 0.00375319, Cur Avg Loss: 0.00093343, Log Avg loss: 0.00108380, Global Avg Loss: 0.01476164, Time: 0.1100 Steps: 69600, Updated lr: 0.000072 ***** Running evaluation checkpoint-69636 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-69636 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 391.583600, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001559, "eval_total_loss": 1.662197, "eval_acc": 0.999602, "eval_jaccard": 0.98553, "eval_prec": 0.988008, "eval_recall": 0.986927, "eval_f1": 0.986928, "eval_pr_auc": 0.995078, "eval_roc_auc": 0.999297, "eval_fmax": 0.993851, "eval_pmax": 0.996858, "eval_rmax": 0.990863, "eval_tmax": 0.29, "update_flag": true, "test_avg_loss": 0.00162, "test_total_loss": 1.726748, "test_acc": 0.999579, "test_jaccard": 0.983222, "test_prec": 0.985874, "test_recall": 0.984768, "test_f1": 0.984782, "test_pr_auc": 0.994085, "test_roc_auc": 0.999243, "test_fmax": 0.993271, "test_pmax": 0.996086, "test_rmax": 0.990472, "test_tmax": 0.26, "lr": 7.229067420266452e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.01475445467013979, "train_cur_epoch_loss": 4.640218240880131, "train_cur_epoch_avg_loss": 
0.0009328947006192463, "train_cur_epoch_time": 391.5835998058319, "train_cur_epoch_avg_time": 0.07872609565859105, "epoch": 14, "step": 69636} ################################################## Training, Epoch: 0015, Batch: 000164, Sample Num: 2624, Cur Loss: 0.00039571, Cur Avg Loss: 0.00083512, Log Avg loss: 0.00083953, Global Avg Loss: 0.01472175, Time: 0.1120 Steps: 69800, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000364, Sample Num: 5824, Cur Loss: 0.00007225, Cur Avg Loss: 0.00094295, Log Avg loss: 0.00103138, Global Avg Loss: 0.01468263, Time: 0.0597 Steps: 70000, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000564, Sample Num: 9024, Cur Loss: 0.00083440, Cur Avg Loss: 0.00103249, Log Avg loss: 0.00119545, Global Avg Loss: 0.01464421, Time: 0.0602 Steps: 70200, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000764, Sample Num: 12224, Cur Loss: 0.00007987, Cur Avg Loss: 0.00101666, Log Avg loss: 0.00097203, Global Avg Loss: 0.01460537, Time: 0.0606 Steps: 70400, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000964, Sample Num: 15424, Cur Loss: 0.00022253, Cur Avg Loss: 0.00098495, Log Avg loss: 0.00086382, Global Avg Loss: 0.01456644, Time: 0.0606 Steps: 70600, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001164, Sample Num: 18624, Cur Loss: 0.00070140, Cur Avg Loss: 0.00098660, Log Avg loss: 0.00099456, Global Avg Loss: 0.01452810, Time: 0.0606 Steps: 70800, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001364, Sample Num: 21824, Cur Loss: 0.00049410, Cur Avg Loss: 0.00093547, Log Avg loss: 0.00063789, Global Avg Loss: 0.01448897, Time: 0.0605 Steps: 71000, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001564, Sample Num: 25024, Cur Loss: 0.00171054, Cur Avg Loss: 0.00088637, Log Avg loss: 0.00055147, Global Avg Loss: 0.01444982, Time: 0.0615 Steps: 71200, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001764, Sample Num: 28224, Cur Loss: 0.00030356, Cur Avg Loss: 0.00086935, Log Avg loss: 0.00073628, Global Avg Loss: 0.01441141, Time: 0.0620 Steps: 71400, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001964, Sample Num: 31424, Cur Loss: 0.00007506, Cur Avg Loss: 0.00085069, Log Avg loss: 0.00068614, Global Avg Loss: 0.01437307, Time: 0.0585 Steps: 71600, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002164, Sample Num: 34624, Cur Loss: 0.00268778, Cur Avg Loss: 0.00087064, Log Avg loss: 0.00106653, Global Avg Loss: 0.01433601, Time: 0.1716 Steps: 71800, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002364, Sample Num: 37824, Cur Loss: 0.00171273, Cur Avg Loss: 0.00086290, Log Avg loss: 0.00077909, Global Avg Loss: 0.01429835, Time: 0.0601 Steps: 72000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002564, Sample Num: 41024, Cur Loss: 0.00007836, Cur Avg Loss: 0.00085066, Log Avg loss: 0.00070608, Global Avg Loss: 0.01426070, Time: 0.0596 Steps: 72200, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002764, Sample Num: 44224, Cur Loss: 0.00066043, Cur Avg Loss: 0.00085194, Log Avg loss: 0.00086829, Global Avg Loss: 0.01422370, Time: 0.0627 Steps: 72400, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002964, Sample Num: 47424, Cur Loss: 0.00116610, Cur Avg Loss: 0.00084606, Log Avg loss: 0.00076481, Global Avg Loss: 0.01418662, Time: 0.0937 Steps: 72600, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003164, Sample Num: 50624, Cur Loss: 0.00157114, Cur Avg Loss: 0.00083898, Log Avg loss: 0.00073401, Global Avg Loss: 0.01414967, Time: 0.1166 Steps: 72800, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 
003364, Sample Num: 53824, Cur Loss: 0.00048505, Cur Avg Loss: 0.00083348, Log Avg loss: 0.00074660, Global Avg Loss: 0.01411295, Time: 0.0619 Steps: 73000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003564, Sample Num: 57024, Cur Loss: 0.00037115, Cur Avg Loss: 0.00083230, Log Avg loss: 0.00081239, Global Avg Loss: 0.01407661, Time: 0.0939 Steps: 73200, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003764, Sample Num: 60224, Cur Loss: 0.00046417, Cur Avg Loss: 0.00083336, Log Avg loss: 0.00085217, Global Avg Loss: 0.01404057, Time: 0.0603 Steps: 73400, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003964, Sample Num: 63424, Cur Loss: 0.00010587, Cur Avg Loss: 0.00083509, Log Avg loss: 0.00086766, Global Avg Loss: 0.01400478, Time: 0.0634 Steps: 73600, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 004164, Sample Num: 66624, Cur Loss: 0.00804771, Cur Avg Loss: 0.00083817, Log Avg loss: 0.00089921, Global Avg Loss: 0.01396926, Time: 0.1188 Steps: 73800, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 004364, Sample Num: 69824, Cur Loss: 0.00024246, Cur Avg Loss: 0.00083551, Log Avg loss: 0.00078009, Global Avg Loss: 0.01393361, Time: 0.0810 Steps: 74000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 004564, Sample Num: 73024, Cur Loss: 0.00031011, Cur Avg Loss: 0.00084033, Log Avg loss: 0.00094557, Global Avg Loss: 0.01389860, Time: 0.1043 Steps: 74200, Updated lr: 0.000070 Training, Epoch: 0015, Batch: 004764, Sample Num: 76224, Cur Loss: 0.00014586, Cur Avg Loss: 0.00083408, Log Avg loss: 0.00069156, Global Avg Loss: 0.01386310, Time: 0.1135 Steps: 74400, Updated lr: 0.000070 Training, Epoch: 0015, Batch: 004964, Sample Num: 79424, Cur Loss: 0.00084412, Cur Avg Loss: 0.00084009, Log Avg loss: 0.00098306, Global Avg Loss: 0.01382857, Time: 0.0641 Steps: 74600, Updated lr: 0.000070 ***** Running evaluation checkpoint-74610 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-74610 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 388.211870, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001513, "eval_total_loss": 1.612684, "eval_acc": 0.99961, "eval_jaccard": 0.985983, "eval_prec": 0.988248, "eval_recall": 0.987441, "eval_f1": 0.987347, "eval_pr_auc": 0.995074, "eval_roc_auc": 0.999315, "eval_fmax": 0.993905, "eval_pmax": 0.99599, "eval_rmax": 0.991828, "eval_tmax": 0.24, "update_flag": true, "test_avg_loss": 0.001553, "test_total_loss": 1.655338, "test_acc": 0.999607, "test_jaccard": 0.984554, "test_prec": 0.986629, "test_recall": 0.986379, "test_f1": 0.986005, "test_pr_auc": 0.994386, "test_roc_auc": 0.999284, "test_fmax": 0.993505, "test_pmax": 0.99639, "test_rmax": 0.990637, "test_tmax": 0.29, "lr": 7.028259991925716e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.013826832241652438, "train_cur_epoch_loss": 4.17874813983417, "train_cur_epoch_avg_loss": 0.0008401182428295475, "train_cur_epoch_time": 388.2118704319, "train_cur_epoch_avg_time": 0.07804822485562928, "epoch": 15, "step": 74610} ################################################## Training, Epoch: 0016, Batch: 000190, Sample Num: 3040, Cur Loss: 0.00044905, Cur Avg Loss: 0.00086809, Log Avg loss: 0.00086751, Global Avg Loss: 0.01379392, Time: 0.1185 Steps: 74800, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 000390, Sample Num: 6240, Cur Loss: 0.00435276, 
Cur Avg Loss: 0.00088297, Log Avg loss: 0.00089710, Global Avg Loss: 0.01375952, Time: 0.1780 Steps: 75000, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 000590, Sample Num: 9440, Cur Loss: 0.00015153, Cur Avg Loss: 0.00094991, Log Avg loss: 0.00108046, Global Avg Loss: 0.01372580, Time: 0.0616 Steps: 75200, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 000790, Sample Num: 12640, Cur Loss: 0.00015793, Cur Avg Loss: 0.00091801, Log Avg loss: 0.00082388, Global Avg Loss: 0.01369158, Time: 0.0599 Steps: 75400, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 000990, Sample Num: 15840, Cur Loss: 0.00009008, Cur Avg Loss: 0.00088078, Log Avg loss: 0.00073374, Global Avg Loss: 0.01365730, Time: 0.0732 Steps: 75600, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001190, Sample Num: 19040, Cur Loss: 0.00106555, Cur Avg Loss: 0.00089684, Log Avg loss: 0.00097632, Global Avg Loss: 0.01362384, Time: 0.1073 Steps: 75800, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001390, Sample Num: 22240, Cur Loss: 0.00063881, Cur Avg Loss: 0.00084239, Log Avg loss: 0.00051841, Global Avg Loss: 0.01358935, Time: 0.1955 Steps: 76000, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001590, Sample Num: 25440, Cur Loss: 0.00114264, Cur Avg Loss: 0.00079695, Log Avg loss: 0.00048117, Global Avg Loss: 0.01355495, Time: 0.0619 Steps: 76200, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001790, Sample Num: 28640, Cur Loss: 0.00000860, Cur Avg Loss: 0.00077895, Log Avg loss: 0.00063583, Global Avg Loss: 0.01352113, Time: 0.1130 Steps: 76400, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001990, Sample Num: 31840, Cur Loss: 0.00040395, Cur Avg Loss: 0.00076757, Log Avg loss: 0.00066571, Global Avg Loss: 0.01348756, Time: 0.0497 Steps: 76600, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002190, Sample Num: 35040, Cur Loss: 0.00243495, Cur Avg Loss: 0.00079026, Log Avg loss: 0.00101602, Global Avg Loss: 0.01345509, Time: 0.0598 Steps: 76800, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002390, Sample Num: 38240, Cur Loss: 0.00017748, Cur Avg Loss: 0.00077092, Log Avg loss: 0.00055910, Global Avg Loss: 0.01342159, Time: 0.0600 Steps: 77000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002590, Sample Num: 41440, Cur Loss: 0.00440730, Cur Avg Loss: 0.00077344, Log Avg loss: 0.00080359, Global Avg Loss: 0.01338890, Time: 0.1769 Steps: 77200, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002790, Sample Num: 44640, Cur Loss: 0.00009894, Cur Avg Loss: 0.00077038, Log Avg loss: 0.00073075, Global Avg Loss: 0.01335619, Time: 0.1456 Steps: 77400, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002990, Sample Num: 47840, Cur Loss: 0.00057939, Cur Avg Loss: 0.00077516, Log Avg loss: 0.00084181, Global Avg Loss: 0.01332394, Time: 0.0605 Steps: 77600, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003190, Sample Num: 51040, Cur Loss: 0.00014997, Cur Avg Loss: 0.00076252, Log Avg loss: 0.00057356, Global Avg Loss: 0.01329116, Time: 0.0595 Steps: 77800, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003390, Sample Num: 54240, Cur Loss: 0.00046856, Cur Avg Loss: 0.00076040, Log Avg loss: 0.00072664, Global Avg Loss: 0.01325895, Time: 0.1013 Steps: 78000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003590, Sample Num: 57440, Cur Loss: 0.00037256, Cur Avg Loss: 0.00075456, Log Avg loss: 0.00065563, Global Avg Loss: 0.01322671, Time: 0.0639 Steps: 78200, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003790, Sample Num: 60640, Cur Loss: 0.00006703, Cur Avg Loss: 
0.00075793, Log Avg loss: 0.00081835, Global Avg Loss: 0.01319506, Time: 0.0613 Steps: 78400, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003990, Sample Num: 63840, Cur Loss: 0.00014259, Cur Avg Loss: 0.00075973, Log Avg loss: 0.00079388, Global Avg Loss: 0.01316350, Time: 0.0605 Steps: 78600, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 004190, Sample Num: 67040, Cur Loss: 0.00667726, Cur Avg Loss: 0.00076244, Log Avg loss: 0.00081643, Global Avg Loss: 0.01313216, Time: 0.0484 Steps: 78800, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 004390, Sample Num: 70240, Cur Loss: 0.00006812, Cur Avg Loss: 0.00076215, Log Avg loss: 0.00075606, Global Avg Loss: 0.01310083, Time: 0.1210 Steps: 79000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 004590, Sample Num: 73440, Cur Loss: 0.00014789, Cur Avg Loss: 0.00076586, Log Avg loss: 0.00084733, Global Avg Loss: 0.01306989, Time: 0.0610 Steps: 79200, Updated lr: 0.000068 Training, Epoch: 0016, Batch: 004790, Sample Num: 76640, Cur Loss: 0.00701513, Cur Avg Loss: 0.00076253, Log Avg loss: 0.00068621, Global Avg Loss: 0.01303870, Time: 0.1109 Steps: 79400, Updated lr: 0.000068 ***** Running evaluation checkpoint-79584 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-79584 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 387.141640, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001472, "eval_total_loss": 1.569111, "eval_acc": 0.999631, "eval_jaccard": 0.98735, "eval_prec": 0.989797, "eval_recall": 0.988595, "eval_f1": 0.988688, "eval_pr_auc": 0.995435, "eval_roc_auc": 0.999334, "eval_fmax": 0.994198, "eval_pmax": 0.996405, "eval_rmax": 0.992001, "eval_tmax": 0.23, "update_flag": true, "test_avg_loss": 0.001528, "test_total_loss": 1.62927, "test_acc": 0.999636, "test_jaccard": 0.985921, "test_prec": 0.988316, "test_recall": 0.987721, "test_f1": 0.987466, "test_pr_auc": 0.99441, "test_roc_auc": 0.999293, "test_fmax": 0.993775, "test_pmax": 0.996214, "test_rmax": 0.991348, "test_tmax": 0.26, "lr": 6.827452563584982e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.01301047132269563, "train_cur_epoch_loss": 3.8053961957205047, "train_cur_epoch_avg_loss": 0.0007650575383434871, "train_cur_epoch_time": 387.14163994789124, "train_cur_epoch_avg_time": 0.07783305990106378, "epoch": 16, "step": 79584} ################################################## Training, Epoch: 0017, Batch: 000016, Sample Num: 256, Cur Loss: 0.00005192, Cur Avg Loss: 0.00102725, Log Avg loss: 0.00084649, Global Avg Loss: 0.01300806, Time: 0.1094 Steps: 79600, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000216, Sample Num: 3456, Cur Loss: 0.00008078, Cur Avg Loss: 0.00078786, Log Avg loss: 0.00076871, Global Avg Loss: 0.01297739, Time: 0.0805 Steps: 79800, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000416, Sample Num: 6656, Cur Loss: 0.00009695, Cur Avg Loss: 0.00077115, Log Avg loss: 0.00075309, Global Avg Loss: 0.01294683, Time: 0.0707 Steps: 80000, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000616, Sample Num: 9856, Cur Loss: 0.00016731, Cur Avg Loss: 0.00083280, Log Avg loss: 0.00096104, Global Avg Loss: 0.01291694, Time: 0.0611 Steps: 80200, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000816, Sample Num: 13056, Cur Loss: 0.00118773, Cur Avg Loss: 0.00081366, Log Avg loss: 0.00075473, Global Avg 
Loss: 0.01288668, Time: 0.1782 Steps: 80400, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001016, Sample Num: 16256, Cur Loss: 0.00008287, Cur Avg Loss: 0.00076401, Log Avg loss: 0.00056142, Global Avg Loss: 0.01285610, Time: 0.0238 Steps: 80600, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001216, Sample Num: 19456, Cur Loss: 0.00009297, Cur Avg Loss: 0.00078755, Log Avg loss: 0.00090712, Global Avg Loss: 0.01282652, Time: 0.1230 Steps: 80800, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001416, Sample Num: 22656, Cur Loss: 0.00000917, Cur Avg Loss: 0.00074192, Log Avg loss: 0.00046451, Global Avg Loss: 0.01279600, Time: 0.0612 Steps: 81000, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001616, Sample Num: 25856, Cur Loss: 0.00007038, Cur Avg Loss: 0.00070762, Log Avg loss: 0.00046475, Global Avg Loss: 0.01276563, Time: 0.0314 Steps: 81200, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001816, Sample Num: 29056, Cur Loss: 0.00101249, Cur Avg Loss: 0.00069288, Log Avg loss: 0.00057383, Global Avg Loss: 0.01273567, Time: 0.1121 Steps: 81400, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 002016, Sample Num: 32256, Cur Loss: 0.00011230, Cur Avg Loss: 0.00068616, Log Avg loss: 0.00062508, Global Avg Loss: 0.01270599, Time: 0.1220 Steps: 81600, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002216, Sample Num: 35456, Cur Loss: 0.00034616, Cur Avg Loss: 0.00070816, Log Avg loss: 0.00092992, Global Avg Loss: 0.01267720, Time: 0.0611 Steps: 81800, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002416, Sample Num: 38656, Cur Loss: 0.00004310, Cur Avg Loss: 0.00069356, Log Avg loss: 0.00053187, Global Avg Loss: 0.01264757, Time: 0.0599 Steps: 82000, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002616, Sample Num: 41856, Cur Loss: 0.00066878, Cur Avg Loss: 0.00069500, Log Avg loss: 0.00071232, Global Avg Loss: 0.01261853, Time: 0.0604 Steps: 82200, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002816, Sample Num: 45056, Cur Loss: 0.00082861, Cur Avg Loss: 0.00068916, Log Avg loss: 0.00061282, Global Avg Loss: 0.01258939, Time: 0.0696 Steps: 82400, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003016, Sample Num: 48256, Cur Loss: 0.00028993, Cur Avg Loss: 0.00069021, Log Avg loss: 0.00070502, Global Avg Loss: 0.01256062, Time: 0.0616 Steps: 82600, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003216, Sample Num: 51456, Cur Loss: 0.00017162, Cur Avg Loss: 0.00067985, Log Avg loss: 0.00052364, Global Avg Loss: 0.01253154, Time: 0.1716 Steps: 82800, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003416, Sample Num: 54656, Cur Loss: 0.00036700, Cur Avg Loss: 0.00067470, Log Avg loss: 0.00059187, Global Avg Loss: 0.01250277, Time: 0.0610 Steps: 83000, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003616, Sample Num: 57856, Cur Loss: 0.00027703, Cur Avg Loss: 0.00066997, Log Avg loss: 0.00058922, Global Avg Loss: 0.01247413, Time: 0.0468 Steps: 83200, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003816, Sample Num: 61056, Cur Loss: 0.00003918, Cur Avg Loss: 0.00067047, Log Avg loss: 0.00067939, Global Avg Loss: 0.01244585, Time: 0.0412 Steps: 83400, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 004016, Sample Num: 64256, Cur Loss: 0.00004476, Cur Avg Loss: 0.00067162, Log Avg loss: 0.00069353, Global Avg Loss: 0.01241773, Time: 0.0595 Steps: 83600, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 004216, Sample Num: 67456, Cur Loss: 0.00042796, Cur Avg Loss: 0.00067586, Log Avg loss: 0.00076107, Global Avg Loss: 0.01238991, 
Time: 0.0607 Steps: 83800, Updated lr: 0.000067
Training, Epoch: 0017, Batch: 004416, Sample Num: 70656, Cur Loss: 0.00100903, Cur Avg Loss: 0.00067539, Log Avg loss: 0.00066537, Global Avg Loss: 0.01236200, Time: 0.1205 Steps: 84000, Updated lr: 0.000066
Training, Epoch: 0017, Batch: 004616, Sample Num: 73856, Cur Loss: 0.00034647, Cur Avg Loss: 0.00068176, Log Avg loss: 0.00082248, Global Avg Loss: 0.01233459, Time: 0.0606 Steps: 84200, Updated lr: 0.000066
Training, Epoch: 0017, Batch: 004816, Sample Num: 77056, Cur Loss: 0.00381413, Cur Avg Loss: 0.00068559, Log Avg loss: 0.00077397, Global Avg Loss: 0.01230719, Time: 0.1129 Steps: 84400, Updated lr: 0.000066
***** Running evaluation checkpoint-84558 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-84558 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 393.023288, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001447, "eval_total_loss": 1.542993, "eval_acc": 0.999637, "eval_jaccard": 0.98751, "eval_prec": 0.989762, "eval_recall": 0.988824, "eval_f1": 0.9888, "eval_pr_auc": 0.99527, "eval_roc_auc": 0.999336, "eval_fmax": 0.99419, "eval_pmax": 0.996761, "eval_rmax": 0.991631, "eval_tmax": 0.26, "update_flag": true, "test_avg_loss": 0.001508, "test_total_loss": 1.607964, "test_acc": 0.999636, "test_jaccard": 0.985943, "test_prec": 0.988076, "test_recall": 0.987804, "test_f1": 0.987416, "test_pr_auc": 0.994455, "test_roc_auc": 0.999301, "test_fmax": 0.993727, "test_pmax": 0.99633, "test_rmax": 0.991137, "test_tmax": 0.29, "lr": 6.626645135244247e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.012285302610642212, "train_cur_epoch_loss": 3.3952684052751465, "train_cur_epoch_avg_loss": 0.0006826032177875244, "train_cur_epoch_time": 393.02328848838806, "train_cur_epoch_avg_time": 0.07901553849786652, "epoch": 17, "step": 84558}
##################################################
Training, Epoch: 0018, Batch: 000042, Sample Num: 672, Cur Loss: 0.00002442, Cur Avg Loss: 0.00069638, Log Avg loss: 0.00061363, Global Avg Loss: 0.01227955, Time: 0.0619 Steps: 84600, Updated lr: 0.000066
Training, Epoch: 0018, Batch: 000242, Sample Num: 3872, Cur Loss: 0.00008878, Cur Avg Loss: 0.00071370, Log Avg loss: 0.00071734, Global Avg Loss: 0.01225228, Time: 0.0614 Steps: 84800, Updated lr: 0.000066
Training, Epoch: 0018, Batch: 000442, Sample Num: 7072, Cur Loss: 0.00142054, Cur Avg Loss: 0.00076331, Log Avg loss: 0.00082333, Global Avg Loss: 0.01222539, Time: 0.1225 Steps: 85000, Updated lr: 0.000066
Training, Epoch: 0018, Batch: 000642, Sample Num: 10272, Cur Loss: 0.00009788, Cur Avg Loss: 0.00077420, Log Avg loss: 0.00079829, Global Avg Loss: 0.01219856, Time: 0.0692 Steps: 85200, Updated lr: 0.000066
Training, Epoch: 0018, Batch: 000842, Sample Num: 13472, Cur Loss: 0.00066936, Cur Avg Loss: 0.00074486, Log Avg loss: 0.00065067, Global Avg Loss: 0.01217152, Time: 0.0251 Steps: 85400, Updated lr: 0.000066
Training, Epoch: 0018, Batch: 001042, Sample Num: 16672, Cur Loss: 0.00011013, Cur Avg Loss: 0.00071679, Log Avg loss: 0.00059861, Global Avg Loss: 0.01214448, Time: 0.0595 Steps: 85600, Updated lr: 0.000066
Training, Epoch: 0018, Batch: 001242, Sample Num: 19872, Cur Loss: 0.00018808, Cur Avg Loss: 0.00071889, Log Avg loss: 0.00072984, Global Avg Loss: 0.01211787, Time: 0.1099 Steps: 85800, Updated lr: 0.000066
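Note on the training rows above: a row is printed every 200 optimizer steps (logging_steps = 200 in the run config), and Sample Num is just Batch * the per-GPU batch size of 16 (e.g. Batch 004816 * 16 = 77056). The four loss columns are nested running means over different windows. A minimal sketch of the presumed bookkeeping, in Python (a reconstruction for readability, not the project's actual code; all names are hypothetical):

logging_steps = 200                  # matches the 200-step spacing of the rows above

global_sum, global_steps = 0.0, 0    # accumulated since step 0
epoch_sum, epoch_steps = 0.0, 0      # accumulated since the start of the current epoch
window_sum = 0.0                     # accumulated since the last printed row

def on_batch_end(cur_loss: float) -> None:
    """Accumulate one batch loss; print a log row every logging_steps steps."""
    global global_sum, global_steps, epoch_sum, epoch_steps, window_sum
    global_sum += cur_loss; global_steps += 1
    epoch_sum += cur_loss; epoch_steps += 1
    window_sum += cur_loss
    if global_steps % logging_steps == 0:
        print(f"Cur Loss: {cur_loss:.8f}, "                        # this batch only
              f"Cur Avg Loss: {epoch_sum / epoch_steps:.8f}, "     # epoch-so-far mean
              f"Log Avg loss: {window_sum / logging_steps:.8f}, "  # last-200-steps mean
              f"Global Avg Loss: {global_sum / global_steps:.8f}") # run-so-far mean
        window_sum = 0.0

def on_epoch_end() -> None:
    global epoch_sum, epoch_steps
    epoch_sum, epoch_steps = 0.0, 0

Under this reading, Global Avg Loss falls only slowly because it still carries the large losses of the first epochs, while Cur Avg Loss resets at each epoch boundary (it restarts at 0.00069638 at Epoch 0018, Batch 000042 above).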
Training, Epoch: 0018, Batch: 001442, Sample Num: 23072, Cur Loss: 0.00053338, Cur Avg Loss: 0.00068179, Log Avg loss: 0.00045137, Global Avg Loss: 0.01209074, Time: 0.0687 Steps: 86000, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001642, Sample Num: 26272, Cur Loss: 0.00008563, Cur Avg Loss: 0.00064694, Log Avg loss: 0.00039565, Global Avg Loss: 0.01206361, Time: 0.0832 Steps: 86200, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001842, Sample Num: 29472, Cur Loss: 0.00018713, Cur Avg Loss: 0.00063412, Log Avg loss: 0.00052888, Global Avg Loss: 0.01203691, Time: 0.0594 Steps: 86400, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 002042, Sample Num: 32672, Cur Loss: 0.00106581, Cur Avg Loss: 0.00063202, Log Avg loss: 0.00061270, Global Avg Loss: 0.01201052, Time: 0.0610 Steps: 86600, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002242, Sample Num: 35872, Cur Loss: 0.00001573, Cur Avg Loss: 0.00063472, Log Avg loss: 0.00066232, Global Avg Loss: 0.01198437, Time: 0.0609 Steps: 86800, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002442, Sample Num: 39072, Cur Loss: 0.00010039, Cur Avg Loss: 0.00062654, Log Avg loss: 0.00053488, Global Avg Loss: 0.01195805, Time: 0.0613 Steps: 87000, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002642, Sample Num: 42272, Cur Loss: 0.00022079, Cur Avg Loss: 0.00062518, Log Avg loss: 0.00060855, Global Avg Loss: 0.01193202, Time: 0.0655 Steps: 87200, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002842, Sample Num: 45472, Cur Loss: 0.00088339, Cur Avg Loss: 0.00062200, Log Avg loss: 0.00057998, Global Avg Loss: 0.01190605, Time: 0.0613 Steps: 87400, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003042, Sample Num: 48672, Cur Loss: 0.00010679, Cur Avg Loss: 0.00062909, Log Avg loss: 0.00072978, Global Avg Loss: 0.01188053, Time: 0.0691 Steps: 87600, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003242, Sample Num: 51872, Cur Loss: 0.00045944, Cur Avg Loss: 0.00061945, Log Avg loss: 0.00047288, Global Avg Loss: 0.01185454, Time: 0.0582 Steps: 87800, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003442, Sample Num: 55072, Cur Loss: 0.00006294, Cur Avg Loss: 0.00061583, Log Avg loss: 0.00055704, Global Avg Loss: 0.01182887, Time: 0.0597 Steps: 88000, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003642, Sample Num: 58272, Cur Loss: 0.00088832, Cur Avg Loss: 0.00061904, Log Avg loss: 0.00067442, Global Avg Loss: 0.01180357, Time: 0.0606 Steps: 88200, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003842, Sample Num: 61472, Cur Loss: 0.00085578, Cur Avg Loss: 0.00061661, Log Avg loss: 0.00057235, Global Avg Loss: 0.01177816, Time: 0.0703 Steps: 88400, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 004042, Sample Num: 64672, Cur Loss: 0.00003855, Cur Avg Loss: 0.00062152, Log Avg loss: 0.00071584, Global Avg Loss: 0.01175319, Time: 0.0616 Steps: 88600, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 004242, Sample Num: 67872, Cur Loss: 0.00010441, Cur Avg Loss: 0.00062100, Log Avg loss: 0.00061055, Global Avg Loss: 0.01172810, Time: 0.0606 Steps: 88800, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 004442, Sample Num: 71072, Cur Loss: 0.00027665, Cur Avg Loss: 0.00062115, Log Avg loss: 0.00062426, Global Avg Loss: 0.01170314, Time: 0.0915 Steps: 89000, Updated lr: 0.000064 Training, Epoch: 0018, Batch: 004642, Sample Num: 74272, Cur Loss: 0.00033852, Cur Avg Loss: 0.00062703, Log Avg loss: 0.00075771, Global Avg Loss: 0.01167860, Time: 0.1002 Steps: 89200, Updated lr: 0.000064 Training, Epoch: 
0018, Batch: 004842, Sample Num: 77472, Cur Loss: 0.00023520, Cur Avg Loss: 0.00063168, Log Avg loss: 0.00073948, Global Avg Loss: 0.01165413, Time: 0.0666 Steps: 89400, Updated lr: 0.000064 ***** Running evaluation checkpoint-89532 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-89532 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 389.992600, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001433, "eval_total_loss": 1.527449, "eval_acc": 0.999635, "eval_jaccard": 0.987581, "eval_prec": 0.990227, "eval_recall": 0.989013, "eval_f1": 0.989028, "eval_pr_auc": 0.99548, "eval_roc_auc": 0.999353, "eval_fmax": 0.994378, "eval_pmax": 0.997451, "eval_rmax": 0.991324, "eval_tmax": 0.28, "update_flag": true, "test_avg_loss": 0.001491, "test_total_loss": 1.589801, "test_acc": 0.999645, "test_jaccard": 0.986348, "test_prec": 0.988583, "test_recall": 0.988047, "test_f1": 0.987787, "test_pr_auc": 0.994529, "test_roc_auc": 0.999315, "test_fmax": 0.994002, "test_pmax": 0.996726, "test_rmax": 0.991293, "test_tmax": 0.27, "lr": 6.425837706903513e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.011637881832893417, "train_cur_epoch_loss": 3.1422181119291963, "train_cur_epoch_avg_loss": 0.0006317286111638915, "train_cur_epoch_time": 389.99259972572327, "train_cur_epoch_avg_time": 0.07840623235338225, "epoch": 18, "step": 89532} ################################################## Training, Epoch: 0019, Batch: 000068, Sample Num: 1088, Cur Loss: 0.00046601, Cur Avg Loss: 0.00061630, Log Avg loss: 0.00062768, Global Avg Loss: 0.01162952, Time: 0.1133 Steps: 89600, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000268, Sample Num: 4288, Cur Loss: 0.00006618, Cur Avg Loss: 0.00062214, Log Avg loss: 0.00062412, Global Avg Loss: 0.01160501, Time: 0.0603 Steps: 89800, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000468, Sample Num: 7488, Cur Loss: 0.00174334, Cur Avg Loss: 0.00074866, Log Avg loss: 0.00091819, Global Avg Loss: 0.01158126, Time: 0.0606 Steps: 90000, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000668, Sample Num: 10688, Cur Loss: 0.00020873, Cur Avg Loss: 0.00072818, Log Avg loss: 0.00068026, Global Avg Loss: 0.01155709, Time: 0.0615 Steps: 90200, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000868, Sample Num: 13888, Cur Loss: 0.00188602, Cur Avg Loss: 0.00071028, Log Avg loss: 0.00065051, Global Avg Loss: 0.01153296, Time: 0.0825 Steps: 90400, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001068, Sample Num: 17088, Cur Loss: 0.01008805, Cur Avg Loss: 0.00068503, Log Avg loss: 0.00057544, Global Avg Loss: 0.01150877, Time: 0.0480 Steps: 90600, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001268, Sample Num: 20288, Cur Loss: 0.00049808, Cur Avg Loss: 0.00066987, Log Avg loss: 0.00058890, Global Avg Loss: 0.01148472, Time: 0.0595 Steps: 90800, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001468, Sample Num: 23488, Cur Loss: 0.00046395, Cur Avg Loss: 0.00063017, Log Avg loss: 0.00037850, Global Avg Loss: 0.01146031, Time: 0.0790 Steps: 91000, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001668, Sample Num: 26688, Cur Loss: 0.00008520, Cur Avg Loss: 0.00059479, Log Avg loss: 0.00033512, Global Avg Loss: 0.01143591, Time: 0.0263 Steps: 91200, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001868, Sample Num: 29888, Cur 
Loss: 0.00075357, Cur Avg Loss: 0.00058628, Log Avg loss: 0.00051527, Global Avg Loss: 0.01141201, Time: 0.0618 Steps: 91400, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 002068, Sample Num: 33088, Cur Loss: 0.00018626, Cur Avg Loss: 0.00058811, Log Avg loss: 0.00060522, Global Avg Loss: 0.01138842, Time: 0.1223 Steps: 91600, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002268, Sample Num: 36288, Cur Loss: 0.00006251, Cur Avg Loss: 0.00059733, Log Avg loss: 0.00069265, Global Avg Loss: 0.01136512, Time: 0.0607 Steps: 91800, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002468, Sample Num: 39488, Cur Loss: 0.00010125, Cur Avg Loss: 0.00058663, Log Avg loss: 0.00046534, Global Avg Loss: 0.01134142, Time: 0.0618 Steps: 92000, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002668, Sample Num: 42688, Cur Loss: 0.00181819, Cur Avg Loss: 0.00058847, Log Avg loss: 0.00061112, Global Avg Loss: 0.01131814, Time: 0.0615 Steps: 92200, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002868, Sample Num: 45888, Cur Loss: 0.00056538, Cur Avg Loss: 0.00059134, Log Avg loss: 0.00062965, Global Avg Loss: 0.01129501, Time: 0.0936 Steps: 92400, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003068, Sample Num: 49088, Cur Loss: 0.00006441, Cur Avg Loss: 0.00058714, Log Avg loss: 0.00052692, Global Avg Loss: 0.01127175, Time: 0.1135 Steps: 92600, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003268, Sample Num: 52288, Cur Loss: 0.00005113, Cur Avg Loss: 0.00057841, Log Avg loss: 0.00044449, Global Avg Loss: 0.01124842, Time: 0.1162 Steps: 92800, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003468, Sample Num: 55488, Cur Loss: 0.00078775, Cur Avg Loss: 0.00057649, Log Avg loss: 0.00054511, Global Avg Loss: 0.01122540, Time: 0.0597 Steps: 93000, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003668, Sample Num: 58688, Cur Loss: 0.00143328, Cur Avg Loss: 0.00058063, Log Avg loss: 0.00065249, Global Avg Loss: 0.01120271, Time: 0.0515 Steps: 93200, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003868, Sample Num: 61888, Cur Loss: 0.00060855, Cur Avg Loss: 0.00057468, Log Avg loss: 0.00046549, Global Avg Loss: 0.01117972, Time: 0.1151 Steps: 93400, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 004068, Sample Num: 65088, Cur Loss: 0.00168223, Cur Avg Loss: 0.00057957, Log Avg loss: 0.00067418, Global Avg Loss: 0.01115727, Time: 0.0613 Steps: 93600, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 004268, Sample Num: 68288, Cur Loss: 0.00058474, Cur Avg Loss: 0.00058020, Log Avg loss: 0.00059298, Global Avg Loss: 0.01113475, Time: 0.0589 Steps: 93800, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 004468, Sample Num: 71488, Cur Loss: 0.00008714, Cur Avg Loss: 0.00057792, Log Avg loss: 0.00052925, Global Avg Loss: 0.01111218, Time: 0.1010 Steps: 94000, Updated lr: 0.000062 Training, Epoch: 0019, Batch: 004668, Sample Num: 74688, Cur Loss: 0.00010377, Cur Avg Loss: 0.00058538, Log Avg loss: 0.00075193, Global Avg Loss: 0.01109018, Time: 0.0604 Steps: 94200, Updated lr: 0.000062 Training, Epoch: 0019, Batch: 004868, Sample Num: 77888, Cur Loss: 0.00045766, Cur Avg Loss: 0.00058719, Log Avg loss: 0.00062950, Global Avg Loss: 0.01106802, Time: 0.1175 Steps: 94400, Updated lr: 0.000062 ***** Running evaluation checkpoint-94506 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-94506 ***** Test Dataset Instantaneous batch size per GPU = 16 Test 
Dataset Num examples = [17053] ################################################## Epoch Time: 383.947938, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001411, "eval_total_loss": 1.504006, "eval_acc": 0.999638, "eval_jaccard": 0.987884, "eval_prec": 0.990442, "eval_recall": 0.989129, "eval_f1": 0.989231, "eval_pr_auc": 0.995468, "eval_roc_auc": 0.99936, "eval_fmax": 0.994526, "eval_pmax": 0.996858, "eval_rmax": 0.992205, "eval_tmax": 0.22, "update_flag": true, "test_avg_loss": 0.00148, "test_total_loss": 1.577628, "test_acc": 0.999646, "test_jaccard": 0.986464, "test_prec": 0.988632, "test_recall": 0.988181, "test_f1": 0.987875, "test_pr_auc": 0.994551, "test_roc_auc": 0.999312, "test_fmax": 0.993975, "test_pmax": 0.996598, "test_rmax": 0.991367, "test_tmax": 0.25, "lr": 6.225030278562778e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.011056311228729434, "train_cur_epoch_loss": 2.924912719690724, "train_cur_epoch_avg_loss": 0.0005880403537777893, "train_cur_epoch_time": 383.9479384422302, "train_cur_epoch_avg_time": 0.07719098078854648, "epoch": 19, "step": 94506} ################################################## Training, Epoch: 0020, Batch: 000094, Sample Num: 1504, Cur Loss: 0.00007260, Cur Avg Loss: 0.00057053, Log Avg loss: 0.00060054, Global Avg Loss: 0.01104589, Time: 0.0589 Steps: 94600, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000294, Sample Num: 4704, Cur Loss: 0.00047956, Cur Avg Loss: 0.00060083, Log Avg loss: 0.00061508, Global Avg Loss: 0.01102389, Time: 0.0242 Steps: 94800, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000494, Sample Num: 7904, Cur Loss: 0.00022537, Cur Avg Loss: 0.00071541, Log Avg loss: 0.00088383, Global Avg Loss: 0.01100254, Time: 0.0605 Steps: 95000, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000694, Sample Num: 11104, Cur Loss: 0.01251108, Cur Avg Loss: 0.00068054, Log Avg loss: 0.00059441, Global Avg Loss: 0.01098067, Time: 0.1143 Steps: 95200, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000894, Sample Num: 14304, Cur Loss: 0.00198223, Cur Avg Loss: 0.00065927, Log Avg loss: 0.00058547, Global Avg Loss: 0.01095888, Time: 0.0612 Steps: 95400, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001094, Sample Num: 17504, Cur Loss: 0.00105501, Cur Avg Loss: 0.00064696, Log Avg loss: 0.00059195, Global Avg Loss: 0.01093719, Time: 0.0604 Steps: 95600, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001294, Sample Num: 20704, Cur Loss: 0.00025675, Cur Avg Loss: 0.00060711, Log Avg loss: 0.00038911, Global Avg Loss: 0.01091517, Time: 0.0617 Steps: 95800, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001494, Sample Num: 23904, Cur Loss: 0.00001965, Cur Avg Loss: 0.00057075, Log Avg loss: 0.00033548, Global Avg Loss: 0.01089313, Time: 0.0644 Steps: 96000, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001694, Sample Num: 27104, Cur Loss: 0.00012406, Cur Avg Loss: 0.00054492, Log Avg loss: 0.00035203, Global Avg Loss: 0.01087121, Time: 0.0614 Steps: 96200, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001894, Sample Num: 30304, Cur Loss: 0.00079664, Cur Avg Loss: 0.00052942, Log Avg loss: 0.00039805, Global Avg Loss: 0.01084949, Time: 0.1594 Steps: 96400, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002094, Sample Num: 33504, Cur Loss: 0.00003186, Cur Avg Loss: 0.00053717, Log Avg loss: 0.00061059, Global Avg Loss: 0.01082829, Time: 0.0600 Steps: 96600, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002294, Sample Num: 36704, Cur Loss: 0.00004730, Cur Avg Loss: 0.00054587, Log Avg loss: 
0.00063703, Global Avg Loss: 0.01080723, Time: 0.0615 Steps: 96800, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002494, Sample Num: 39904, Cur Loss: 0.00013287, Cur Avg Loss: 0.00053695, Log Avg loss: 0.00043459, Global Avg Loss: 0.01078584, Time: 0.0689 Steps: 97000, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002694, Sample Num: 43104, Cur Loss: 0.00027594, Cur Avg Loss: 0.00053588, Log Avg loss: 0.00052255, Global Avg Loss: 0.01076473, Time: 0.0608 Steps: 97200, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002894, Sample Num: 46304, Cur Loss: 0.00014454, Cur Avg Loss: 0.00054002, Log Avg loss: 0.00059575, Global Avg Loss: 0.01074385, Time: 0.0798 Steps: 97400, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003094, Sample Num: 49504, Cur Loss: 0.00007418, Cur Avg Loss: 0.00053484, Log Avg loss: 0.00045984, Global Avg Loss: 0.01072277, Time: 0.0617 Steps: 97600, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003294, Sample Num: 52704, Cur Loss: 0.00002655, Cur Avg Loss: 0.00053001, Log Avg loss: 0.00045533, Global Avg Loss: 0.01070178, Time: 0.1055 Steps: 97800, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003494, Sample Num: 55904, Cur Loss: 0.00005117, Cur Avg Loss: 0.00052976, Log Avg loss: 0.00052572, Global Avg Loss: 0.01068101, Time: 0.0603 Steps: 98000, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003694, Sample Num: 59104, Cur Loss: 0.00041537, Cur Avg Loss: 0.00053357, Log Avg loss: 0.00060001, Global Avg Loss: 0.01066048, Time: 0.0605 Steps: 98200, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003894, Sample Num: 62304, Cur Loss: 0.00009475, Cur Avg Loss: 0.00052672, Log Avg loss: 0.00040026, Global Avg Loss: 0.01063962, Time: 0.1161 Steps: 98400, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 004094, Sample Num: 65504, Cur Loss: 0.00009409, Cur Avg Loss: 0.00052965, Log Avg loss: 0.00058668, Global Avg Loss: 0.01061923, Time: 0.0653 Steps: 98600, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 004294, Sample Num: 68704, Cur Loss: 0.00216800, Cur Avg Loss: 0.00052971, Log Avg loss: 0.00053095, Global Avg Loss: 0.01059881, Time: 0.1438 Steps: 98800, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 004494, Sample Num: 71904, Cur Loss: 0.00015591, Cur Avg Loss: 0.00052962, Log Avg loss: 0.00052762, Global Avg Loss: 0.01057846, Time: 0.0485 Steps: 99000, Updated lr: 0.000060 Training, Epoch: 0020, Batch: 004694, Sample Num: 75104, Cur Loss: 0.00005032, Cur Avg Loss: 0.00053318, Log Avg loss: 0.00061328, Global Avg Loss: 0.01055837, Time: 0.0578 Steps: 99200, Updated lr: 0.000060 Training, Epoch: 0020, Batch: 004894, Sample Num: 78304, Cur Loss: 0.00043705, Cur Avg Loss: 0.00053755, Log Avg loss: 0.00064003, Global Avg Loss: 0.01053842, Time: 0.0748 Steps: 99400, Updated lr: 0.000060 ***** Running evaluation checkpoint-99480 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-99480 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 387.750691, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001396, "eval_total_loss": 1.488253, "eval_acc": 0.999661, "eval_jaccard": 0.988376, "eval_prec": 0.990661, "eval_recall": 0.989794, "eval_f1": 0.989715, "eval_pr_auc": 0.995552, "eval_roc_auc": 0.999369, "eval_fmax": 0.994539, "eval_pmax": 0.997303, "eval_rmax": 0.991791, "eval_tmax": 0.27, "update_flag": true, 
"test_avg_loss": 0.001471, "test_total_loss": 1.567874, "test_acc": 0.999652, "test_jaccard": 0.98688, "test_prec": 0.988907, "test_recall": 0.988727, "test_f1": 0.988285, "test_pr_auc": 0.994752, "test_roc_auc": 0.99932, "test_fmax": 0.994162, "test_pmax": 0.99682, "test_rmax": 0.991518, "test_tmax": 0.26, "lr": 6.024222850222043e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.010530411157436944, "train_cur_epoch_loss": 2.6775529595231546, "train_cur_epoch_avg_loss": 0.0005383098028796049, "train_cur_epoch_time": 387.75069093704224, "train_cur_epoch_avg_time": 0.07795550682288746, "epoch": 20, "step": 99480} ################################################## Training, Epoch: 0021, Batch: 000120, Sample Num: 1920, Cur Loss: 0.00016184, Cur Avg Loss: 0.00060787, Log Avg loss: 0.00059869, Global Avg Loss: 0.01051846, Time: 0.1156 Steps: 99600, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000320, Sample Num: 5120, Cur Loss: 0.00010420, Cur Avg Loss: 0.00058743, Log Avg loss: 0.00057516, Global Avg Loss: 0.01049853, Time: 0.0687 Steps: 99800, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00003786, Cur Avg Loss: 0.00065699, Log Avg loss: 0.00076829, Global Avg Loss: 0.01047907, Time: 0.1173 Steps: 100000, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000720, Sample Num: 11520, Cur Loss: 0.00002403, Cur Avg Loss: 0.00062980, Log Avg loss: 0.00055911, Global Avg Loss: 0.01045927, Time: 0.0630 Steps: 100200, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000920, Sample Num: 14720, Cur Loss: 0.00004034, Cur Avg Loss: 0.00060481, Log Avg loss: 0.00051484, Global Avg Loss: 0.01043946, Time: 0.0272 Steps: 100400, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001120, Sample Num: 17920, Cur Loss: 0.00004155, Cur Avg Loss: 0.00058778, Log Avg loss: 0.00050943, Global Avg Loss: 0.01041972, Time: 0.0603 Steps: 100600, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001320, Sample Num: 21120, Cur Loss: 0.00073073, Cur Avg Loss: 0.00055388, Log Avg loss: 0.00036404, Global Avg Loss: 0.01039977, Time: 0.0923 Steps: 100800, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00004772, Cur Avg Loss: 0.00052093, Log Avg loss: 0.00030349, Global Avg Loss: 0.01037977, Time: 0.0601 Steps: 101000, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001720, Sample Num: 27520, Cur Loss: 0.00059901, Cur Avg Loss: 0.00050736, Log Avg loss: 0.00040421, Global Avg Loss: 0.01036006, Time: 0.0612 Steps: 101200, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00002889, Cur Avg Loss: 0.00048881, Log Avg loss: 0.00032925, Global Avg Loss: 0.01034027, Time: 0.0687 Steps: 101400, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002120, Sample Num: 33920, Cur Loss: 0.00126716, Cur Avg Loss: 0.00050393, Log Avg loss: 0.00064914, Global Avg Loss: 0.01032120, Time: 0.0607 Steps: 101600, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00001588, Cur Avg Loss: 0.00050698, Log Avg loss: 0.00053933, Global Avg Loss: 0.01030198, Time: 0.0610 Steps: 101800, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002520, Sample Num: 40320, Cur Loss: 0.00003975, Cur Avg Loss: 0.00049919, Log Avg loss: 0.00040880, Global Avg Loss: 0.01028258, Time: 0.0943 Steps: 102000, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00001987, Cur Avg Loss: 0.00049829, Log Avg loss: 0.00048694, Global Avg Loss: 0.01026341, Time: 
0.1094 Steps: 102200, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002920, Sample Num: 46720, Cur Loss: 0.00003864, Cur Avg Loss: 0.00049721, Log Avg loss: 0.00048258, Global Avg Loss: 0.01024431, Time: 0.0601 Steps: 102400, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003120, Sample Num: 49920, Cur Loss: 0.00136592, Cur Avg Loss: 0.00049534, Log Avg loss: 0.00046803, Global Avg Loss: 0.01022525, Time: 0.0635 Steps: 102600, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003320, Sample Num: 53120, Cur Loss: 0.00088475, Cur Avg Loss: 0.00048926, Log Avg loss: 0.00039430, Global Avg Loss: 0.01020612, Time: 0.1211 Steps: 102800, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00002962, Cur Avg Loss: 0.00048910, Log Avg loss: 0.00048654, Global Avg Loss: 0.01018725, Time: 0.0687 Steps: 103000, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003720, Sample Num: 59520, Cur Loss: 0.00001489, Cur Avg Loss: 0.00048920, Log Avg loss: 0.00049100, Global Avg Loss: 0.01016846, Time: 0.1130 Steps: 103200, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003920, Sample Num: 62720, Cur Loss: 0.00034932, Cur Avg Loss: 0.00048338, Log Avg loss: 0.00037508, Global Avg Loss: 0.01014952, Time: 0.1146 Steps: 103400, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00018964, Cur Avg Loss: 0.00048755, Log Avg loss: 0.00056923, Global Avg Loss: 0.01013102, Time: 0.0603 Steps: 103600, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 004320, Sample Num: 69120, Cur Loss: 0.00001055, Cur Avg Loss: 0.00048730, Log Avg loss: 0.00048220, Global Avg Loss: 0.01011243, Time: 0.0686 Steps: 103800, Updated lr: 0.000058 Training, Epoch: 0021, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00050312, Cur Avg Loss: 0.00048857, Log Avg loss: 0.00051588, Global Avg Loss: 0.01009398, Time: 0.1896 Steps: 104000, Updated lr: 0.000058 Training, Epoch: 0021, Batch: 004720, Sample Num: 75520, Cur Loss: 0.00000895, Cur Avg Loss: 0.00049035, Log Avg loss: 0.00053066, Global Avg Loss: 0.01007562, Time: 0.1197 Steps: 104200, Updated lr: 0.000058 Training, Epoch: 0021, Batch: 004920, Sample Num: 78720, Cur Loss: 0.00004056, Cur Avg Loss: 0.00049267, Log Avg loss: 0.00054736, Global Avg Loss: 0.01005737, Time: 0.0678 Steps: 104400, Updated lr: 0.000058 ***** Running evaluation checkpoint-104454 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-104454 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 373.142107, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001404, "eval_total_loss": 1.497071, "eval_acc": 0.999654, "eval_jaccard": 0.988198, "eval_prec": 0.990593, "eval_recall": 0.989625, "eval_f1": 0.989533, "eval_pr_auc": 0.995538, "eval_roc_auc": 0.999371, "eval_fmax": 0.994556, "eval_pmax": 0.997332, "eval_rmax": 0.991796, "eval_tmax": 0.27, "update_flag": false, "test_avg_loss": 0.001479, "test_total_loss": 1.576646, "test_acc": 0.999663, "test_jaccard": 0.987108, "test_prec": 0.989029, "test_recall": 0.988721, "test_f1": 0.988398, "test_pr_auc": 0.994914, "test_roc_auc": 0.999322, "test_fmax": 0.99401, "test_pmax": 0.996049, "test_rmax": 0.991979, "test_tmax": 0.22, "lr": 5.8234154218813086e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.010052516885705222, "train_cur_epoch_loss": 
2.4602968376261742, "train_cur_epoch_avg_loss": 0.000494631451070803, "train_cur_epoch_time": 373.1421067714691, "train_cur_epoch_avg_time": 0.0750185176460533, "epoch": 21, "step": 104454} ################################################## Training, Epoch: 0022, Batch: 000146, Sample Num: 2336, Cur Loss: 0.00005112, Cur Avg Loss: 0.00050486, Log Avg loss: 0.00055043, Global Avg Loss: 0.01003919, Time: 0.0605 Steps: 104600, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000346, Sample Num: 5536, Cur Loss: 0.00046884, Cur Avg Loss: 0.00050743, Log Avg loss: 0.00050931, Global Avg Loss: 0.01002100, Time: 0.0615 Steps: 104800, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000546, Sample Num: 8736, Cur Loss: 0.00008659, Cur Avg Loss: 0.00057449, Log Avg loss: 0.00069049, Global Avg Loss: 0.01000323, Time: 0.0689 Steps: 105000, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000746, Sample Num: 11936, Cur Loss: 0.00063175, Cur Avg Loss: 0.00056720, Log Avg loss: 0.00054730, Global Avg Loss: 0.00998525, Time: 0.0630 Steps: 105200, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000946, Sample Num: 15136, Cur Loss: 0.00023810, Cur Avg Loss: 0.00054120, Log Avg loss: 0.00044423, Global Avg Loss: 0.00996715, Time: 0.1186 Steps: 105400, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001146, Sample Num: 18336, Cur Loss: 0.00024246, Cur Avg Loss: 0.00053057, Log Avg loss: 0.00048026, Global Avg Loss: 0.00994918, Time: 0.0614 Steps: 105600, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001346, Sample Num: 21536, Cur Loss: 0.00007169, Cur Avg Loss: 0.00050001, Log Avg loss: 0.00032494, Global Avg Loss: 0.00993099, Time: 0.0760 Steps: 105800, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001546, Sample Num: 24736, Cur Loss: 0.00177069, Cur Avg Loss: 0.00047212, Log Avg loss: 0.00028443, Global Avg Loss: 0.00991279, Time: 0.0592 Steps: 106000, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001746, Sample Num: 27936, Cur Loss: 0.00019131, Cur Avg Loss: 0.00046389, Log Avg loss: 0.00040023, Global Avg Loss: 0.00989487, Time: 0.0612 Steps: 106200, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001946, Sample Num: 31136, Cur Loss: 0.00000577, Cur Avg Loss: 0.00044486, Log Avg loss: 0.00027878, Global Avg Loss: 0.00987680, Time: 0.1181 Steps: 106400, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002146, Sample Num: 34336, Cur Loss: 0.00003409, Cur Avg Loss: 0.00047013, Log Avg loss: 0.00071602, Global Avg Loss: 0.00985961, Time: 0.0635 Steps: 106600, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002346, Sample Num: 37536, Cur Loss: 0.00123149, Cur Avg Loss: 0.00047105, Log Avg loss: 0.00048092, Global Avg Loss: 0.00984205, Time: 0.1331 Steps: 106800, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002546, Sample Num: 40736, Cur Loss: 0.00300530, Cur Avg Loss: 0.00046159, Log Avg loss: 0.00035054, Global Avg Loss: 0.00982431, Time: 0.0600 Steps: 107000, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002746, Sample Num: 43936, Cur Loss: 0.00030934, Cur Avg Loss: 0.00046135, Log Avg loss: 0.00045832, Global Avg Loss: 0.00980683, Time: 0.1021 Steps: 107200, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002946, Sample Num: 47136, Cur Loss: 0.00429864, Cur Avg Loss: 0.00046222, Log Avg loss: 0.00047424, Global Avg Loss: 0.00978945, Time: 0.0719 Steps: 107400, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 003146, Sample Num: 50336, Cur Loss: 0.00001304, Cur Avg Loss: 0.00046084, Log Avg loss: 0.00044044, Global Avg Loss: 0.00977208, Time: 0.0599 Steps: 
107600, Updated lr: 0.000057
Training, Epoch: 0022, Batch: 003346, Sample Num: 53536, Cur Loss: 0.00257562, Cur Avg Loss: 0.00045798, Log Avg loss: 0.00041305, Global Avg Loss: 0.00975471, Time: 0.1183 Steps: 107800, Updated lr: 0.000057
Training, Epoch: 0022, Batch: 003546, Sample Num: 56736, Cur Loss: 0.00001826, Cur Avg Loss: 0.00045712, Log Avg loss: 0.00044274, Global Avg Loss: 0.00973747, Time: 0.0602 Steps: 108000, Updated lr: 0.000057
Training, Epoch: 0022, Batch: 003746, Sample Num: 59936, Cur Loss: 0.00003399, Cur Avg Loss: 0.00045768, Log Avg loss: 0.00046748, Global Avg Loss: 0.00972033, Time: 0.0684 Steps: 108200, Updated lr: 0.000057
Training, Epoch: 0022, Batch: 003946, Sample Num: 63136, Cur Loss: 0.00181636, Cur Avg Loss: 0.00045556, Log Avg loss: 0.00041601, Global Avg Loss: 0.00970317, Time: 0.0605 Steps: 108400, Updated lr: 0.000057
Training, Epoch: 0022, Batch: 004146, Sample Num: 66336, Cur Loss: 0.00014021, Cur Avg Loss: 0.00045848, Log Avg loss: 0.00051600, Global Avg Loss: 0.00968625, Time: 0.1194 Steps: 108600, Updated lr: 0.000057
Training, Epoch: 0022, Batch: 004346, Sample Num: 69536, Cur Loss: 0.00001874, Cur Avg Loss: 0.00045754, Log Avg loss: 0.00043800, Global Avg Loss: 0.00966925, Time: 0.0579 Steps: 108800, Updated lr: 0.000056
Training, Epoch: 0022, Batch: 004546, Sample Num: 72736, Cur Loss: 0.00020429, Cur Avg Loss: 0.00046343, Log Avg loss: 0.00059141, Global Avg Loss: 0.00965259, Time: 0.0601 Steps: 109000, Updated lr: 0.000056
Training, Epoch: 0022, Batch: 004746, Sample Num: 75936, Cur Loss: 0.00002136, Cur Avg Loss: 0.00046148, Log Avg loss: 0.00041718, Global Avg Loss: 0.00963568, Time: 0.1165 Steps: 109200, Updated lr: 0.000056
Training, Epoch: 0022, Batch: 004946, Sample Num: 79136, Cur Loss: 0.00024222, Cur Avg Loss: 0.00046526, Log Avg loss: 0.00055494, Global Avg Loss: 0.00961907, Time: 0.1115 Steps: 109400, Updated lr: 0.000056
***** Running evaluation checkpoint-109428 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-109428 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 395.119971, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001395, "eval_total_loss": 1.487299, "eval_acc": 0.999659, "eval_jaccard": 0.988358, "eval_prec": 0.990611, "eval_recall": 0.989871, "eval_f1": 0.9897, "eval_pr_auc": 0.995444, "eval_roc_auc": 0.999367, "eval_fmax": 0.994699, "eval_pmax": 0.996511, "eval_rmax": 0.992893, "eval_tmax": 0.2, "update_flag": false, "test_avg_loss": 0.001463, "test_total_loss": 1.559455, "test_acc": 0.999664, "test_jaccard": 0.987253, "test_prec": 0.989077, "test_recall": 0.989044, "test_f1": 0.988558, "test_pr_auc": 0.994908, "test_roc_auc": 0.999332, "test_fmax": 0.994303, "test_pmax": 0.996857, "test_rmax": 0.991762, "test_tmax": 0.26, "lr": 5.622607993540574e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.009616735819178601, "train_cur_epoch_loss": 2.3145684416227823, "train_cur_epoch_avg_loss": 0.0004653334221195783, "train_cur_epoch_time": 395.11997079849243, "train_cur_epoch_avg_time": 0.0794370669076181, "epoch": 22, "step": 109428}
##################################################
Training, Epoch: 0023, Batch: 000172, Sample Num: 2752, Cur Loss: 0.00019687, Cur Avg Loss: 0.00054325, Log Avg loss: 0.00053422, Global Avg Loss: 0.00960250, Time: 0.0611 Steps: 109600, Updated lr: 0.000056
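Note on the evaluation blocks: checkpoint selection tracks eval_f1 (best_metric_type is "f1" in the run config), which is why update_flag is false at checkpoints 104454 and 109428 above; their eval_f1 values (0.989533 and 0.9897) do not beat the 0.989715 reached at checkpoint 99480, even though several other metrics still inch upward. The fmax/pmax/rmax/tmax fields come from a threshold sweep in the style of the CAFA Fmax metric: f1 is computed at a single fixed decision threshold (presumably the conventional 0.5 on the sigmoid outputs), while fmax takes the best F1 over a grid of thresholds and reports the precision, recall and threshold at that optimum. That is why fmax always exceeds f1 here and why tmax sits well below 0.5. A micro-averaged sketch (an assumed form; the project's exact averaging may differ):

import numpy as np

def f_max(probs: np.ndarray, labels: np.ndarray, step: float = 0.01):
    """Sweep decision thresholds; return (fmax, pmax, rmax, tmax).
    probs, labels: (n_samples, n_labels) arrays, labels in {0, 1}."""
    fmax = pmax = rmax = tmax = 0.0
    for t in np.arange(step, 1.0, step):
        pred = (probs >= t).astype(int)
        tp = float((pred * labels).sum())
        prec = tp / max(float(pred.sum()), 1.0)   # micro precision at threshold t
        rec = tp / max(float(labels.sum()), 1.0)  # micro recall at threshold t
        f1 = 2 * prec * rec / (prec + rec) if prec + rec > 0 else 0.0
        if f1 > fmax:
            fmax, pmax, rmax, tmax = f1, prec, rec, float(t)
    return fmax, pmax, rmax, tmax

The reported tmax values (0.2, 0.26, 0.29, ...) are consistent with a 0.01-spaced grid of exactly this kind.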
Training, Epoch: 0023, Batch: 000372, Sample Num: 5952, Cur Loss: 0.00008636, Cur Avg Loss: 0.00050991, Log Avg loss: 0.00048124, Global Avg Loss: 0.00958588, Time: 0.0609 Steps: 109800, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 000572, Sample Num: 9152, Cur Loss: 0.00034193, Cur Avg Loss: 0.00056042, Log Avg loss: 0.00065437, Global Avg Loss: 0.00956964, Time: 0.1374 Steps: 110000, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 000772, Sample Num: 12352, Cur Loss: 0.00007783, Cur Avg Loss: 0.00055390, Log Avg loss: 0.00053527, Global Avg Loss: 0.00955325, Time: 0.0598 Steps: 110200, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 000972, Sample Num: 15552, Cur Loss: 0.00003217, Cur Avg Loss: 0.00051998, Log Avg loss: 0.00038905, Global Avg Loss: 0.00953664, Time: 0.0463 Steps: 110400, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 001172, Sample Num: 18752, Cur Loss: 0.00022691, Cur Avg Loss: 0.00050486, Log Avg loss: 0.00043134, Global Avg Loss: 0.00952018, Time: 0.0611 Steps: 110600, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 001372, Sample Num: 21952, Cur Loss: 0.00006088, Cur Avg Loss: 0.00047411, Log Avg loss: 0.00029394, Global Avg Loss: 0.00950353, Time: 0.0600 Steps: 110800, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 001572, Sample Num: 25152, Cur Loss: 0.00001200, Cur Avg Loss: 0.00044324, Log Avg loss: 0.00023149, Global Avg Loss: 0.00948682, Time: 0.0771 Steps: 111000, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 001772, Sample Num: 28352, Cur Loss: 0.00001988, Cur Avg Loss: 0.00043239, Log Avg loss: 0.00034712, Global Avg Loss: 0.00947038, Time: 0.0642 Steps: 111200, Updated lr: 0.000056
Training, Epoch: 0023, Batch: 001972, Sample Num: 31552, Cur Loss: 0.00017419, Cur Avg Loss: 0.00041995, Log Avg loss: 0.00030970, Global Avg Loss: 0.00945393, Time: 0.0616 Steps: 111400, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 002172, Sample Num: 34752, Cur Loss: 0.00046581, Cur Avg Loss: 0.00043372, Log Avg loss: 0.00056950, Global Avg Loss: 0.00943801, Time: 0.0608 Steps: 111600, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 002372, Sample Num: 37952, Cur Loss: 0.00005983, Cur Avg Loss: 0.00043711, Log Avg loss: 0.00047391, Global Avg Loss: 0.00942198, Time: 0.0328 Steps: 111800, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 002572, Sample Num: 41152, Cur Loss: 0.00003772, Cur Avg Loss: 0.00042580, Log Avg loss: 0.00029164, Global Avg Loss: 0.00940567, Time: 0.1697 Steps: 112000, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 002772, Sample Num: 44352, Cur Loss: 0.00017987, Cur Avg Loss: 0.00043315, Log Avg loss: 0.00052767, Global Avg Loss: 0.00938985, Time: 0.1424 Steps: 112200, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 002972, Sample Num: 47552, Cur Loss: 0.00001783, Cur Avg Loss: 0.00043489, Log Avg loss: 0.00045905, Global Avg Loss: 0.00937396, Time: 0.0650 Steps: 112400, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 003172, Sample Num: 50752, Cur Loss: 0.00016360, Cur Avg Loss: 0.00043514, Log Avg loss: 0.00043879, Global Avg Loss: 0.00935809, Time: 0.0610 Steps: 112600, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 003372, Sample Num: 53952, Cur Loss: 0.00043080, Cur Avg Loss: 0.00042986, Log Avg loss: 0.00034614, Global Avg Loss: 0.00934211, Time: 0.0603 Steps: 112800, Updated lr: 0.000055
Training, Epoch: 0023, Batch: 003572, Sample Num: 57152, Cur Loss: 0.00001638, Cur Avg Loss: 0.00043104, Log Avg loss: 0.00045093, Global Avg Loss: 0.00932637, Time: 0.0594 Steps: 113000, Updated lr: 0.000055
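Note on the learning-rate trail: the per-row "Updated lr" is rounded to six decimals, but the epoch summaries print it in full, and those values match a linear warmup-then-decay schedule exactly. Assuming the run is scheduled for 50 epochs at the logged 4974 steps per epoch (248700 steps in total) with a 1000-step warmup, the lr logged at checkpoint 109428 above, 5.622607993540574e-05, equals 1e-4 * (248700 - 109428) / (248700 - 1000). A sketch of that schedule (reconstructed from the logged values, not taken from the project's code):

def lr_at(step: int,
          base_lr: float = 1e-4,
          warmup_steps: int = 1000,
          total_steps: int = 50 * 4974) -> float:
    """Linear warmup to base_lr, then linear decay to zero (assumed schedule)."""
    if step < warmup_steps:
        return base_lr * step / warmup_steps
    return base_lr * (total_steps - step) / (total_steps - warmup_steps)

# Matches the epoch-22 summary above to floating-point precision:
assert abs(lr_at(109428) - 5.622607993540574e-05) < 1e-15

The same formula reproduces every "lr" field in the surrounding epoch summaries (e.g. 6.626645135244247e-05 at step 84558), so the rounded drift from 0.000056 to 0.000055 in the rows above is just this linear ramp crossing a rounding boundary.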
Training, Epoch: 0023, Batch: 003772, Sample Num: 60352, Cur Loss: 0.00006681, Cur Avg Loss: 0.00042935, Log Avg loss: 0.00039928, Global Avg Loss: 0.00931060, Time: 0.1167 Steps: 113200, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 003972, Sample Num: 63552, Cur Loss: 0.00002217, Cur Avg Loss: 0.00043002, Log Avg loss: 0.00044253, Global Avg Loss: 0.00929496, Time: 0.0616 Steps: 113400, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 004172, Sample Num: 66752, Cur Loss: 0.00006614, Cur Avg Loss: 0.00042923, Log Avg loss: 0.00041362, Global Avg Loss: 0.00927932, Time: 0.0615 Steps: 113600, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 004372, Sample Num: 69952, Cur Loss: 0.00001177, Cur Avg Loss: 0.00042901, Log Avg loss: 0.00042448, Global Avg Loss: 0.00926376, Time: 0.0763 Steps: 113800, Updated lr: 0.000054 Training, Epoch: 0023, Batch: 004572, Sample Num: 73152, Cur Loss: 0.00510280, Cur Avg Loss: 0.00043551, Log Avg loss: 0.00057757, Global Avg Loss: 0.00924852, Time: 0.0550 Steps: 114000, Updated lr: 0.000054 Training, Epoch: 0023, Batch: 004772, Sample Num: 76352, Cur Loss: 0.00241079, Cur Avg Loss: 0.00043306, Log Avg loss: 0.00037689, Global Avg Loss: 0.00923298, Time: 0.0612 Steps: 114200, Updated lr: 0.000054 Training, Epoch: 0023, Batch: 004972, Sample Num: 79552, Cur Loss: 0.00012898, Cur Avg Loss: 0.00043906, Log Avg loss: 0.00058222, Global Avg Loss: 0.00921786, Time: 0.0602 Steps: 114400, Updated lr: 0.000054 ***** Running evaluation checkpoint-114402 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-114402 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 378.642743, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001385, "eval_total_loss": 1.476139, "eval_acc": 0.999664, "eval_jaccard": 0.988375, "eval_prec": 0.990701, "eval_recall": 0.989792, "eval_f1": 0.989689, "eval_pr_auc": 0.995546, "eval_roc_auc": 0.999386, "eval_fmax": 0.994826, "eval_pmax": 0.996682, "eval_rmax": 0.992977, "eval_tmax": 0.18, "update_flag": false, "test_avg_loss": 0.001464, "test_total_loss": 1.560905, "test_acc": 0.999669, "test_jaccard": 0.987187, "test_prec": 0.98904, "test_recall": 0.988977, "test_f1": 0.988496, "test_pr_auc": 0.994878, "test_roc_auc": 0.999322, "test_fmax": 0.994187, "test_pmax": 0.995965, "test_rmax": 0.992416, "test_tmax": 0.18, "lr": 5.421800565199838e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.009217699263686766, "train_cur_epoch_loss": 2.183063943217803, "train_cur_epoch_avg_loss": 0.00043889504286646625, "train_cur_epoch_time": 378.64274287223816, "train_cur_epoch_avg_time": 0.0761243954306872, "epoch": 23, "step": 114402} ################################################## Training, Epoch: 0024, Batch: 000198, Sample Num: 3168, Cur Loss: 0.00005290, Cur Avg Loss: 0.00049703, Log Avg loss: 0.00049245, Global Avg Loss: 0.00920263, Time: 0.0660 Steps: 114600, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 000398, Sample Num: 6368, Cur Loss: 0.00018848, Cur Avg Loss: 0.00047767, Log Avg loss: 0.00045850, Global Avg Loss: 0.00918740, Time: 0.0610 Steps: 114800, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 000598, Sample Num: 9568, Cur Loss: 0.00030375, Cur Avg Loss: 0.00052263, Log Avg loss: 0.00061211, Global Avg Loss: 0.00917248, Time: 0.0817 Steps: 115000, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 
000798, Sample Num: 12768, Cur Loss: 0.00013574, Cur Avg Loss: 0.00050345, Log Avg loss: 0.00044611, Global Avg Loss: 0.00915733, Time: 0.0215 Steps: 115200, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 000998, Sample Num: 15968, Cur Loss: 0.00008345, Cur Avg Loss: 0.00046719, Log Avg loss: 0.00032251, Global Avg Loss: 0.00914202, Time: 0.0616 Steps: 115400, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001198, Sample Num: 19168, Cur Loss: 0.00028748, Cur Avg Loss: 0.00046149, Log Avg loss: 0.00043306, Global Avg Loss: 0.00912696, Time: 0.0985 Steps: 115600, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001398, Sample Num: 22368, Cur Loss: 0.00001117, Cur Avg Loss: 0.00043071, Log Avg loss: 0.00024630, Global Avg Loss: 0.00911162, Time: 0.0614 Steps: 115800, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001598, Sample Num: 25568, Cur Loss: 0.00001288, Cur Avg Loss: 0.00040188, Log Avg loss: 0.00020035, Global Avg Loss: 0.00909625, Time: 0.0611 Steps: 116000, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001798, Sample Num: 28768, Cur Loss: 0.00019347, Cur Avg Loss: 0.00039203, Log Avg loss: 0.00031337, Global Avg Loss: 0.00908114, Time: 0.1213 Steps: 116200, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 001998, Sample Num: 31968, Cur Loss: 0.00001260, Cur Avg Loss: 0.00038679, Log Avg loss: 0.00033964, Global Avg Loss: 0.00906612, Time: 0.0628 Steps: 116400, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002198, Sample Num: 35168, Cur Loss: 0.00004117, Cur Avg Loss: 0.00041018, Log Avg loss: 0.00064387, Global Avg Loss: 0.00905167, Time: 0.0639 Steps: 116600, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002398, Sample Num: 38368, Cur Loss: 0.00301089, Cur Avg Loss: 0.00040577, Log Avg loss: 0.00035728, Global Avg Loss: 0.00903678, Time: 0.0248 Steps: 116800, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002598, Sample Num: 41568, Cur Loss: 0.00001774, Cur Avg Loss: 0.00040448, Log Avg loss: 0.00038904, Global Avg Loss: 0.00902200, Time: 0.0625 Steps: 117000, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002798, Sample Num: 44768, Cur Loss: 0.00001175, Cur Avg Loss: 0.00040879, Log Avg loss: 0.00046479, Global Avg Loss: 0.00900740, Time: 0.0228 Steps: 117200, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002998, Sample Num: 47968, Cur Loss: 0.00013370, Cur Avg Loss: 0.00041322, Log Avg loss: 0.00047521, Global Avg Loss: 0.00899286, Time: 0.0613 Steps: 117400, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003198, Sample Num: 51168, Cur Loss: 0.00000605, Cur Avg Loss: 0.00041051, Log Avg loss: 0.00036982, Global Avg Loss: 0.00897820, Time: 0.0739 Steps: 117600, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003398, Sample Num: 54368, Cur Loss: 0.00024587, Cur Avg Loss: 0.00040554, Log Avg loss: 0.00032619, Global Avg Loss: 0.00896351, Time: 0.1714 Steps: 117800, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003598, Sample Num: 57568, Cur Loss: 0.00001418, Cur Avg Loss: 0.00040502, Log Avg loss: 0.00039610, Global Avg Loss: 0.00894899, Time: 0.0615 Steps: 118000, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003798, Sample Num: 60768, Cur Loss: 0.00004527, Cur Avg Loss: 0.00040421, Log Avg loss: 0.00038967, Global Avg Loss: 0.00893450, Time: 0.0612 Steps: 118200, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003998, Sample Num: 63968, Cur Loss: 0.00002599, Cur Avg Loss: 0.00040609, Log Avg loss: 0.00044186, Global Avg Loss: 0.00892016, Time: 0.0615 Steps: 118400, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 
004198, Sample Num: 67168, Cur Loss: 0.00012864, Cur Avg Loss: 0.00040711, Log Avg loss: 0.00042740, Global Avg Loss: 0.00890584, Time: 0.0603 Steps: 118600, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 004398, Sample Num: 70368, Cur Loss: 0.00262380, Cur Avg Loss: 0.00040608, Log Avg loss: 0.00038436, Global Avg Loss: 0.00889149, Time: 0.0608 Steps: 118800, Updated lr: 0.000052 Training, Epoch: 0024, Batch: 004598, Sample Num: 73568, Cur Loss: 0.00049581, Cur Avg Loss: 0.00041078, Log Avg loss: 0.00051412, Global Avg Loss: 0.00887741, Time: 0.2026 Steps: 119000, Updated lr: 0.000052 Training, Epoch: 0024, Batch: 004798, Sample Num: 76768, Cur Loss: 0.00828678, Cur Avg Loss: 0.00041403, Log Avg loss: 0.00048878, Global Avg Loss: 0.00886334, Time: 0.2160 Steps: 119200, Updated lr: 0.000052 ***** Running evaluation checkpoint-119376 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-119376 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 381.631589, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001415, "eval_total_loss": 1.508492, "eval_acc": 0.999664, "eval_jaccard": 0.988491, "eval_prec": 0.990822, "eval_recall": 0.990002, "eval_f1": 0.989824, "eval_pr_auc": 0.995554, "eval_roc_auc": 0.999381, "eval_fmax": 0.994555, "eval_pmax": 0.997439, "eval_rmax": 0.991688, "eval_tmax": 0.28, "update_flag": true, "test_avg_loss": 0.001478, "test_total_loss": 1.575371, "test_acc": 0.999677, "test_jaccard": 0.987605, "test_prec": 0.989472, "test_recall": 0.989222, "test_f1": 0.988866, "test_pr_auc": 0.994983, "test_roc_auc": 0.999325, "test_fmax": 0.994229, "test_pmax": 0.996894, "test_rmax": 0.991578, "test_tmax": 0.25, "lr": 5.220993136859104e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.008850878786902976, "train_cur_epoch_loss": 2.05927490103619, "train_cur_epoch_avg_loss": 0.0004140078208757921, "train_cur_epoch_time": 381.6315894126892, "train_cur_epoch_avg_time": 0.07672528938735207, "epoch": 24, "step": 119376} ################################################## Training, Epoch: 0025, Batch: 000024, Sample Num: 384, Cur Loss: 0.00001149, Cur Avg Loss: 0.00038584, Log Avg loss: 0.00041018, Global Avg Loss: 0.00884918, Time: 0.0697 Steps: 119400, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000224, Sample Num: 3584, Cur Loss: 0.00003780, Cur Avg Loss: 0.00042343, Log Avg loss: 0.00042794, Global Avg Loss: 0.00883509, Time: 0.1233 Steps: 119600, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000424, Sample Num: 6784, Cur Loss: 0.00004505, Cur Avg Loss: 0.00042415, Log Avg loss: 0.00042496, Global Avg Loss: 0.00882105, Time: 0.0602 Steps: 119800, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000624, Sample Num: 9984, Cur Loss: 0.00022798, Cur Avg Loss: 0.00048973, Log Avg loss: 0.00062876, Global Avg Loss: 0.00880740, Time: 0.0619 Steps: 120000, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000824, Sample Num: 13184, Cur Loss: 0.00206071, Cur Avg Loss: 0.00048536, Log Avg loss: 0.00047173, Global Avg Loss: 0.00879353, Time: 0.1669 Steps: 120200, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001024, Sample Num: 16384, Cur Loss: 0.00001931, Cur Avg Loss: 0.00045608, Log Avg loss: 0.00033545, Global Avg Loss: 0.00877948, Time: 0.0603 Steps: 120400, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001224, Sample Num: 19584, Cur 
Loss: 0.00053672, Cur Avg Loss: 0.00043790, Log Avg loss: 0.00034482, Global Avg Loss: 0.00876549, Time: 0.0685 Steps: 120600, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001424, Sample Num: 22784, Cur Loss: 0.00005256, Cur Avg Loss: 0.00040890, Log Avg loss: 0.00023142, Global Avg Loss: 0.00875136, Time: 0.0610 Steps: 120800, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001624, Sample Num: 25984, Cur Loss: 0.00001125, Cur Avg Loss: 0.00038476, Log Avg loss: 0.00021290, Global Avg Loss: 0.00873725, Time: 0.0610 Steps: 121000, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001824, Sample Num: 29184, Cur Loss: 0.00017531, Cur Avg Loss: 0.00037626, Log Avg loss: 0.00030726, Global Avg Loss: 0.00872334, Time: 0.0553 Steps: 121200, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002024, Sample Num: 32384, Cur Loss: 0.00004795, Cur Avg Loss: 0.00037145, Log Avg loss: 0.00032759, Global Avg Loss: 0.00870951, Time: 0.0581 Steps: 121400, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002224, Sample Num: 35584, Cur Loss: 0.00002135, Cur Avg Loss: 0.00038706, Log Avg loss: 0.00054498, Global Avg Loss: 0.00869608, Time: 0.1151 Steps: 121600, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002424, Sample Num: 38784, Cur Loss: 0.00002508, Cur Avg Loss: 0.00038506, Log Avg loss: 0.00036281, Global Avg Loss: 0.00868240, Time: 0.0613 Steps: 121800, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002624, Sample Num: 41984, Cur Loss: 0.00004467, Cur Avg Loss: 0.00038217, Log Avg loss: 0.00034722, Global Avg Loss: 0.00866873, Time: 0.1178 Steps: 122000, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002824, Sample Num: 45184, Cur Loss: 0.00008498, Cur Avg Loss: 0.00038088, Log Avg loss: 0.00036384, Global Avg Loss: 0.00865514, Time: 0.0647 Steps: 122200, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003024, Sample Num: 48384, Cur Loss: 0.00033243, Cur Avg Loss: 0.00038720, Log Avg loss: 0.00047653, Global Avg Loss: 0.00864178, Time: 0.0406 Steps: 122400, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003224, Sample Num: 51584, Cur Loss: 0.00008945, Cur Avg Loss: 0.00038207, Log Avg loss: 0.00030455, Global Avg Loss: 0.00862818, Time: 0.0595 Steps: 122600, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003424, Sample Num: 54784, Cur Loss: 0.00001699, Cur Avg Loss: 0.00037862, Log Avg loss: 0.00032302, Global Avg Loss: 0.00861465, Time: 0.0696 Steps: 122800, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003624, Sample Num: 57984, Cur Loss: 0.00002002, Cur Avg Loss: 0.00037835, Log Avg loss: 0.00037366, Global Avg Loss: 0.00860125, Time: 0.1510 Steps: 123000, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003824, Sample Num: 61184, Cur Loss: 0.00001305, Cur Avg Loss: 0.00037933, Log Avg loss: 0.00039702, Global Avg Loss: 0.00858793, Time: 0.0602 Steps: 123200, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 004024, Sample Num: 64384, Cur Loss: 0.00005012, Cur Avg Loss: 0.00038243, Log Avg loss: 0.00044167, Global Avg Loss: 0.00857473, Time: 0.1220 Steps: 123400, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 004224, Sample Num: 67584, Cur Loss: 0.00002494, Cur Avg Loss: 0.00038074, Log Avg loss: 0.00034677, Global Avg Loss: 0.00856141, Time: 0.0635 Steps: 123600, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 004424, Sample Num: 70784, Cur Loss: 0.00000104, Cur Avg Loss: 0.00038114, Log Avg loss: 0.00038964, Global Avg Loss: 0.00854821, Time: 0.0616 Steps: 123800, Updated lr: 0.000050 Training, Epoch: 0025, Batch: 004624, Sample Num: 73984, Cur 
Loss: 0.00013746, Cur Avg Loss: 0.00038611, Log Avg loss: 0.00049599, Global Avg Loss: 0.00853522, Time: 0.0596 Steps: 124000, Updated lr: 0.000050 Training, Epoch: 0025, Batch: 004824, Sample Num: 77184, Cur Loss: 0.00007946, Cur Avg Loss: 0.00039009, Log Avg loss: 0.00048210, Global Avg Loss: 0.00852226, Time: 0.0468 Steps: 124200, Updated lr: 0.000050 ***** Running evaluation checkpoint-124350 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-124350 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 374.338203, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001393, "eval_total_loss": 1.484579, "eval_acc": 0.99968, "eval_jaccard": 0.988847, "eval_prec": 0.990842, "eval_recall": 0.990278, "eval_f1": 0.990039, "eval_pr_auc": 0.995552, "eval_roc_auc": 0.999379, "eval_fmax": 0.994693, "eval_pmax": 0.997536, "eval_rmax": 0.991866, "eval_tmax": 0.28, "update_flag": true, "test_avg_loss": 0.001471, "test_total_loss": 1.56836, "test_acc": 0.999675, "test_jaccard": 0.987453, "test_prec": 0.989223, "test_recall": 0.989157, "test_f1": 0.988719, "test_pr_auc": 0.994965, "test_roc_auc": 0.999325, "test_fmax": 0.99431, "test_pmax": 0.99687, "test_rmax": 0.991764, "test_tmax": 0.25, "lr": 5.020185708518369e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.008512442342872718, "train_cur_epoch_loss": 1.9396992708926746, "train_cur_epoch_avg_loss": 0.0003899676861464967, "train_cur_epoch_time": 374.3382034301758, "train_cur_epoch_avg_time": 0.0752589874206224, "epoch": 25, "step": 124350} ################################################## Training, Epoch: 0026, Batch: 000050, Sample Num: 800, Cur Loss: 0.00003809, Cur Avg Loss: 0.00029185, Log Avg loss: 0.00036254, Global Avg Loss: 0.00850914, Time: 0.0698 Steps: 124400, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000250, Sample Num: 4000, Cur Loss: 0.00083138, Cur Avg Loss: 0.00042806, Log Avg loss: 0.00046211, Global Avg Loss: 0.00849622, Time: 0.1142 Steps: 124600, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000450, Sample Num: 7200, Cur Loss: 0.00225090, Cur Avg Loss: 0.00045021, Log Avg loss: 0.00047790, Global Avg Loss: 0.00848337, Time: 0.1138 Steps: 124800, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000650, Sample Num: 10400, Cur Loss: 0.00010266, Cur Avg Loss: 0.00046989, Log Avg loss: 0.00051419, Global Avg Loss: 0.00847062, Time: 0.0239 Steps: 125000, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000850, Sample Num: 13600, Cur Loss: 0.00002629, Cur Avg Loss: 0.00046644, Log Avg loss: 0.00045521, Global Avg Loss: 0.00845782, Time: 0.0607 Steps: 125200, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001050, Sample Num: 16800, Cur Loss: 0.00003197, Cur Avg Loss: 0.00044188, Log Avg loss: 0.00033748, Global Avg Loss: 0.00844487, Time: 0.0608 Steps: 125400, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001250, Sample Num: 20000, Cur Loss: 0.00013754, Cur Avg Loss: 0.00041726, Log Avg loss: 0.00028803, Global Avg Loss: 0.00843188, Time: 0.1143 Steps: 125600, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001450, Sample Num: 23200, Cur Loss: 0.00009869, Cur Avg Loss: 0.00039466, Log Avg loss: 0.00025340, Global Avg Loss: 0.00841887, Time: 0.0985 Steps: 125800, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001650, Sample Num: 26400, Cur Loss: 0.00002666, Cur Avg Loss: 
0.00037364, Log Avg loss: 0.00022122, Global Avg Loss: 0.00840586, Time: 0.0626 Steps: 126000, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001850, Sample Num: 29600, Cur Loss: 0.00105594, Cur Avg Loss: 0.00036494, Log Avg loss: 0.00029318, Global Avg Loss: 0.00839301, Time: 0.0608 Steps: 126200, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002050, Sample Num: 32800, Cur Loss: 0.00000571, Cur Avg Loss: 0.00036340, Log Avg loss: 0.00034915, Global Avg Loss: 0.00838028, Time: 0.0289 Steps: 126400, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002250, Sample Num: 36000, Cur Loss: 0.00146364, Cur Avg Loss: 0.00037100, Log Avg loss: 0.00044890, Global Avg Loss: 0.00836775, Time: 0.1273 Steps: 126600, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002450, Sample Num: 39200, Cur Loss: 0.00000867, Cur Avg Loss: 0.00036796, Log Avg loss: 0.00033372, Global Avg Loss: 0.00835508, Time: 0.1126 Steps: 126800, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002650, Sample Num: 42400, Cur Loss: 0.00430737, Cur Avg Loss: 0.00037213, Log Avg loss: 0.00042333, Global Avg Loss: 0.00834259, Time: 0.1128 Steps: 127000, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002850, Sample Num: 45600, Cur Loss: 0.00002614, Cur Avg Loss: 0.00036759, Log Avg loss: 0.00030738, Global Avg Loss: 0.00832995, Time: 0.0315 Steps: 127200, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003050, Sample Num: 48800, Cur Loss: 0.00007217, Cur Avg Loss: 0.00037433, Log Avg loss: 0.00047031, Global Avg Loss: 0.00831761, Time: 0.0356 Steps: 127400, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003250, Sample Num: 52000, Cur Loss: 0.00037318, Cur Avg Loss: 0.00036794, Log Avg loss: 0.00027051, Global Avg Loss: 0.00830500, Time: 0.0930 Steps: 127600, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003450, Sample Num: 55200, Cur Loss: 0.00001955, Cur Avg Loss: 0.00036840, Log Avg loss: 0.00037590, Global Avg Loss: 0.00829259, Time: 0.1114 Steps: 127800, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003650, Sample Num: 58400, Cur Loss: 0.00001568, Cur Avg Loss: 0.00036858, Log Avg loss: 0.00037166, Global Avg Loss: 0.00828021, Time: 0.0609 Steps: 128000, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003850, Sample Num: 61600, Cur Loss: 0.00003821, Cur Avg Loss: 0.00036400, Log Avg loss: 0.00028053, Global Avg Loss: 0.00826773, Time: 0.0598 Steps: 128200, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 004050, Sample Num: 64800, Cur Loss: 0.00003219, Cur Avg Loss: 0.00036698, Log Avg loss: 0.00042431, Global Avg Loss: 0.00825552, Time: 0.0604 Steps: 128400, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 004250, Sample Num: 68000, Cur Loss: 0.00001174, Cur Avg Loss: 0.00036606, Log Avg loss: 0.00034739, Global Avg Loss: 0.00824322, Time: 0.0237 Steps: 128600, Updated lr: 0.000048 Training, Epoch: 0026, Batch: 004450, Sample Num: 71200, Cur Loss: 0.00382257, Cur Avg Loss: 0.00036441, Log Avg loss: 0.00032932, Global Avg Loss: 0.00823093, Time: 0.1612 Steps: 128800, Updated lr: 0.000048 Training, Epoch: 0026, Batch: 004650, Sample Num: 74400, Cur Loss: 0.00013539, Cur Avg Loss: 0.00036806, Log Avg loss: 0.00044940, Global Avg Loss: 0.00821887, Time: 0.1194 Steps: 129000, Updated lr: 0.000048 Training, Epoch: 0026, Batch: 004850, Sample Num: 77600, Cur Loss: 0.00036903, Cur Avg Loss: 0.00037290, Log Avg loss: 0.00048541, Global Avg Loss: 0.00820689, Time: 0.0608 Steps: 129200, Updated lr: 0.000048 ***** Running evaluation checkpoint-129324 ***** Dev Dataset Instantaneous batch size per GPU 
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-129324 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 382.116991, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001419, "eval_total_loss": 1.512453, "eval_acc": 0.99967, "eval_jaccard": 0.988658, "eval_prec": 0.990791, "eval_recall": 0.990096, "eval_f1": 0.989878, "eval_pr_auc": 0.995516, "eval_roc_auc": 0.999381, "eval_fmax": 0.994762, "eval_pmax": 0.997741, "eval_rmax": 0.9918, "eval_tmax": 0.3, "update_flag": false, "test_avg_loss": 0.001483, "test_total_loss": 1.580985, "test_acc": 0.999672, "test_jaccard": 0.987436, "test_prec": 0.989237, "test_recall": 0.989332, "test_f1": 0.988778, "test_pr_auc": 0.994957, "test_roc_auc": 0.999332, "test_fmax": 0.994349, "test_pmax": 0.996654, "test_rmax": 0.992054, "test_tmax": 0.23, "lr": 4.8193782801776346e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00819942115030748, "train_cur_epoch_loss": 1.8597355061424423, "train_cur_epoch_avg_loss": 0.00037389133617660684, "train_cur_epoch_time": 382.1169910430908, "train_cur_epoch_avg_time": 0.07682287716990165, "epoch": 26, "step": 129324}
##################################################
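Note on the metrics block above: eval_fmax, eval_pmax, and eval_rmax are the F1, precision, and recall at the best decision threshold over the per-label sigmoid outputs, and eval_tmax is that threshold (logged values such as 0.3, 0.23, and 0.19 all sit on a 0.01 grid). A minimal sketch of such a threshold sweep, assuming micro-averaged F1 over a (num_samples, num_labels) probability matrix; the exact averaging used by this trainer is not shown in the log:

import numpy as np

def fmax_sweep(y_true, y_prob, grid=np.arange(0.01, 1.0, 0.01)):
    # y_true, y_prob: (num_samples, num_labels); returns (fmax, pmax, rmax, tmax).
    best = (0.0, 0.0, 0.0, 0.0)
    for t in grid:
        pred = y_prob >= t
        tp = np.sum(pred & (y_true == 1))
        fp = np.sum(pred & (y_true == 0))
        fn = np.sum(~pred & (y_true == 1))
        if tp == 0:
            continue
        p = tp / (tp + fp)
        r = tp / (tp + fn)
        f1 = 2 * p * r / (p + r)
        if f1 > best[0]:
            best = (f1, p, r, float(t))
    return best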
Training, Epoch: 0027, Batch: 000076, Sample Num: 1216, Cur Loss: 0.00399600, Cur Avg Loss: 0.00037063, Log Avg loss: 0.00039663, Global Avg Loss: 0.00819482, Time: 0.1128 Steps: 129400, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 000276, Sample Num: 4416, Cur Loss: 0.00051797, Cur Avg Loss: 0.00039022, Log Avg loss: 0.00039767, Global Avg Loss: 0.00818279, Time: 0.1163 Steps: 129600, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 000476, Sample Num: 7616, Cur Loss: 0.00001626, Cur Avg Loss: 0.00048646, Log Avg loss: 0.00061926, Global Avg Loss: 0.00817114, Time: 0.0608 Steps: 129800, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 000676, Sample Num: 10816, Cur Loss: 0.00004051, Cur Avg Loss: 0.00045478, Log Avg loss: 0.00037940, Global Avg Loss: 0.00815915, Time: 0.0675 Steps: 130000, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 000876, Sample Num: 14016, Cur Loss: 0.00007529, Cur Avg Loss: 0.00044603, Log Avg loss: 0.00041644, Global Avg Loss: 0.00814726, Time: 0.1163 Steps: 130200, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 001076, Sample Num: 17216, Cur Loss: 0.00012503, Cur Avg Loss: 0.00040701, Log Avg loss: 0.00023610, Global Avg Loss: 0.00813512, Time: 0.1152 Steps: 130400, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 001276, Sample Num: 20416, Cur Loss: 0.00000802, Cur Avg Loss: 0.00038550, Log Avg loss: 0.00026976, Global Avg Loss: 0.00812308, Time: 0.0364 Steps: 130600, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 001476, Sample Num: 23616, Cur Loss: 0.00003356, Cur Avg Loss: 0.00036343, Log Avg loss: 0.00022262, Global Avg Loss: 0.00811100, Time: 0.0662 Steps: 130800, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 001676, Sample Num: 26816, Cur Loss: 0.00002084, Cur Avg Loss: 0.00034352, Log Avg loss: 0.00019658, Global Avg Loss: 0.00809891, Time: 0.1799 Steps: 131000, Updated lr: 0.000048
Training, Epoch: 0027, Batch: 001876, Sample Num: 30016, Cur Loss: 0.00002808, Cur Avg Loss: 0.00033372, Log Avg loss: 0.00025166, Global Avg Loss: 0.00808695, Time: 0.0631 Steps: 131200, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 002076, Sample Num: 33216, Cur Loss: 0.00002573, Cur Avg Loss: 0.00034546, Log Avg loss: 0.00045551, Global Avg Loss: 0.00807534, Time: 0.0603 Steps: 131400, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 002276, Sample Num: 36416, Cur Loss: 0.00005759, Cur Avg Loss: 0.00035382, Log Avg loss: 0.00044065, Global Avg Loss: 0.00806373, Time: 0.0609 Steps: 131600, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 002476, Sample Num: 39616, Cur Loss: 0.00035058, Cur Avg Loss: 0.00035060, Log Avg loss: 0.00031393, Global Avg Loss: 0.00805197, Time: 0.0920 Steps: 131800, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 002676, Sample Num: 42816, Cur Loss: 0.00565727, Cur Avg Loss: 0.00035513, Log Avg loss: 0.00041122, Global Avg Loss: 0.00804040, Time: 0.1108 Steps: 132000, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 002876, Sample Num: 46016, Cur Loss: 0.00030208, Cur Avg Loss: 0.00035697, Log Avg loss: 0.00038155, Global Avg Loss: 0.00802881, Time: 0.1151 Steps: 132200, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 003076, Sample Num: 49216, Cur Loss: 0.00015001, Cur Avg Loss: 0.00035688, Log Avg loss: 0.00035565, Global Avg Loss: 0.00801722, Time: 0.1085 Steps: 132400, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 003276, Sample Num: 52416, Cur Loss: 0.00039774, Cur Avg Loss: 0.00035242, Log Avg loss: 0.00028382, Global Avg Loss: 0.00800555, Time: 0.1156 Steps: 132600, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 003476, Sample Num: 55616, Cur Loss: 0.00005393, Cur Avg Loss: 0.00035012, Log Avg loss: 0.00031238, Global Avg Loss: 0.00799397, Time: 0.0594 Steps: 132800, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 003676, Sample Num: 58816, Cur Loss: 0.00013216, Cur Avg Loss: 0.00035314, Log Avg loss: 0.00040570, Global Avg Loss: 0.00798256, Time: 0.1067 Steps: 133000, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 003876, Sample Num: 62016, Cur Loss: 0.00006020, Cur Avg Loss: 0.00034637, Log Avg loss: 0.00022184, Global Avg Loss: 0.00797090, Time: 0.0890 Steps: 133200, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 004076, Sample Num: 65216, Cur Loss: 0.00005085, Cur Avg Loss: 0.00034911, Log Avg loss: 0.00040218, Global Avg Loss: 0.00795956, Time: 0.0885 Steps: 133400, Updated lr: 0.000047
Training, Epoch: 0027, Batch: 004276, Sample Num: 68416, Cur Loss: 0.00001805, Cur Avg Loss: 0.00035116, Log Avg loss: 0.00039296, Global Avg Loss: 0.00794823, Time: 0.0627 Steps: 133600, Updated lr: 0.000046
Training, Epoch: 0027, Batch: 004476, Sample Num: 71616, Cur Loss: 0.00002382, Cur Avg Loss: 0.00035190, Log Avg loss: 0.00036780, Global Avg Loss: 0.00793690, Time: 0.0608 Steps: 133800, Updated lr: 0.000046
Training, Epoch: 0027, Batch: 004676, Sample Num: 74816, Cur Loss: 0.00004179, Cur Avg Loss: 0.00035704, Log Avg loss: 0.00047204, Global Avg Loss: 0.00792576, Time: 0.0616 Steps: 134000, Updated lr: 0.000046
Training, Epoch: 0027, Batch: 004876, Sample Num: 78016, Cur Loss: 0.00007855, Cur Avg Loss: 0.00036173, Log Avg loss: 0.00047141, Global Avg Loss: 0.00791465, Time: 0.0606 Steps: 134200, Updated lr: 0.000046
***** Running evaluation checkpoint-134298 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-134298 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 381.478228, Avg time per batch (s): 0.080000
"eval_prec": 0.990787, "eval_recall": 0.990293, "eval_f1": 0.989992, "eval_pr_auc": 0.995516, "eval_roc_auc": 0.999391, "eval_fmax": 0.994709, "eval_pmax": 0.996671, "eval_rmax": 0.992755, "eval_tmax": 0.19, "update_flag": false, "test_avg_loss": 0.001503, "test_total_loss": 1.602142, "test_acc": 0.999677, "test_jaccard": 0.987728, "test_prec": 0.989489, "test_recall": 0.989554, "test_f1": 0.98903, "test_pr_auc": 0.994743, "test_roc_auc": 0.999324, "test_fmax": 0.994376, "test_pmax": 0.996964, "test_rmax": 0.9918, "test_tmax": 0.26, "lr": 4.6185708518368994e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.007909221226844561, "train_cur_epoch_loss": 1.810651480405312, "train_cur_epoch_avg_loss": 0.0003640232168084664, "train_cur_epoch_time": 381.47822761535645, "train_cur_epoch_avg_time": 0.07669445669790037, "epoch": 27, "step": 134298} ################################################## Training, Epoch: 0028, Batch: 000102, Sample Num: 1632, Cur Loss: 0.00006782, Cur Avg Loss: 0.00033014, Log Avg loss: 0.00040266, Global Avg Loss: 0.00790347, Time: 0.1131 Steps: 134400, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000302, Sample Num: 4832, Cur Loss: 0.00004362, Cur Avg Loss: 0.00037673, Log Avg loss: 0.00040049, Global Avg Loss: 0.00789232, Time: 0.0544 Steps: 134600, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000502, Sample Num: 8032, Cur Loss: 0.00000288, Cur Avg Loss: 0.00046108, Log Avg loss: 0.00058846, Global Avg Loss: 0.00788148, Time: 0.0245 Steps: 134800, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000702, Sample Num: 11232, Cur Loss: 0.00002539, Cur Avg Loss: 0.00043883, Log Avg loss: 0.00038297, Global Avg Loss: 0.00787038, Time: 0.0613 Steps: 135000, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000902, Sample Num: 14432, Cur Loss: 0.00092909, Cur Avg Loss: 0.00042431, Log Avg loss: 0.00037337, Global Avg Loss: 0.00785928, Time: 0.0615 Steps: 135200, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001102, Sample Num: 17632, Cur Loss: 0.00000243, Cur Avg Loss: 0.00039707, Log Avg loss: 0.00027419, Global Avg Loss: 0.00784808, Time: 0.0553 Steps: 135400, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001302, Sample Num: 20832, Cur Loss: 0.00005530, Cur Avg Loss: 0.00037271, Log Avg loss: 0.00023849, Global Avg Loss: 0.00783686, Time: 0.1080 Steps: 135600, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001502, Sample Num: 24032, Cur Loss: 0.00001242, Cur Avg Loss: 0.00035097, Log Avg loss: 0.00020943, Global Avg Loss: 0.00782562, Time: 0.0553 Steps: 135800, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001702, Sample Num: 27232, Cur Loss: 0.00008696, Cur Avg Loss: 0.00033889, Log Avg loss: 0.00024815, Global Avg Loss: 0.00781448, Time: 0.1227 Steps: 136000, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 001902, Sample Num: 30432, Cur Loss: 0.00024672, Cur Avg Loss: 0.00032714, Log Avg loss: 0.00022717, Global Avg Loss: 0.00780334, Time: 0.0597 Steps: 136200, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002102, Sample Num: 33632, Cur Loss: 0.00036959, Cur Avg Loss: 0.00034588, Log Avg loss: 0.00052408, Global Avg Loss: 0.00779267, Time: 0.1011 Steps: 136400, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002302, Sample Num: 36832, Cur Loss: 0.00020159, Cur Avg Loss: 0.00035047, Log Avg loss: 0.00039875, Global Avg Loss: 0.00778184, Time: 0.0614 Steps: 136600, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002502, Sample Num: 40032, Cur Loss: 0.00000949, Cur Avg Loss: 0.00034281, Log Avg loss: 0.00025465, Global Avg 
Training, Epoch: 0028, Batch: 000102, Sample Num: 1632, Cur Loss: 0.00006782, Cur Avg Loss: 0.00033014, Log Avg loss: 0.00040266, Global Avg Loss: 0.00790347, Time: 0.1131 Steps: 134400, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 000302, Sample Num: 4832, Cur Loss: 0.00004362, Cur Avg Loss: 0.00037673, Log Avg loss: 0.00040049, Global Avg Loss: 0.00789232, Time: 0.0544 Steps: 134600, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 000502, Sample Num: 8032, Cur Loss: 0.00000288, Cur Avg Loss: 0.00046108, Log Avg loss: 0.00058846, Global Avg Loss: 0.00788148, Time: 0.0245 Steps: 134800, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 000702, Sample Num: 11232, Cur Loss: 0.00002539, Cur Avg Loss: 0.00043883, Log Avg loss: 0.00038297, Global Avg Loss: 0.00787038, Time: 0.0613 Steps: 135000, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 000902, Sample Num: 14432, Cur Loss: 0.00092909, Cur Avg Loss: 0.00042431, Log Avg loss: 0.00037337, Global Avg Loss: 0.00785928, Time: 0.0615 Steps: 135200, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 001102, Sample Num: 17632, Cur Loss: 0.00000243, Cur Avg Loss: 0.00039707, Log Avg loss: 0.00027419, Global Avg Loss: 0.00784808, Time: 0.0553 Steps: 135400, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 001302, Sample Num: 20832, Cur Loss: 0.00005530, Cur Avg Loss: 0.00037271, Log Avg loss: 0.00023849, Global Avg Loss: 0.00783686, Time: 0.1080 Steps: 135600, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 001502, Sample Num: 24032, Cur Loss: 0.00001242, Cur Avg Loss: 0.00035097, Log Avg loss: 0.00020943, Global Avg Loss: 0.00782562, Time: 0.0553 Steps: 135800, Updated lr: 0.000046
Training, Epoch: 0028, Batch: 001702, Sample Num: 27232, Cur Loss: 0.00008696, Cur Avg Loss: 0.00033889, Log Avg loss: 0.00024815, Global Avg Loss: 0.00781448, Time: 0.1227 Steps: 136000, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 001902, Sample Num: 30432, Cur Loss: 0.00024672, Cur Avg Loss: 0.00032714, Log Avg loss: 0.00022717, Global Avg Loss: 0.00780334, Time: 0.0597 Steps: 136200, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 002102, Sample Num: 33632, Cur Loss: 0.00036959, Cur Avg Loss: 0.00034588, Log Avg loss: 0.00052408, Global Avg Loss: 0.00779267, Time: 0.1011 Steps: 136400, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 002302, Sample Num: 36832, Cur Loss: 0.00020159, Cur Avg Loss: 0.00035047, Log Avg loss: 0.00039875, Global Avg Loss: 0.00778184, Time: 0.0614 Steps: 136600, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 002502, Sample Num: 40032, Cur Loss: 0.00000949, Cur Avg Loss: 0.00034281, Log Avg loss: 0.00025465, Global Avg Loss: 0.00777084, Time: 0.0253 Steps: 136800, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 002702, Sample Num: 43232, Cur Loss: 0.00000633, Cur Avg Loss: 0.00034389, Log Avg loss: 0.00035738, Global Avg Loss: 0.00776001, Time: 0.1160 Steps: 137000, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 002902, Sample Num: 46432, Cur Loss: 0.00001232, Cur Avg Loss: 0.00034554, Log Avg loss: 0.00036778, Global Avg Loss: 0.00774924, Time: 0.0615 Steps: 137200, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 003102, Sample Num: 49632, Cur Loss: 0.00000642, Cur Avg Loss: 0.00034692, Log Avg loss: 0.00036692, Global Avg Loss: 0.00773849, Time: 0.0792 Steps: 137400, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 003302, Sample Num: 52832, Cur Loss: 0.00002098, Cur Avg Loss: 0.00034082, Log Avg loss: 0.00024625, Global Avg Loss: 0.00772760, Time: 0.0584 Steps: 137600, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 003502, Sample Num: 56032, Cur Loss: 0.00078037, Cur Avg Loss: 0.00033914, Log Avg loss: 0.00031141, Global Avg Loss: 0.00771684, Time: 0.0563 Steps: 137800, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 003702, Sample Num: 59232, Cur Loss: 0.00001287, Cur Avg Loss: 0.00034057, Log Avg loss: 0.00036572, Global Avg Loss: 0.00770618, Time: 0.0586 Steps: 138000, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 003902, Sample Num: 62432, Cur Loss: 0.00004339, Cur Avg Loss: 0.00033612, Log Avg loss: 0.00025367, Global Avg Loss: 0.00769540, Time: 0.0602 Steps: 138200, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 004102, Sample Num: 65632, Cur Loss: 0.00241511, Cur Avg Loss: 0.00033652, Log Avg loss: 0.00034439, Global Avg Loss: 0.00768478, Time: 0.1180 Steps: 138400, Updated lr: 0.000045
Training, Epoch: 0028, Batch: 004302, Sample Num: 68832, Cur Loss: 0.00000979, Cur Avg Loss: 0.00033805, Log Avg loss: 0.00036933, Global Avg Loss: 0.00767422, Time: 0.1217 Steps: 138600, Updated lr: 0.000044
Training, Epoch: 0028, Batch: 004502, Sample Num: 72032, Cur Loss: 0.00003523, Cur Avg Loss: 0.00034142, Log Avg loss: 0.00041395, Global Avg Loss: 0.00766376, Time: 0.0904 Steps: 138800, Updated lr: 0.000044
Training, Epoch: 0028, Batch: 004702, Sample Num: 75232, Cur Loss: 0.00016115, Cur Avg Loss: 0.00034297, Log Avg loss: 0.00037795, Global Avg Loss: 0.00765328, Time: 0.0611 Steps: 139000, Updated lr: 0.000044
Training, Epoch: 0028, Batch: 004902, Sample Num: 78432, Cur Loss: 0.00000843, Cur Avg Loss: 0.00034698, Log Avg loss: 0.00044127, Global Avg Loss: 0.00764291, Time: 0.0615 Steps: 139200, Updated lr: 0.000044
***** Running evaluation checkpoint-139272 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-139272 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 389.701471, Avg time per batch (s): 0.080000
0.99933, "test_fmax": 0.994448, "test_pmax": 0.996835, "test_rmax": 0.992074, "test_tmax": 0.24, "lr": 4.417763423496165e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.007639207234210993, "train_cur_epoch_loss": 1.7350776002620876, "train_cur_epoch_avg_loss": 0.0003488294331045612, "train_cur_epoch_time": 389.70147132873535, "train_cur_epoch_avg_time": 0.07834770231779963, "epoch": 28, "step": 139272} ################################################## Training, Epoch: 0029, Batch: 000128, Sample Num: 2048, Cur Loss: 0.00353753, Cur Avg Loss: 0.00039920, Log Avg loss: 0.00042629, Global Avg Loss: 0.00763256, Time: 0.1426 Steps: 139400, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000328, Sample Num: 5248, Cur Loss: 0.00036845, Cur Avg Loss: 0.00038191, Log Avg loss: 0.00037085, Global Avg Loss: 0.00762216, Time: 0.0606 Steps: 139600, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000528, Sample Num: 8448, Cur Loss: 0.00024277, Cur Avg Loss: 0.00043079, Log Avg loss: 0.00051094, Global Avg Loss: 0.00761198, Time: 0.1137 Steps: 139800, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000728, Sample Num: 11648, Cur Loss: 0.00201656, Cur Avg Loss: 0.00042477, Log Avg loss: 0.00040889, Global Avg Loss: 0.00760169, Time: 0.1257 Steps: 140000, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000928, Sample Num: 14848, Cur Loss: 0.00003049, Cur Avg Loss: 0.00041587, Log Avg loss: 0.00038349, Global Avg Loss: 0.00759140, Time: 0.0614 Steps: 140200, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001128, Sample Num: 18048, Cur Loss: 0.00001183, Cur Avg Loss: 0.00038894, Log Avg loss: 0.00026398, Global Avg Loss: 0.00758096, Time: 0.0609 Steps: 140400, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001328, Sample Num: 21248, Cur Loss: 0.00003756, Cur Avg Loss: 0.00036199, Log Avg loss: 0.00020998, Global Avg Loss: 0.00757047, Time: 0.0602 Steps: 140600, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001528, Sample Num: 24448, Cur Loss: 0.00001584, Cur Avg Loss: 0.00033952, Log Avg loss: 0.00019030, Global Avg Loss: 0.00755999, Time: 0.0606 Steps: 140800, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001728, Sample Num: 27648, Cur Loss: 0.00002103, Cur Avg Loss: 0.00032914, Log Avg loss: 0.00024984, Global Avg Loss: 0.00754962, Time: 0.0593 Steps: 141000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 001928, Sample Num: 30848, Cur Loss: 0.00024554, Cur Avg Loss: 0.00031830, Log Avg loss: 0.00022466, Global Avg Loss: 0.00753924, Time: 0.0695 Steps: 141200, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002128, Sample Num: 34048, Cur Loss: 0.00004227, Cur Avg Loss: 0.00032973, Log Avg loss: 0.00043992, Global Avg Loss: 0.00752920, Time: 0.0609 Steps: 141400, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002328, Sample Num: 37248, Cur Loss: 0.00001560, Cur Avg Loss: 0.00033417, Log Avg loss: 0.00038134, Global Avg Loss: 0.00751911, Time: 0.0702 Steps: 141600, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002528, Sample Num: 40448, Cur Loss: 0.00005881, Cur Avg Loss: 0.00032410, Log Avg loss: 0.00020693, Global Avg Loss: 0.00750879, Time: 0.0687 Steps: 141800, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002728, Sample Num: 43648, Cur Loss: 0.00003899, Cur Avg Loss: 0.00032781, Log Avg loss: 0.00037469, Global Avg Loss: 0.00749875, Time: 0.0599 Steps: 142000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002928, Sample Num: 46848, Cur Loss: 0.00131877, Cur Avg Loss: 0.00032979, Log Avg loss: 0.00035687, Global Avg Loss: 0.00748870, Time: 
Training, Epoch: 0029, Batch: 003128, Sample Num: 50048, Cur Loss: 0.00011156, Cur Avg Loss: 0.00033175, Log Avg loss: 0.00036039, Global Avg Loss: 0.00747869, Time: 0.0612 Steps: 142400, Updated lr: 0.000043
Training, Epoch: 0029, Batch: 003328, Sample Num: 53248, Cur Loss: 0.00008437, Cur Avg Loss: 0.00032724, Log Avg loss: 0.00025672, Global Avg Loss: 0.00746856, Time: 0.0652 Steps: 142600, Updated lr: 0.000043
Training, Epoch: 0029, Batch: 003528, Sample Num: 56448, Cur Loss: 0.00001064, Cur Avg Loss: 0.00032770, Log Avg loss: 0.00033537, Global Avg Loss: 0.00745857, Time: 0.0256 Steps: 142800, Updated lr: 0.000043
Training, Epoch: 0029, Batch: 003728, Sample Num: 59648, Cur Loss: 0.00001932, Cur Avg Loss: 0.00032844, Log Avg loss: 0.00034138, Global Avg Loss: 0.00744862, Time: 0.1550 Steps: 143000, Updated lr: 0.000043
Training, Epoch: 0029, Batch: 003928, Sample Num: 62848, Cur Loss: 0.00001080, Cur Avg Loss: 0.00032268, Log Avg loss: 0.00021533, Global Avg Loss: 0.00743851, Time: 0.0857 Steps: 143200, Updated lr: 0.000043
Training, Epoch: 0029, Batch: 004128, Sample Num: 66048, Cur Loss: 0.00045993, Cur Avg Loss: 0.00032440, Log Avg loss: 0.00035827, Global Avg Loss: 0.00742864, Time: 0.0701 Steps: 143400, Updated lr: 0.000043
Training, Epoch: 0029, Batch: 004328, Sample Num: 69248, Cur Loss: 0.00001656, Cur Avg Loss: 0.00032368, Log Avg loss: 0.00030875, Global Avg Loss: 0.00741872, Time: 0.0613 Steps: 143600, Updated lr: 0.000042
Training, Epoch: 0029, Batch: 004528, Sample Num: 72448, Cur Loss: 0.00083109, Cur Avg Loss: 0.00032922, Log Avg loss: 0.00044914, Global Avg Loss: 0.00740903, Time: 0.0613 Steps: 143800, Updated lr: 0.000042
Training, Epoch: 0029, Batch: 004728, Sample Num: 75648, Cur Loss: 0.00000996, Cur Avg Loss: 0.00032973, Log Avg loss: 0.00034124, Global Avg Loss: 0.00739921, Time: 0.0583 Steps: 144000, Updated lr: 0.000042
Training, Epoch: 0029, Batch: 004928, Sample Num: 78848, Cur Loss: 0.00023521, Cur Avg Loss: 0.00033333, Log Avg loss: 0.00041852, Global Avg Loss: 0.00738953, Time: 0.1156 Steps: 144200, Updated lr: 0.000042
***** Running evaluation checkpoint-144246 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-144246 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 391.490086, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001386, "eval_total_loss": 1.477314, "eval_acc": 0.99968, "eval_jaccard": 0.988928, "eval_prec": 0.990875, "eval_recall": 0.990456, "eval_f1": 0.990142, "eval_pr_auc": 0.995677, "eval_roc_auc": 0.999405, "eval_fmax": 0.994708, "eval_pmax": 0.996331, "eval_rmax": 0.993091, "eval_tmax": 0.17, "update_flag": true, "test_avg_loss": 0.001488, "test_total_loss": 1.58621, "test_acc": 0.999685, "test_jaccard": 0.98797, "test_prec": 0.989722, "test_recall": 0.989698, "test_f1": 0.989215, "test_pr_auc": 0.994984, "test_roc_auc": 0.999327, "test_fmax": 0.994453, "test_pmax": 0.997456, "test_rmax": 0.991468, "test_tmax": 0.31, "lr": 4.21695599515543e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.007387296528494423, "train_cur_epoch_loss": 1.6603051261731991, "train_cur_epoch_avg_loss": 0.0003337967684304783, "train_cur_epoch_time": 391.4900858402252, "train_cur_epoch_avg_time": 0.07870729510257846, "epoch": 29, "step": 144246}
##################################################
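In the summary just above, update_flag flips to true for the first time in this excerpt. Comparing across epochs, the flag is true exactly when eval_f1 exceeds the best value recorded in any earlier epoch (true again at epochs 30 and 32 below; false at epochs 27 and 28 even though they improve on epoch 26, implying a run-wide best set before this excerpt). A sketch of the inferred bookkeeping; the actual checkpoint-selection code is not shown in this log:

class BestMetricTracker:
    """Inferred from the log: update_flag is true iff the watched dev metric
    (here eval_f1) sets a new best for the whole run."""

    def __init__(self):
        self.best = float("-inf")

    def check(self, metric):
        update_flag = metric > self.best
        if update_flag:
            self.best = metric  # a best-model checkpoint save would hang off this
        return update_flag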
Training, Epoch: 0030, Batch: 000154, Sample Num: 2464, Cur Loss: 0.00001908, Cur Avg Loss: 0.00029724, Log Avg loss: 0.00031710, Global Avg Loss: 0.00737974, Time: 0.0603 Steps: 144400, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 000354, Sample Num: 5664, Cur Loss: 0.00077804, Cur Avg Loss: 0.00032698, Log Avg loss: 0.00034987, Global Avg Loss: 0.00737001, Time: 0.0683 Steps: 144600, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 000554, Sample Num: 8864, Cur Loss: 0.00025454, Cur Avg Loss: 0.00038114, Log Avg loss: 0.00047701, Global Avg Loss: 0.00736049, Time: 0.0605 Steps: 144800, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 000754, Sample Num: 12064, Cur Loss: 0.00000631, Cur Avg Loss: 0.00038188, Log Avg loss: 0.00038395, Global Avg Loss: 0.00735087, Time: 0.1652 Steps: 145000, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 000954, Sample Num: 15264, Cur Loss: 0.00001889, Cur Avg Loss: 0.00037063, Log Avg loss: 0.00032819, Global Avg Loss: 0.00734120, Time: 0.0602 Steps: 145200, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 001154, Sample Num: 18464, Cur Loss: 0.00012725, Cur Avg Loss: 0.00035288, Log Avg loss: 0.00026821, Global Avg Loss: 0.00733147, Time: 0.0634 Steps: 145400, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 001354, Sample Num: 21664, Cur Loss: 0.00000474, Cur Avg Loss: 0.00032719, Log Avg loss: 0.00017895, Global Avg Loss: 0.00732164, Time: 0.0604 Steps: 145600, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 001554, Sample Num: 24864, Cur Loss: 0.00004391, Cur Avg Loss: 0.00031045, Log Avg loss: 0.00019714, Global Avg Loss: 0.00731187, Time: 0.0602 Steps: 145800, Updated lr: 0.000042
Training, Epoch: 0030, Batch: 001754, Sample Num: 28064, Cur Loss: 0.00017148, Cur Avg Loss: 0.00030127, Log Avg loss: 0.00022990, Global Avg Loss: 0.00730217, Time: 0.0602 Steps: 146000, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 001954, Sample Num: 31264, Cur Loss: 0.00015274, Cur Avg Loss: 0.00028827, Log Avg loss: 0.00017434, Global Avg Loss: 0.00729242, Time: 0.1143 Steps: 146200, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 002154, Sample Num: 34464, Cur Loss: 0.00006956, Cur Avg Loss: 0.00030946, Log Avg loss: 0.00051649, Global Avg Loss: 0.00728316, Time: 0.0627 Steps: 146400, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 002354, Sample Num: 37664, Cur Loss: 0.00005797, Cur Avg Loss: 0.00031406, Log Avg loss: 0.00036361, Global Avg Loss: 0.00727372, Time: 0.1064 Steps: 146600, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 002554, Sample Num: 40864, Cur Loss: 0.00003871, Cur Avg Loss: 0.00030615, Log Avg loss: 0.00021300, Global Avg Loss: 0.00726410, Time: 0.0614 Steps: 146800, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 002754, Sample Num: 44064, Cur Loss: 0.00001373, Cur Avg Loss: 0.00031019, Log Avg loss: 0.00036179, Global Avg Loss: 0.00725471, Time: 0.0669 Steps: 147000, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 002954, Sample Num: 47264, Cur Loss: 0.00795755, Cur Avg Loss: 0.00031480, Log Avg loss: 0.00037827, Global Avg Loss: 0.00724537, Time: 0.0586 Steps: 147200, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 003154, Sample Num: 50464, Cur Loss: 0.00013462, Cur Avg Loss: 0.00030960, Log Avg loss: 0.00023279, Global Avg Loss: 0.00723585, Time: 0.1726 Steps: 147400, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 003354, Sample Num: 53664, Cur Loss: 0.00004880, Cur Avg Loss: 0.00030620, Log Avg loss: 0.00025263, Global Avg Loss: 0.00722639, Time: 0.0595 Steps: 147600, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 003554, Sample Num: 56864, Cur Loss: 0.00003835, Cur Avg Loss: 0.00030812, Log Avg loss: 0.00034025, Global Avg Loss: 0.00721707, Time: 0.0604 Steps: 147800, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 003754, Sample Num: 60064, Cur Loss: 0.00063034, Cur Avg Loss: 0.00030627, Log Avg loss: 0.00027336, Global Avg Loss: 0.00720769, Time: 0.0250 Steps: 148000, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 003954, Sample Num: 63264, Cur Loss: 0.00007027, Cur Avg Loss: 0.00030400, Log Avg loss: 0.00026148, Global Avg Loss: 0.00719831, Time: 0.0622 Steps: 148200, Updated lr: 0.000041
Training, Epoch: 0030, Batch: 004154, Sample Num: 66464, Cur Loss: 0.00001623, Cur Avg Loss: 0.00030682, Log Avg loss: 0.00036250, Global Avg Loss: 0.00718910, Time: 0.0353 Steps: 148400, Updated lr: 0.000040
Training, Epoch: 0030, Batch: 004354, Sample Num: 69664, Cur Loss: 0.00004068, Cur Avg Loss: 0.00030513, Log Avg loss: 0.00027008, Global Avg Loss: 0.00717979, Time: 0.0870 Steps: 148600, Updated lr: 0.000040
Training, Epoch: 0030, Batch: 004554, Sample Num: 72864, Cur Loss: 0.00001432, Cur Avg Loss: 0.00031293, Log Avg loss: 0.00048269, Global Avg Loss: 0.00717079, Time: 0.0837 Steps: 148800, Updated lr: 0.000040
Training, Epoch: 0030, Batch: 004754, Sample Num: 76064, Cur Loss: 0.00006404, Cur Avg Loss: 0.00031034, Log Avg loss: 0.00025149, Global Avg Loss: 0.00716150, Time: 0.0612 Steps: 149000, Updated lr: 0.000040
Training, Epoch: 0030, Batch: 004954, Sample Num: 79264, Cur Loss: 0.00002761, Cur Avg Loss: 0.00031567, Log Avg loss: 0.00044233, Global Avg Loss: 0.00715249, Time: 0.1654 Steps: 149200, Updated lr: 0.000040
***** Running evaluation checkpoint-149220 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-149220 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 382.937515, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.00136, "eval_total_loss": 1.449384, "eval_acc": 0.999688, "eval_jaccard": 0.989106, "eval_prec": 0.990828, "eval_recall": 0.990611, "eval_f1": 0.990229, "eval_pr_auc": 0.995621, "eval_roc_auc": 0.999407, "eval_fmax": 0.99478, "eval_pmax": 0.997806, "eval_rmax": 0.991772, "eval_tmax": 0.35, "update_flag": true, "test_avg_loss": 0.001477, "test_total_loss": 1.574349, "test_acc": 0.999679, "test_jaccard": 0.987797, "test_prec": 0.989505, "test_recall": 0.989752, "test_f1": 0.989119, "test_pr_auc": 0.995086, "test_roc_auc": 0.999328, "test_fmax": 0.994385, "test_pmax": 0.996458, "test_rmax": 0.99232, "test_tmax": 0.22, "lr": 4.016148566814695e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.007151581002454668, "train_cur_epoch_loss": 1.5709421370784469, "train_cur_epoch_avg_loss": 0.000315830747301658, "train_cur_epoch_time": 382.9375145435333, "train_cur_epoch_avg_time": 0.07698783967501675, "epoch": 30, "step": 149220}
##################################################
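The logged learning rates fall on a straight line: each 4974-step epoch lowers lr by about 2.008e-06 (4.8194e-05 at step 129324, 4.6186e-05 at 134298, 4.4178e-05 at 139272, 4.0161e-05 at 149220 just above). That is numerically consistent with a linear decay reaching zero at step 248700, i.e. 50 epochs of 4974 steps, after a 1000-step warmup to a 1e-4 peak. A sketch of such a schedule; the warmup shape is an assumption, since only the decay phase appears in this excerpt:

def lr_at_step(step, base_lr=1e-4, warmup_steps=1000, total_steps=50 * 4974):
    # Linear warmup, then linear decay to zero. Reproduces the logged values,
    # e.g. lr_at_step(129324) -> 4.8194e-05 and lr_at_step(149220) -> 4.0161e-05.
    if step < warmup_steps:
        return base_lr * step / warmup_steps
    return base_lr * (total_steps - step) / (total_steps - warmup_steps)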
Training, Epoch: 0031, Batch: 000180, Sample Num: 2880, Cur Loss: 0.00031894, Cur Avg Loss: 0.00042293, Log Avg loss: 0.00041616, Global Avg Loss: 0.00714347, Time: 0.0614 Steps: 149400, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 000380, Sample Num: 6080, Cur Loss: 0.00010849, Cur Avg Loss: 0.00036840, Log Avg loss: 0.00031932, Global Avg Loss: 0.00713435, Time: 0.1207 Steps: 149600, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 000580, Sample Num: 9280, Cur Loss: 0.00000545, Cur Avg Loss: 0.00041519, Log Avg loss: 0.00050410, Global Avg Loss: 0.00712550, Time: 0.0609 Steps: 149800, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 000780, Sample Num: 12480, Cur Loss: 0.00001965, Cur Avg Loss: 0.00041387, Log Avg loss: 0.00041004, Global Avg Loss: 0.00711654, Time: 0.1070 Steps: 150000, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 000980, Sample Num: 15680, Cur Loss: 0.00000474, Cur Avg Loss: 0.00038406, Log Avg loss: 0.00026777, Global Avg Loss: 0.00710743, Time: 0.0237 Steps: 150200, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 001180, Sample Num: 18880, Cur Loss: 0.00004069, Cur Avg Loss: 0.00036003, Log Avg loss: 0.00024227, Global Avg Loss: 0.00709830, Time: 0.0598 Steps: 150400, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 001380, Sample Num: 22080, Cur Loss: 0.00022802, Cur Avg Loss: 0.00033627, Log Avg loss: 0.00019613, Global Avg Loss: 0.00708913, Time: 0.0648 Steps: 150600, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 001580, Sample Num: 25280, Cur Loss: 0.00017489, Cur Avg Loss: 0.00031195, Log Avg loss: 0.00014412, Global Avg Loss: 0.00707992, Time: 0.0606 Steps: 150800, Updated lr: 0.000040
Training, Epoch: 0031, Batch: 001780, Sample Num: 28480, Cur Loss: 0.00002899, Cur Avg Loss: 0.00029941, Log Avg loss: 0.00020031, Global Avg Loss: 0.00707081, Time: 0.0593 Steps: 151000, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 001980, Sample Num: 31680, Cur Loss: 0.00011757, Cur Avg Loss: 0.00028842, Log Avg loss: 0.00019063, Global Avg Loss: 0.00706171, Time: 0.1220 Steps: 151200, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 002180, Sample Num: 34880, Cur Loss: 0.00013880, Cur Avg Loss: 0.00029964, Log Avg loss: 0.00041069, Global Avg Loss: 0.00705292, Time: 0.0607 Steps: 151400, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 002380, Sample Num: 38080, Cur Loss: 0.00005652, Cur Avg Loss: 0.00031043, Log Avg loss: 0.00042806, Global Avg Loss: 0.00704418, Time: 0.0608 Steps: 151600, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 002580, Sample Num: 41280, Cur Loss: 0.00001150, Cur Avg Loss: 0.00030689, Log Avg loss: 0.00026480, Global Avg Loss: 0.00703525, Time: 0.0599 Steps: 151800, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 002780, Sample Num: 44480, Cur Loss: 0.00002910, Cur Avg Loss: 0.00031018, Log Avg loss: 0.00035264, Global Avg Loss: 0.00702646, Time: 0.1174 Steps: 152000, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 002980, Sample Num: 47680, Cur Loss: 0.00000386, Cur Avg Loss: 0.00031377, Log Avg loss: 0.00036371, Global Avg Loss: 0.00701770, Time: 0.1649 Steps: 152200, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 003180, Sample Num: 50880, Cur Loss: 0.00001007, Cur Avg Loss: 0.00031045, Log Avg loss: 0.00026097, Global Avg Loss: 0.00700883, Time: 0.0236 Steps: 152400, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 003380, Sample Num: 54080, Cur Loss: 0.00002575, Cur Avg Loss: 0.00030899, Log Avg loss: 0.00028566, Global Avg Loss: 0.00700002, Time: 0.0607 Steps: 152600, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 003580, Sample Num: 57280, Cur Loss: 0.00003567, Cur Avg Loss: 0.00030893, Log Avg loss: 0.00030798, Global Avg Loss: 0.00699126, Time: 0.1727 Steps: 152800, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 003780, Sample Num: 60480, Cur Loss: 0.00001938, Cur Avg Loss: 0.00030784, Log Avg loss: 0.00028824, Global Avg Loss: 0.00698250, Time: 0.0615 Steps: 153000, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 003980, Sample Num: 63680, Cur Loss: 0.00001631, Cur Avg Loss: 0.00030631, Log Avg loss: 0.00027753, Global Avg Loss: 0.00697375, Time: 0.0276 Steps: 153200, Updated lr: 0.000039
Training, Epoch: 0031, Batch: 004180, Sample Num: 66880, Cur Loss: 0.00005898, Cur Avg Loss: 0.00030433, Log Avg loss: 0.00026482, Global Avg Loss: 0.00696500, Time: 0.0614 Steps: 153400, Updated lr: 0.000038
Training, Epoch: 0031, Batch: 004380, Sample Num: 70080, Cur Loss: 0.00002066, Cur Avg Loss: 0.00030314, Log Avg loss: 0.00027838, Global Avg Loss: 0.00695629, Time: 0.1009 Steps: 153600, Updated lr: 0.000038
Training, Epoch: 0031, Batch: 004580, Sample Num: 73280, Cur Loss: 0.00004773, Cur Avg Loss: 0.00030833, Log Avg loss: 0.00042200, Global Avg Loss: 0.00694780, Time: 0.0339 Steps: 153800, Updated lr: 0.000038
Training, Epoch: 0031, Batch: 004780, Sample Num: 76480, Cur Loss: 0.00002161, Cur Avg Loss: 0.00030500, Log Avg loss: 0.00022861, Global Avg Loss: 0.00693907, Time: 0.0605 Steps: 154000, Updated lr: 0.000038
***** Running evaluation checkpoint-154194 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-154194 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 393.206522, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001395, "eval_total_loss": 1.487107, "eval_acc": 0.999687, "eval_jaccard": 0.989009, "eval_prec": 0.990892, "eval_recall": 0.99047, "eval_f1": 0.990161, "eval_pr_auc": 0.995798, "eval_roc_auc": 0.999411, "eval_fmax": 0.994919, "eval_pmax": 0.997177, "eval_rmax": 0.99267, "eval_tmax": 0.25, "update_flag": false, "test_avg_loss": 0.001498, "test_total_loss": 1.597183, "test_acc": 0.99968, "test_jaccard": 0.987815, "test_prec": 0.989495, "test_recall": 0.989628, "test_f1": 0.989066, "test_pr_auc": 0.99497, "test_roc_auc": 0.999339, "test_fmax": 0.9944, "test_pmax": 0.996637, "test_rmax": 0.992173, "test_tmax": 0.23, "lr": 3.8153411384739607e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006930874589442356, "train_cur_epoch_loss": 1.5403592581892838, "train_cur_epoch_avg_loss": 0.0003096821990730365, "train_cur_epoch_time": 393.20652198791504, "train_cur_epoch_avg_time": 0.07905237675671795, "epoch": 31, "step": 154194}
##################################################
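The "Avg time per batch (s)" printed with each epoch summary is the epoch wall time divided by the 4974 steps, rounded to two decimals; the JSON field train_cur_epoch_avg_time keeps full precision. A quick check against the epoch-31 summary above:

epoch_time, steps = 393.206522, 4974
print(epoch_time / steps)            # 0.07905237675671795, the train_cur_epoch_avg_time value
print(round(epoch_time / steps, 2))  # 0.08, the rounded header value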
Training, Epoch: 0032, Batch: 000006, Sample Num: 96, Cur Loss: 0.00003822, Cur Avg Loss: 0.00002741, Log Avg loss: 0.00041317, Global Avg Loss: 0.00693061, Time: 0.0617 Steps: 154200, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 000206, Sample Num: 3296, Cur Loss: 0.00001865, Cur Avg Loss: 0.00031673, Log Avg loss: 0.00032541, Global Avg Loss: 0.00692205, Time: 0.0626 Steps: 154400, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 000406, Sample Num: 6496, Cur Loss: 0.00001117, Cur Avg Loss: 0.00034005, Log Avg loss: 0.00036407, Global Avg Loss: 0.00691357, Time: 0.0621 Steps: 154600, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 000606, Sample Num: 9696, Cur Loss: 0.00001966, Cur Avg Loss: 0.00038758, Log Avg loss: 0.00048406, Global Avg Loss: 0.00690526, Time: 0.0604 Steps: 154800, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 000806, Sample Num: 12896, Cur Loss: 0.00002252, Cur Avg Loss: 0.00038630, Log Avg loss: 0.00038241, Global Avg Loss: 0.00689684, Time: 0.1529 Steps: 155000, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 001006, Sample Num: 16096, Cur Loss: 0.00001424, Cur Avg Loss: 0.00035220, Log Avg loss: 0.00021478, Global Avg Loss: 0.00688823, Time: 0.0678 Steps: 155200, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 001206, Sample Num: 19296, Cur Loss: 0.00010377, Cur Avg Loss: 0.00034094, Log Avg loss: 0.00028431, Global Avg Loss: 0.00687973, Time: 0.1167 Steps: 155400, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 001406, Sample Num: 22496, Cur Loss: 0.00001171, Cur Avg Loss: 0.00031618, Log Avg loss: 0.00016692, Global Avg Loss: 0.00687110, Time: 0.0921 Steps: 155600, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 001606, Sample Num: 25696, Cur Loss: 0.00000218, Cur Avg Loss: 0.00029455, Log Avg loss: 0.00014245, Global Avg Loss: 0.00686247, Time: 0.0607 Steps: 155800, Updated lr: 0.000038
Training, Epoch: 0032, Batch: 001806, Sample Num: 28896, Cur Loss: 0.00000425, Cur Avg Loss: 0.00028546, Log Avg loss: 0.00021247, Global Avg Loss: 0.00685394, Time: 0.0589 Steps: 156000, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 002006, Sample Num: 32096, Cur Loss: 0.00006880, Cur Avg Loss: 0.00027777, Log Avg loss: 0.00020831, Global Avg Loss: 0.00684543, Time: 0.0417 Steps: 156200, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 002206, Sample Num: 35296, Cur Loss: 0.00000873, Cur Avg Loss: 0.00028532, Log Avg loss: 0.00036106, Global Avg Loss: 0.00683714, Time: 0.0601 Steps: 156400, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 002406, Sample Num: 38496, Cur Loss: 0.00045591, Cur Avg Loss: 0.00028957, Log Avg loss: 0.00033648, Global Avg Loss: 0.00682884, Time: 0.0612 Steps: 156600, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 002606, Sample Num: 41696, Cur Loss: 0.00011436, Cur Avg Loss: 0.00028881, Log Avg loss: 0.00027965, Global Avg Loss: 0.00682048, Time: 0.0613 Steps: 156800, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 002806, Sample Num: 44896, Cur Loss: 0.00009218, Cur Avg Loss: 0.00029290, Log Avg loss: 0.00034618, Global Avg Loss: 0.00681224, Time: 0.0605 Steps: 157000, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 003006, Sample Num: 48096, Cur Loss: 0.00000875, Cur Avg Loss: 0.00029926, Log Avg loss: 0.00038856, Global Avg Loss: 0.00680406, Time: 0.1196 Steps: 157200, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 003206, Sample Num: 51296, Cur Loss: 0.00005414, Cur Avg Loss: 0.00029514, Log Avg loss: 0.00023320, Global Avg Loss: 0.00679571, Time: 0.0892 Steps: 157400, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 003406, Sample Num: 54496, Cur Loss: 0.00003773, Cur Avg Loss: 0.00029440, Log Avg loss: 0.00028254, Global Avg Loss: 0.00678745, Time: 0.0619 Steps: 157600, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 003606, Sample Num: 57696, Cur Loss: 0.00002811, Cur Avg Loss: 0.00029463, Log Avg loss: 0.00029848, Global Avg Loss: 0.00677922, Time: 0.0578 Steps: 157800, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 003806, Sample Num: 60896, Cur Loss: 0.00012889, Cur Avg Loss: 0.00029336, Log Avg loss: 0.00027053, Global Avg Loss: 0.00677099, Time: 0.0599 Steps: 158000, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 004006, Sample Num: 64096, Cur Loss: 0.00005971, Cur Avg Loss: 0.00029310, Log Avg loss: 0.00028807, Global Avg Loss: 0.00676279, Time: 0.1193 Steps: 158200, Updated lr: 0.000037
Training, Epoch: 0032, Batch: 004206, Sample Num: 67296, Cur Loss: 0.00018688, Cur Avg Loss: 0.00029589, Log Avg loss: 0.00035183, Global Avg Loss: 0.00675470, Time: 0.1104 Steps: 158400, Updated lr: 0.000036
Training, Epoch: 0032, Batch: 004406, Sample Num: 70496, Cur Loss: 0.00000447, Cur Avg Loss: 0.00029305, Log Avg loss: 0.00023341, Global Avg Loss: 0.00674647, Time: 0.0606 Steps: 158600, Updated lr: 0.000036
Training, Epoch: 0032, Batch: 004606, Sample Num: 73696, Cur Loss: 0.00007434, Cur Avg Loss: 0.00029810, Log Avg loss: 0.00040934, Global Avg Loss: 0.00673849, Time: 0.0622 Steps: 158800, Updated lr: 0.000036
Training, Epoch: 0032, Batch: 004806, Sample Num: 76896, Cur Loss: 0.00018302, Cur Avg Loss: 0.00030045, Log Avg loss: 0.00035455, Global Avg Loss: 0.00673046, Time: 0.0612 Steps: 159000, Updated lr: 0.000036
***** Running evaluation checkpoint-159168 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-159168 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 371.349331, Avg time per batch (s): 0.070000
{"eval_avg_loss": 0.001375, "eval_total_loss": 1.466036, "eval_acc": 0.999687, "eval_jaccard": 0.989201, "eval_prec": 0.991054, "eval_recall": 0.990671, "eval_f1": 0.990349, "eval_pr_auc": 0.995735, "eval_roc_auc": 0.999415, "eval_fmax": 0.994909, "eval_pmax": 0.996898, "eval_rmax": 0.992929, "eval_tmax": 0.21, "update_flag": true, "test_avg_loss": 0.001485, "test_total_loss": 1.583407, "test_acc": 0.999683, "test_jaccard": 0.988019, "test_prec": 0.989767, "test_recall": 0.989882, "test_f1": 0.989306, "test_pr_auc": 0.995069, "test_roc_auc": 0.99934, "test_fmax": 0.994517, "test_pmax": 0.996798, "test_rmax": 0.992246, "test_tmax": 0.24, "lr": 3.614533710133226e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006723657501906844, "train_cur_epoch_loss": 1.491840819033996, "train_cur_epoch_avg_loss": 0.0002999277883059903, "train_cur_epoch_time": 371.3493309020996, "train_cur_epoch_avg_time": 0.07465808823926409, "epoch": 32, "step": 159168}
##################################################
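Throughout the log, Sample Num is just the batch counter times the fixed per-GPU batch size of 16 (e.g. 4806 x 16 = 76896 at the last epoch-32 record above), so a full 4974-step epoch covers 4974 x 16 = 79584 samples. A one-line consistency check:

batch_size = 16
assert 4806 * batch_size == 76896 and 4974 * batch_size == 79584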
Training, Epoch: 0033, Batch: 000032, Sample Num: 512, Cur Loss: 0.00023990, Cur Avg Loss: 0.00011691, Log Avg loss: 0.00025804, Global Avg Loss: 0.00672233, Time: 0.0686 Steps: 159200, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 000232, Sample Num: 3712, Cur Loss: 0.00002171, Cur Avg Loss: 0.00028146, Log Avg loss: 0.00030779, Global Avg Loss: 0.00671428, Time: 0.1456 Steps: 159400, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 000432, Sample Num: 6912, Cur Loss: 0.00002367, Cur Avg Loss: 0.00031742, Log Avg loss: 0.00035913, Global Avg Loss: 0.00670632, Time: 0.0361 Steps: 159600, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 000632, Sample Num: 10112, Cur Loss: 0.00003645, Cur Avg Loss: 0.00035142, Log Avg loss: 0.00042488, Global Avg Loss: 0.00669846, Time: 0.0606 Steps: 159800, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 000832, Sample Num: 13312, Cur Loss: 0.00017603, Cur Avg Loss: 0.00035066, Log Avg loss: 0.00034823, Global Avg Loss: 0.00669052, Time: 0.0708 Steps: 160000, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 001032, Sample Num: 16512, Cur Loss: 0.00012746, Cur Avg Loss: 0.00032801, Log Avg loss: 0.00023381, Global Avg Loss: 0.00668246, Time: 0.1107 Steps: 160200, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 001232, Sample Num: 19712, Cur Loss: 0.00021659, Cur Avg Loss: 0.00030953, Log Avg loss: 0.00021415, Global Avg Loss: 0.00667439, Time: 0.0612 Steps: 160400, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 001432, Sample Num: 22912, Cur Loss: 0.00001677, Cur Avg Loss: 0.00029073, Log Avg loss: 0.00017497, Global Avg Loss: 0.00666630, Time: 0.0624 Steps: 160600, Updated lr: 0.000036
Training, Epoch: 0033, Batch: 001632, Sample Num: 26112, Cur Loss: 0.00279939, Cur Avg Loss: 0.00027363, Log Avg loss: 0.00015114, Global Avg Loss: 0.00665819, Time: 0.0277 Steps: 160800, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 001832, Sample Num: 29312, Cur Loss: 0.00010652, Cur Avg Loss: 0.00026481, Log Avg loss: 0.00019284, Global Avg Loss: 0.00665016, Time: 0.0626 Steps: 161000, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 002032, Sample Num: 32512, Cur Loss: 0.00018764, Cur Avg Loss: 0.00026395, Log Avg loss: 0.00025607, Global Avg Loss: 0.00664223, Time: 0.0659 Steps: 161200, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 002232, Sample Num: 35712, Cur Loss: 0.00018813, Cur Avg Loss: 0.00027549, Log Avg loss: 0.00039278, Global Avg Loss: 0.00663449, Time: 0.0636 Steps: 161400, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 002432, Sample Num: 38912, Cur Loss: 0.00022316, Cur Avg Loss: 0.00027563, Log Avg loss: 0.00027724, Global Avg Loss: 0.00662662, Time: 0.0686 Steps: 161600, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 002632, Sample Num: 42112, Cur Loss: 0.00001599, Cur Avg Loss: 0.00027328, Log Avg loss: 0.00024465, Global Avg Loss: 0.00661873, Time: 0.0606 Steps: 161800, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 002832, Sample Num: 45312, Cur Loss: 0.00005687, Cur Avg Loss: 0.00027663, Log Avg loss: 0.00032069, Global Avg Loss: 0.00661095, Time: 0.1642 Steps: 162000, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 003032, Sample Num: 48512, Cur Loss: 0.00007517, Cur Avg Loss: 0.00028424, Log Avg loss: 0.00039200, Global Avg Loss: 0.00660329, Time: 0.0605 Steps: 162200, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 003232, Sample Num: 51712, Cur Loss: 0.00001769, Cur Avg Loss: 0.00027739, Log Avg loss: 0.00017360, Global Avg Loss: 0.00659537, Time: 0.0698 Steps: 162400, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 003432, Sample Num: 54912, Cur Loss: 0.00027506, Cur Avg Loss: 0.00027502, Log Avg loss: 0.00023666, Global Avg Loss: 0.00658755, Time: 0.1706 Steps: 162600, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 003632, Sample Num: 58112, Cur Loss: 0.00000551, Cur Avg Loss: 0.00027627, Log Avg loss: 0.00029766, Global Avg Loss: 0.00657982, Time: 0.0610 Steps: 162800, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 003832, Sample Num: 61312, Cur Loss: 0.00001181, Cur Avg Loss: 0.00027370, Log Avg loss: 0.00022711, Global Avg Loss: 0.00657202, Time: 0.0611 Steps: 163000, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 004032, Sample Num: 64512, Cur Loss: 0.00006057, Cur Avg Loss: 0.00027608, Log Avg loss: 0.00032165, Global Avg Loss: 0.00656436, Time: 0.0702 Steps: 163200, Updated lr: 0.000035
Training, Epoch: 0033, Batch: 004232, Sample Num: 67712, Cur Loss: 0.00001462, Cur Avg Loss: 0.00027462, Log Avg loss: 0.00024524, Global Avg Loss: 0.00655663, Time: 0.0612 Steps: 163400, Updated lr: 0.000034
Training, Epoch: 0033, Batch: 004432, Sample Num: 70912, Cur Loss: 0.00011807, Cur Avg Loss: 0.00027264, Log Avg loss: 0.00023065, Global Avg Loss: 0.00654890, Time: 0.0618 Steps: 163600, Updated lr: 0.000034
Training, Epoch: 0033, Batch: 004632, Sample Num: 74112, Cur Loss: 0.00000599, Cur Avg Loss: 0.00027763, Log Avg loss: 0.00038823, Global Avg Loss: 0.00654137, Time: 0.0611 Steps: 163800, Updated lr: 0.000034
Training, Epoch: 0033, Batch: 004832, Sample Num: 77312, Cur Loss: 0.00005630, Cur Avg Loss: 0.00028102, Log Avg loss: 0.00035964, Global Avg Loss: 0.00653384, Time: 0.1128 Steps: 164000, Updated lr: 0.000034
***** Running evaluation checkpoint-164142 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-164142 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 402.153517, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001381, "eval_total_loss": 1.472254, "eval_acc": 0.999677, "eval_jaccard": 0.989011, "eval_prec": 0.990962, "eval_recall": 0.990632, "eval_f1": 0.990235, "eval_pr_auc": 0.995777, "eval_roc_auc": 0.999418, "eval_fmax": 0.994877, "eval_pmax": 0.997167, "eval_rmax": 0.992597, "eval_tmax": 0.24, "update_flag": false, "test_avg_loss": 0.001513, "test_total_loss": 1.612906, "test_acc": 0.999685, "test_jaccard": 0.988026, "test_prec": 0.989566, "test_recall": 0.989896, "test_f1": 0.989261, "test_pr_auc": 0.995049, "test_roc_auc": 0.999345, "test_fmax": 0.994387, "test_pmax": 0.996724, "test_rmax": 0.99206, "test_tmax": 0.24, "lr": 3.413726281792491e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0065284695884197195, "train_cur_epoch_loss": 1.4049379188806768, "train_cur_epoch_avg_loss": 0.00028245635683165997, "train_cur_epoch_time": 402.15351724624634, "train_cur_epoch_avg_time": 0.08085112932172223, "epoch": 33, "step": 164142}
##################################################
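In these summaries eval_avg_loss is eval_total_loss averaged over dev batches: 17053 examples at batch size 16 give ceil(17053 / 16) = 1066 batches, and 1.472254 / 1066 ≈ 0.001381, matching the epoch-33 block above (the same ratio holds for every other epoch in this excerpt). A quick check:

import math
total_loss, num_examples, batch_size = 1.472254, 17053, 16
batches = math.ceil(num_examples / batch_size)  # 1066 dev batches
print(total_loss / batches)                     # ~0.001381, the reported eval_avg_loss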
Training, Epoch: 0034, Batch: 000058, Sample Num: 928, Cur Loss: 0.00027384, Cur Avg Loss: 0.00027296, Log Avg loss: 0.00031436, Global Avg Loss: 0.00652626, Time: 0.0609 Steps: 164200, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 000258, Sample Num: 4128, Cur Loss: 0.00000838, Cur Avg Loss: 0.00027582, Log Avg loss: 0.00027665, Global Avg Loss: 0.00651866, Time: 0.0689 Steps: 164400, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 000458, Sample Num: 7328, Cur Loss: 0.00000772, Cur Avg Loss: 0.00029935, Log Avg loss: 0.00032971, Global Avg Loss: 0.00651114, Time: 0.0630 Steps: 164600, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 000658, Sample Num: 10528, Cur Loss: 0.00014478, Cur Avg Loss: 0.00033203, Log Avg loss: 0.00040687, Global Avg Loss: 0.00650373, Time: 0.0609 Steps: 164800, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 000858, Sample Num: 13728, Cur Loss: 0.00003982, Cur Avg Loss: 0.00033332, Log Avg loss: 0.00033757, Global Avg Loss: 0.00649625, Time: 0.0558 Steps: 165000, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 001058, Sample Num: 16928, Cur Loss: 0.00227171, Cur Avg Loss: 0.00031102, Log Avg loss: 0.00021532, Global Avg Loss: 0.00648865, Time: 0.1293 Steps: 165200, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 001258, Sample Num: 20128, Cur Loss: 0.00002509, Cur Avg Loss: 0.00029077, Log Avg loss: 0.00018369, Global Avg Loss: 0.00648103, Time: 0.0606 Steps: 165400, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 001458, Sample Num: 23328, Cur Loss: 0.00005478, Cur Avg Loss: 0.00027583, Log Avg loss: 0.00018180, Global Avg Loss: 0.00647342, Time: 0.0642 Steps: 165600, Updated lr: 0.000034
Training, Epoch: 0034, Batch: 001658, Sample Num: 26528, Cur Loss: 0.00006112, Cur Avg Loss: 0.00026219, Log Avg loss: 0.00016276, Global Avg Loss: 0.00646581, Time: 0.0608 Steps: 165800, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 001858, Sample Num: 29728, Cur Loss: 0.00008749, Cur Avg Loss: 0.00025557, Log Avg loss: 0.00020070, Global Avg Loss: 0.00645826, Time: 0.0685 Steps: 166000, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 002058, Sample Num: 32928, Cur Loss: 0.00003447, Cur Avg Loss: 0.00025715, Log Avg loss: 0.00027188, Global Avg Loss: 0.00645081, Time: 0.0608 Steps: 166200, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 002258, Sample Num: 36128, Cur Loss: 0.00014063, Cur Avg Loss: 0.00027305, Log Avg loss: 0.00043667, Global Avg Loss: 0.00644359, Time: 0.1218 Steps: 166400, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 002458, Sample Num: 39328, Cur Loss: 0.00001769, Cur Avg Loss: 0.00026807, Log Avg loss: 0.00021178, Global Avg Loss: 0.00643610, Time: 0.0596 Steps: 166600, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 002658, Sample Num: 42528, Cur Loss: 0.00001199, Cur Avg Loss: 0.00026796, Log Avg loss: 0.00026666, Global Avg Loss: 0.00642871, Time: 0.0609 Steps: 166800, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 002858, Sample Num: 45728, Cur Loss: 0.00006368, Cur Avg Loss: 0.00026896, Log Avg loss: 0.00028222, Global Avg Loss: 0.00642135, Time: 0.0613 Steps: 167000, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 003058, Sample Num: 48928, Cur Loss: 0.00001408, Cur Avg Loss: 0.00027466, Log Avg loss: 0.00035614, Global Avg Loss: 0.00641409, Time: 0.0625 Steps: 167200, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 003258, Sample Num: 52128, Cur Loss: 0.00012300, Cur Avg Loss: 0.00026855, Log Avg loss: 0.00017504, Global Avg Loss: 0.00640664, Time: 0.0609 Steps: 167400, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 003458, Sample Num: 55328, Cur Loss: 0.00224340, Cur Avg Loss: 0.00027011, Log Avg loss: 0.00029564, Global Avg Loss: 0.00639934, Time: 0.1146 Steps: 167600, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 003658, Sample Num: 58528, Cur Loss: 0.00001074, Cur Avg Loss: 0.00027287, Log Avg loss: 0.00032058, Global Avg Loss: 0.00639210, Time: 0.1138 Steps: 167800, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 003858, Sample Num: 61728, Cur Loss: 0.00021120, Cur Avg Loss: 0.00026941, Log Avg loss: 0.00020606, Global Avg Loss: 0.00638473, Time: 0.0930 Steps: 168000, Updated lr: 0.000033
Training, Epoch: 0034, Batch: 004058, Sample Num: 64928, Cur Loss: 0.00041150, Cur Avg Loss: 0.00026951, Log Avg loss: 0.00027138, Global Avg Loss: 0.00637747, Time: 0.0604 Steps: 168200, Updated lr: 0.000032
Training, Epoch: 0034, Batch: 004258, Sample Num: 68128, Cur Loss: 0.00000687, Cur Avg Loss: 0.00026953, Log Avg loss: 0.00026994, Global Avg Loss: 0.00637021, Time: 0.0653 Steps: 168400, Updated lr: 0.000032
Training, Epoch: 0034, Batch: 004458, Sample Num: 71328, Cur Loss: 0.00097394, Cur Avg Loss: 0.00026904, Log Avg loss: 0.00025865, Global Avg Loss: 0.00636296, Time: 0.0909 Steps: 168600, Updated lr: 0.000032
Training, Epoch: 0034, Batch: 004658, Sample Num: 74528, Cur Loss: 0.00258360, Cur Avg Loss: 0.00027155, Log Avg loss: 0.00032753, Global Avg Loss: 0.00635581, Time: 0.0608 Steps: 168800, Updated lr: 0.000032
Training, Epoch: 0034, Batch: 004858, Sample Num: 77728, Cur Loss: 0.00006844, Cur Avg Loss: 0.00027338, Log Avg loss: 0.00031599, Global Avg Loss: 0.00634866, Time: 0.0610 Steps: 169000, Updated lr: 0.000032
***** Running evaluation checkpoint-169116 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-169116 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 365.128929, Avg time per batch (s): 0.070000
{"eval_avg_loss": 0.0014, "eval_total_loss": 1.491975, "eval_acc": 0.999681, "eval_jaccard": 0.988847, "eval_prec": 0.990743, "eval_recall": 0.990389, "eval_f1": 0.990019, "eval_pr_auc": 0.995768, "eval_roc_auc": 0.999414, "eval_fmax": 0.994937, "eval_pmax": 0.996251, "eval_rmax": 0.993627, "eval_tmax": 0.14, "update_flag": false, "test_avg_loss": 0.00152, "test_total_loss": 1.61991, "test_acc": 0.999689, "test_jaccard": 0.988175, "test_prec": 0.989861, "test_recall": 0.989974, "test_f1": 0.989421, "test_pr_auc": 0.994948, "test_roc_auc": 0.999338, "test_fmax": 0.994422, "test_pmax": 0.996271, "test_rmax": 0.99258, "test_tmax": 0.15, "lr": 3.2129188534517564e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006344542163020468, "train_cur_epoch_loss": 1.3675372589802635, "train_cur_epoch_avg_loss": 0.00027493712484524796, "train_cur_epoch_time": 365.1289293766022, "train_cur_epoch_avg_time": 0.07340750490080462, "epoch": 34, "step": 169116}
##################################################
Training, Epoch: 0035, Batch: 000084, Sample Num: 1344, Cur Loss: 0.00020726, Cur Avg Loss: 0.00032124, Log Avg loss: 0.00033220, Global Avg Loss: 0.00634155, Time: 0.1336 Steps: 169200, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 000284, Sample Num: 4544, Cur Loss: 0.00002511, Cur Avg Loss: 0.00026581, Log Avg loss: 0.00024253, Global Avg Loss: 0.00633435, Time: 0.0612 Steps: 169400, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 000484, Sample Num: 7744, Cur Loss: 0.00000770, Cur Avg Loss: 0.00034000, Log Avg loss: 0.00044536, Global Avg Loss: 0.00632741, Time: 0.0606 Steps: 169600, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 000684, Sample Num: 10944, Cur Loss: 0.00000988, Cur Avg Loss: 0.00032314, Log Avg loss: 0.00028233, Global Avg Loss: 0.00632029, Time: 0.0622 Steps: 169800, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 000884, Sample Num: 14144, Cur Loss: 0.00002314, Cur Avg Loss: 0.00032228, Log Avg loss: 0.00031933, Global Avg Loss: 0.00631323, Time: 0.0549 Steps: 170000, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 001084, Sample Num: 17344, Cur Loss: 0.00006972, Cur Avg Loss: 0.00030916, Log Avg loss: 0.00025115, Global Avg Loss: 0.00630610, Time: 0.1675 Steps: 170200, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 001284, Sample Num: 20544, Cur Loss: 0.00028496, Cur Avg Loss: 0.00028364, Log Avg loss: 0.00014534, Global Avg Loss: 0.00629887, Time: 0.1675 Steps: 170400, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 001484, Sample Num: 23744, Cur Loss: 0.00004428, Cur Avg Loss: 0.00026884, Log Avg loss: 0.00017380, Global Avg Loss: 0.00629169, Time: 0.0616 Steps: 170600, Updated lr: 0.000032
Training, Epoch: 0035, Batch: 001684, Sample Num: 26944, Cur Loss: 0.00000418, Cur Avg Loss: 0.00025308, Log Avg loss: 0.00013618, Global Avg Loss: 0.00628448, Time: 0.0600 Steps: 170800, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 001884, Sample Num: 30144, Cur Loss: 0.00003897, Cur Avg Loss: 0.00024826, Log Avg loss: 0.00020763, Global Avg Loss: 0.00627738, Time: 0.0381 Steps: 171000, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 002084, Sample Num: 33344, Cur Loss: 0.00023623, Cur Avg Loss: 0.00025438, Log Avg loss: 0.00031207, Global Avg Loss: 0.00627041, Time: 0.0482 Steps: 171200, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 002284, Sample Num: 36544, Cur Loss: 0.00036041, Cur Avg Loss: 0.00026290, Log Avg loss: 0.00035166, Global Avg Loss: 0.00626350, Time: 0.0605 Steps: 171400, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 002484, Sample Num: 39744, Cur Loss: 0.00008004, Cur Avg Loss: 0.00025628, Log Avg loss: 0.00018072, Global Avg Loss: 0.00625641, Time: 0.1232 Steps: 171600, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 002684, Sample Num: 42944, Cur Loss: 0.00005312, Cur Avg Loss: 0.00026047, Log Avg loss: 0.00031246, Global Avg Loss: 0.00624949, Time: 0.0610 Steps: 171800, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 002884, Sample Num: 46144, Cur Loss: 0.00002503, Cur Avg Loss: 0.00026706, Log Avg loss: 0.00035554, Global Avg Loss: 0.00624264, Time: 0.0624 Steps: 172000, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 003084, Sample Num: 49344, Cur Loss: 0.00012054, Cur Avg Loss: 0.00026647, Log Avg loss: 0.00025798, Global Avg Loss: 0.00623569, Time: 0.2038 Steps: 172200, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 003284, Sample Num: 52544, Cur Loss: 0.00002004, Cur Avg Loss: 0.00026083, Log Avg loss: 0.00017388, Global Avg Loss: 0.00622866, Time: 0.0812 Steps: 172400, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 003484, Sample Num: 55744, Cur Loss: 0.00000973, Cur Avg Loss: 0.00026036, Log Avg loss: 0.00025257, Global Avg Loss: 0.00622173, Time: 0.0611 Steps: 172600, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 003684, Sample Num: 58944, Cur Loss: 0.00005156, Cur Avg Loss: 0.00026243, Log Avg loss: 0.00029848, Global Avg Loss: 0.00621487, Time: 0.1678 Steps: 172800, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 003884, Sample Num: 62144, Cur Loss: 0.00002858, Cur Avg Loss: 0.00025811, Log Avg loss: 0.00017863, Global Avg Loss: 0.00620790, Time: 0.0608 Steps: 173000, Updated lr: 0.000031
Training, Epoch: 0035, Batch: 004084, Sample Num: 65344, Cur Loss: 0.00001652, Cur Avg Loss: 0.00025814, Log Avg loss: 0.00025855, Global Avg Loss: 0.00620103, Time: 0.0608 Steps: 173200, Updated lr: 0.000030
Training, Epoch: 0035, Batch: 004284, Sample Num: 68544, Cur Loss: 0.00004876, Cur Avg Loss: 0.00025896, Log Avg loss: 0.00027572, Global Avg Loss: 0.00619419, Time: 0.1659 Steps: 173400, Updated lr: 0.000030
Training, Epoch: 0035, Batch: 004484, Sample Num: 71744, Cur Loss: 0.00041912, Cur Avg Loss: 0.00025877, Log Avg loss: 0.00025476, Global Avg Loss: 0.00618735, Time: 0.0662 Steps: 173600, Updated lr: 0.000030
Training, Epoch: 0035, Batch: 004684, Sample Num: 74944, Cur Loss: 0.00015316, Cur Avg Loss: 0.00026442, Log Avg loss: 0.00039120, Global Avg Loss: 0.00618068, Time: 0.0613 Steps: 173800, Updated lr: 0.000030
Training, Epoch: 0035, Batch: 004884, Sample Num: 78144, Cur Loss: 0.00026274, Cur Avg Loss: 0.00026728, Log Avg loss: 0.00033421, Global Avg Loss: 0.00617396, Time: 0.0616 Steps: 174000, Updated lr: 0.000030
***** Running evaluation checkpoint-174090 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-174090 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 395.164111, Avg time per batch (s): 0.080000
"eval_tmax": 0.27, "update_flag": false, "test_avg_loss": 0.001517, "test_total_loss": 1.617513, "test_acc": 0.999687, "test_jaccard": 0.988233, "test_prec": 0.989886, "test_recall": 0.990029, "test_f1": 0.989475, "test_pr_auc": 0.995139, "test_roc_auc": 0.999355, "test_fmax": 0.99451, "test_pmax": 0.996815, "test_rmax": 0.992216, "test_tmax": 0.23, "lr": 3.0121114251110216e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006170955210197981, "train_cur_epoch_loss": 1.338000101997892, "train_cur_epoch_avg_loss": 0.0002689988142335931, "train_cur_epoch_time": 395.16411089897156, "train_cur_epoch_avg_time": 0.07944594107337587, "epoch": 35, "step": 174090} ################################################## Training, Epoch: 0036, Batch: 000110, Sample Num: 1760, Cur Loss: 0.00000063, Cur Avg Loss: 0.00032226, Log Avg loss: 0.00034022, Global Avg Loss: 0.00616726, Time: 0.0602 Steps: 174200, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000310, Sample Num: 4960, Cur Loss: 0.00051729, Cur Avg Loss: 0.00028968, Log Avg loss: 0.00027176, Global Avg Loss: 0.00616050, Time: 0.0601 Steps: 174400, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000510, Sample Num: 8160, Cur Loss: 0.00001251, Cur Avg Loss: 0.00034743, Log Avg loss: 0.00043695, Global Avg Loss: 0.00615394, Time: 0.0609 Steps: 174600, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000710, Sample Num: 11360, Cur Loss: 0.00007860, Cur Avg Loss: 0.00032415, Log Avg loss: 0.00026477, Global Avg Loss: 0.00614721, Time: 0.1419 Steps: 174800, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000910, Sample Num: 14560, Cur Loss: 0.00000388, Cur Avg Loss: 0.00032072, Log Avg loss: 0.00030856, Global Avg Loss: 0.00614053, Time: 0.0946 Steps: 175000, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001110, Sample Num: 17760, Cur Loss: 0.00000188, Cur Avg Loss: 0.00029470, Log Avg loss: 0.00017632, Global Avg Loss: 0.00613373, Time: 0.0609 Steps: 175200, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001310, Sample Num: 20960, Cur Loss: 0.00000733, Cur Avg Loss: 0.00027416, Log Avg loss: 0.00016016, Global Avg Loss: 0.00612691, Time: 0.0607 Steps: 175400, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001510, Sample Num: 24160, Cur Loss: 0.00004097, Cur Avg Loss: 0.00025825, Log Avg loss: 0.00015404, Global Avg Loss: 0.00612011, Time: 0.0685 Steps: 175600, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001710, Sample Num: 27360, Cur Loss: 0.00002577, Cur Avg Loss: 0.00024798, Log Avg loss: 0.00017042, Global Avg Loss: 0.00611334, Time: 0.0654 Steps: 175800, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 001910, Sample Num: 30560, Cur Loss: 0.00002132, Cur Avg Loss: 0.00024100, Log Avg loss: 0.00018136, Global Avg Loss: 0.00610660, Time: 0.1144 Steps: 176000, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002110, Sample Num: 33760, Cur Loss: 0.00018108, Cur Avg Loss: 0.00025019, Log Avg loss: 0.00033787, Global Avg Loss: 0.00610005, Time: 0.1237 Steps: 176200, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002310, Sample Num: 36960, Cur Loss: 0.00000303, Cur Avg Loss: 0.00025550, Log Avg loss: 0.00031159, Global Avg Loss: 0.00609349, Time: 0.0600 Steps: 176400, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002510, Sample Num: 40160, Cur Loss: 0.00000591, Cur Avg Loss: 0.00025019, Log Avg loss: 0.00018889, Global Avg Loss: 0.00608680, Time: 0.0625 Steps: 176600, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002710, Sample Num: 43360, Cur Loss: 0.00001586, Cur Avg Loss: 0.00025344, Log Avg loss: 
Training, Epoch: 0036, Batch: 000110, Sample Num: 1760, Cur Loss: 0.00000063, Cur Avg Loss: 0.00032226, Log Avg loss: 0.00034022, Global Avg Loss: 0.00616726, Time: 0.0602 Steps: 174200, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000310, Sample Num: 4960, Cur Loss: 0.00051729, Cur Avg Loss: 0.00028968, Log Avg loss: 0.00027176, Global Avg Loss: 0.00616050, Time: 0.0601 Steps: 174400, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000510, Sample Num: 8160, Cur Loss: 0.00001251, Cur Avg Loss: 0.00034743, Log Avg loss: 0.00043695, Global Avg Loss: 0.00615394, Time: 0.0609 Steps: 174600, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000710, Sample Num: 11360, Cur Loss: 0.00007860, Cur Avg Loss: 0.00032415, Log Avg loss: 0.00026477, Global Avg Loss: 0.00614721, Time: 0.1419 Steps: 174800, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000910, Sample Num: 14560, Cur Loss: 0.00000388, Cur Avg Loss: 0.00032072, Log Avg loss: 0.00030856, Global Avg Loss: 0.00614053, Time: 0.0946 Steps: 175000, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001110, Sample Num: 17760, Cur Loss: 0.00000188, Cur Avg Loss: 0.00029470, Log Avg loss: 0.00017632, Global Avg Loss: 0.00613373, Time: 0.0609 Steps: 175200, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001310, Sample Num: 20960, Cur Loss: 0.00000733, Cur Avg Loss: 0.00027416, Log Avg loss: 0.00016016, Global Avg Loss: 0.00612691, Time: 0.0607 Steps: 175400, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001510, Sample Num: 24160, Cur Loss: 0.00004097, Cur Avg Loss: 0.00025825, Log Avg loss: 0.00015404, Global Avg Loss: 0.00612011, Time: 0.0685 Steps: 175600, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001710, Sample Num: 27360, Cur Loss: 0.00002577, Cur Avg Loss: 0.00024798, Log Avg loss: 0.00017042, Global Avg Loss: 0.00611334, Time: 0.0654 Steps: 175800, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 001910, Sample Num: 30560, Cur Loss: 0.00002132, Cur Avg Loss: 0.00024100, Log Avg loss: 0.00018136, Global Avg Loss: 0.00610660, Time: 0.1144 Steps: 176000, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002110, Sample Num: 33760, Cur Loss: 0.00018108, Cur Avg Loss: 0.00025019, Log Avg loss: 0.00033787, Global Avg Loss: 0.00610005, Time: 0.1237 Steps: 176200, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002310, Sample Num: 36960, Cur Loss: 0.00000303, Cur Avg Loss: 0.00025550, Log Avg loss: 0.00031159, Global Avg Loss: 0.00609349, Time: 0.0600 Steps: 176400, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002510, Sample Num: 40160, Cur Loss: 0.00000591, Cur Avg Loss: 0.00025019, Log Avg loss: 0.00018889, Global Avg Loss: 0.00608680, Time: 0.0625 Steps: 176600, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002710, Sample Num: 43360, Cur Loss: 0.00001586, Cur Avg Loss: 0.00025344, Log Avg loss: 0.00029412, Global Avg Loss: 0.00608025, Time: 0.1178 Steps: 176800, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002910, Sample Num: 46560, Cur Loss: 0.00034261, Cur Avg Loss: 0.00025782, Log Avg loss: 0.00031720, Global Avg Loss: 0.00607374, Time: 0.0607 Steps: 177000, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003110, Sample Num: 49760, Cur Loss: 0.00002942, Cur Avg Loss: 0.00025765, Log Avg loss: 0.00025526, Global Avg Loss: 0.00606717, Time: 0.0962 Steps: 177200, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003310, Sample Num: 52960, Cur Loss: 0.00005146, Cur Avg Loss: 0.00025432, Log Avg loss: 0.00020244, Global Avg Loss: 0.00606056, Time: 0.0662 Steps: 177400, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003510, Sample Num: 56160, Cur Loss: 0.00729232, Cur Avg Loss: 0.00025442, Log Avg loss: 0.00025611, Global Avg Loss: 0.00605402, Time: 0.1076 Steps: 177600, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003710, Sample Num: 59360, Cur Loss: 0.00013327, Cur Avg Loss: 0.00025611, Log Avg loss: 0.00028584, Global Avg Loss: 0.00604754, Time: 0.0577 Steps: 177800, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003910, Sample Num: 62560, Cur Loss: 0.00002337, Cur Avg Loss: 0.00025003, Log Avg loss: 0.00013716, Global Avg Loss: 0.00604089, Time: 0.0265 Steps: 178000, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 004110, Sample Num: 65760, Cur Loss: 0.00019094, Cur Avg Loss: 0.00025088, Log Avg loss: 0.00026758, Global Avg Loss: 0.00603441, Time: 0.1119 Steps: 178200, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004310, Sample Num: 68960, Cur Loss: 0.00011366, Cur Avg Loss: 0.00025287, Log Avg loss: 0.00029379, Global Avg Loss: 0.00602798, Time: 0.0630 Steps: 178400, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004510, Sample Num: 72160, Cur Loss: 0.00000379, Cur Avg Loss: 0.00025488, Log Avg loss: 0.00029805, Global Avg Loss: 0.00602156, Time: 0.0619 Steps: 178600, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004710, Sample Num: 75360, Cur Loss: 0.00002854, Cur Avg Loss: 0.00025943, Log Avg loss: 0.00036210, Global Avg Loss: 0.00601523, Time: 0.0607 Steps: 178800, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004910, Sample Num: 78560, Cur Loss: 0.00004135, Cur Avg Loss: 0.00025987, Log Avg loss: 0.00027026, Global Avg Loss: 0.00600881, Time: 0.0622 Steps: 179000, Updated lr: 0.000028
***** Running evaluation checkpoint-179064 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-179064 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 376.550750, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001406, "eval_total_loss": 1.499004, "eval_acc": 0.999687, "eval_jaccard": 0.989184, "eval_prec": 0.990918, "eval_recall": 0.990752, "eval_f1": 0.990301, "eval_pr_auc": 0.995738, "eval_roc_auc": 0.999414, "eval_fmax": 0.99487, "eval_pmax": 0.997576, "eval_rmax": 0.992179, "eval_tmax": 0.31, "update_flag": false, "test_avg_loss": 0.001523, "test_total_loss": 1.623556, "test_acc": 0.999687, "test_jaccard": 0.98826, "test_prec": 0.989886, "test_recall": 0.989998, "test_f1": 0.989469, "test_pr_auc": 0.99516, "test_roc_auc": 0.999351, "test_fmax": 0.994531, "test_pmax": 0.996423, "test_rmax": 0.992646, "test_tmax": 0.21, "lr": 2.811303996770287e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006006814587975796, "train_cur_epoch_loss": 1.3026548379320104, "train_cur_epoch_avg_loss": 0.0002618928101994392, "train_cur_epoch_time": 376.550749540329, "train_cur_epoch_avg_time": 0.07570380971860254, "epoch": 36, "step": 179064}
##################################################
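The logged learning rate falls by exactly 2.008074e-06 per 4,974-step epoch (3.2129e-05 at step 169,116, 3.0121e-05 at 174,090, 2.8113e-05 at 179,064, and so on), i.e. about 4.0371e-10 per step: a linear decay that reaches zero near step 248,700. A schedule of this shape can be written as a LambdaLR; the base rate and end point below are the linear fit through the logged values, while the warmup length is an assumption, since only the decay segment is visible in this excerpt:

```python
import torch
from torch.optim.lr_scheduler import LambdaLR

BASE_LR = 1e-4          # linear fit through the logged lr values
WARMUP_STEPS = 1000     # assumption; no warmup is visible in this excerpt
TOTAL_STEPS = 248_700   # where the logged linear decay hits zero (50 x 4,974)

def lr_lambda(step: int) -> float:
    if step < WARMUP_STEPS:
        return step / WARMUP_STEPS
    return max(0.0, (TOTAL_STEPS - step) / (TOTAL_STEPS - WARMUP_STEPS))

# usage sketch
model = torch.nn.Linear(8, 2)
opt = torch.optim.AdamW(model.parameters(), lr=BASE_LR)
sched = LambdaLR(opt, lr_lambda)
# e.g. at step 198,960 this gives 1e-4 * 49,740 / 247,700 = 2.00807e-05,
# matching the lr logged at the end of epoch 40 below.
```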
Training, Epoch: 0037, Batch: 000136, Sample Num: 2176, Cur Loss: 0.00023113, Cur Avg Loss: 0.00028984, Log Avg loss: 0.00033051, Global Avg Loss: 0.00600248, Time: 0.0605 Steps: 179200, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000336, Sample Num: 5376, Cur Loss: 0.00000301, Cur Avg Loss: 0.00028163, Log Avg loss: 0.00027604, Global Avg Loss: 0.00599609, Time: 0.0475 Steps: 179400, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000536, Sample Num: 8576, Cur Loss: 0.00002364, Cur Avg Loss: 0.00033769, Log Avg loss: 0.00043189, Global Avg Loss: 0.00598990, Time: 0.1096 Steps: 179600, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000736, Sample Num: 11776, Cur Loss: 0.00001687, Cur Avg Loss: 0.00033085, Log Avg loss: 0.00031252, Global Avg Loss: 0.00598358, Time: 0.0689 Steps: 179800, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000936, Sample Num: 14976, Cur Loss: 0.00006148, Cur Avg Loss: 0.00032045, Log Avg loss: 0.00028217, Global Avg Loss: 0.00597725, Time: 0.0603 Steps: 180000, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 001136, Sample Num: 18176, Cur Loss: 0.00004758, Cur Avg Loss: 0.00029669, Log Avg loss: 0.00018549, Global Avg Loss: 0.00597082, Time: 0.1177 Steps: 180200, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 001336, Sample Num: 21376, Cur Loss: 0.00028951, Cur Avg Loss: 0.00027989, Log Avg loss: 0.00018449, Global Avg Loss: 0.00596440, Time: 0.0915 Steps: 180400, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 001536, Sample Num: 24576, Cur Loss: 0.00002858, Cur Avg Loss: 0.00025871, Log Avg loss: 0.00011720, Global Avg Loss: 0.00595793, Time: 0.1012 Steps: 180600, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 001736, Sample Num: 27776, Cur Loss: 0.00001998, Cur Avg Loss: 0.00024887, Log Avg loss: 0.00017327, Global Avg Loss: 0.00595153, Time: 0.0856 Steps: 180800, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 001936, Sample Num: 30976, Cur Loss: 0.00008286, Cur Avg Loss: 0.00023619, Log Avg loss: 0.00012617, Global Avg Loss: 0.00594509, Time: 0.0917 Steps: 181000, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002136, Sample Num: 34176, Cur Loss: 0.00016835, Cur Avg Loss: 0.00024723, Log Avg loss: 0.00035407, Global Avg Loss: 0.00593892, Time: 0.0606 Steps: 181200, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002336, Sample Num: 37376, Cur Loss: 0.00028035, Cur Avg Loss: 0.00025495, Log Avg loss: 0.00033747, Global Avg Loss: 0.00593274, Time: 0.0668 Steps: 181400, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002536, Sample Num: 40576, Cur Loss: 0.00002237, Cur Avg Loss: 0.00024859, Log Avg loss: 0.00017420, Global Avg Loss: 0.00592640, Time: 0.0599 Steps: 181600, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002736, Sample Num: 43776, Cur Loss: 0.00000486, Cur Avg Loss: 0.00025027, Log Avg loss: 0.00027159, Global Avg Loss: 0.00592018, Time: 0.0706 Steps: 181800, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002936, Sample Num: 46976, Cur Loss: 0.00002003, Cur Avg Loss: 0.00025170, Log Avg loss: 0.00027124, Global Avg Loss: 0.00591397, Time: 0.1152 Steps: 182000, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003136, Sample Num: 50176, Cur Loss: 0.00001035, Cur Avg Loss: 0.00025204, Log Avg loss: 0.00025709, Global Avg Loss: 0.00590776, Time: 0.0687 Steps: 182200, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003336, Sample Num: 53376, Cur Loss: 0.00001845, Cur Avg Loss: 0.00024972, Log Avg loss: 0.00021331, Global Avg Loss: 0.00590152, Time: 0.0608 Steps: 182400, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003536, Sample Num: 56576, Cur Loss: 0.00004190, Cur Avg Loss: 0.00024910, Log Avg loss: 0.00023882, Global Avg Loss: 0.00589532, Time: 0.0600 Steps: 182600, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003736, Sample Num: 59776, Cur Loss: 0.00003490, Cur Avg Loss: 0.00024773, Log Avg loss: 0.00022340, Global Avg Loss: 0.00588911, Time: 0.1650 Steps: 182800, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003936, Sample Num: 62976, Cur Loss: 0.00002937, Cur Avg Loss: 0.00024374, Log Avg loss: 0.00016939, Global Avg Loss: 0.00588286, Time: 0.0659 Steps: 183000, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 004136, Sample Num: 66176, Cur Loss: 0.00006301, Cur Avg Loss: 0.00024674, Log Avg loss: 0.00030570, Global Avg Loss: 0.00587677, Time: 0.0603 Steps: 183200, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004336, Sample Num: 69376, Cur Loss: 0.00000725, Cur Avg Loss: 0.00024648, Log Avg loss: 0.00024118, Global Avg Loss: 0.00587063, Time: 0.0598 Steps: 183400, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004536, Sample Num: 72576, Cur Loss: 0.00008904, Cur Avg Loss: 0.00025315, Log Avg loss: 0.00039764, Global Avg Loss: 0.00586467, Time: 0.1256 Steps: 183600, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004736, Sample Num: 75776, Cur Loss: 0.00015245, Cur Avg Loss: 0.00025182, Log Avg loss: 0.00022172, Global Avg Loss: 0.00585852, Time: 0.0633 Steps: 183800, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004936, Sample Num: 78976, Cur Loss: 0.00006617, Cur Avg Loss: 0.00025467, Log Avg loss: 0.00032207, Global Avg Loss: 0.00585251, Time: 0.1031 Steps: 184000, Updated lr: 0.000026
***** Running evaluation checkpoint-184038 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-184038 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 399.089084, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001413, "eval_total_loss": 1.506606, "eval_acc": 0.999688, "eval_jaccard": 0.989113, "eval_prec": 0.990979, "eval_recall": 0.99054, "eval_f1": 0.990245, "eval_pr_auc": 0.995714, "eval_roc_auc": 0.999408, "eval_fmax": 0.994981, "eval_pmax": 0.997313, "eval_rmax": 0.992661, "eval_tmax": 0.24, "update_flag": false, "test_avg_loss": 0.001519, "test_total_loss": 1.619718, "test_acc": 0.999697, "test_jaccard": 0.988326, "test_prec": 0.9899, "test_recall": 0.990086, "test_f1": 0.989524, "test_pr_auc": 0.99518, "test_roc_auc": 0.999357, "test_fmax": 0.994617, "test_pmax": 0.996912, "test_rmax": 0.992334, "test_tmax": 0.21, "lr": 2.610496568429552e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005851404229386506, "train_cur_epoch_loss": 1.276484186536976, "train_cur_epoch_avg_loss": 0.00025663132017229113, "train_cur_epoch_time": 399.0890836715698, "train_cur_epoch_avg_time": 0.08023503893678524, "epoch": 37, "step": 184038}
##################################################
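The epoch-summary JSON is internally consistent and cheap to sanity-check: each epoch is 4,974 optimizer steps, so the checkpoint ids are epoch × 4,974 (37 × 4,974 = 184,038 above), train_cur_epoch_avg_loss is train_cur_epoch_loss / 4,974, and train_cur_epoch_avg_time is train_cur_epoch_time / 4,974 (the "Avg time per batch" line prints the same quantity rounded to two decimals). A few assert-style checks against the values just logged:

```python
import math

# values copied from the epoch-37 summary above
epoch = {
    "cur_epoch_step": 4974,
    "train_cur_epoch_loss": 1.276484186536976,
    "train_cur_epoch_avg_loss": 0.00025663132017229113,
    "train_cur_epoch_time": 399.0890836715698,
    "train_cur_epoch_avg_time": 0.08023503893678524,
    "epoch": 37,
    "step": 184038,
}

assert epoch["step"] == epoch["epoch"] * epoch["cur_epoch_step"]
assert math.isclose(epoch["train_cur_epoch_avg_loss"],
                    epoch["train_cur_epoch_loss"] / epoch["cur_epoch_step"])
assert math.isclose(epoch["train_cur_epoch_avg_time"],
                    epoch["train_cur_epoch_time"] / epoch["cur_epoch_step"])
```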
Training, Epoch: 0038, Batch: 000162, Sample Num: 2592, Cur Loss: 0.00119951, Cur Avg Loss: 0.00021055, Log Avg loss: 0.00026776, Global Avg Loss: 0.00584644, Time: 0.0683 Steps: 184200, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000362, Sample Num: 5792, Cur Loss: 0.00001956, Cur Avg Loss: 0.00025517, Log Avg loss: 0.00029132, Global Avg Loss: 0.00584042, Time: 0.0620 Steps: 184400, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000562, Sample Num: 8992, Cur Loss: 0.00006718, Cur Avg Loss: 0.00031212, Log Avg loss: 0.00041519, Global Avg Loss: 0.00583454, Time: 0.1243 Steps: 184600, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000762, Sample Num: 12192, Cur Loss: 0.00001358, Cur Avg Loss: 0.00031924, Log Avg loss: 0.00033926, Global Avg Loss: 0.00582859, Time: 0.0575 Steps: 184800, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000962, Sample Num: 15392, Cur Loss: 0.00000237, Cur Avg Loss: 0.00029966, Log Avg loss: 0.00022503, Global Avg Loss: 0.00582254, Time: 0.0620 Steps: 185000, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 001162, Sample Num: 18592, Cur Loss: 0.00001437, Cur Avg Loss: 0.00028020, Log Avg loss: 0.00018662, Global Avg Loss: 0.00581645, Time: 0.0607 Steps: 185200, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 001362, Sample Num: 21792, Cur Loss: 0.00003694, Cur Avg Loss: 0.00026732, Log Avg loss: 0.00019249, Global Avg Loss: 0.00581038, Time: 0.0659 Steps: 185400, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 001562, Sample Num: 24992, Cur Loss: 0.00177598, Cur Avg Loss: 0.00024872, Log Avg loss: 0.00012206, Global Avg Loss: 0.00580425, Time: 0.0621 Steps: 185600, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 001762, Sample Num: 28192, Cur Loss: 0.00003993, Cur Avg Loss: 0.00023672, Log Avg loss: 0.00014294, Global Avg Loss: 0.00579816, Time: 0.0602 Steps: 185800, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 001962, Sample Num: 31392, Cur Loss: 0.00000051, Cur Avg Loss: 0.00022733, Log Avg loss: 0.00014462, Global Avg Loss: 0.00579208, Time: 0.1643 Steps: 186000, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002162, Sample Num: 34592, Cur Loss: 0.00002362, Cur Avg Loss: 0.00023743, Log Avg loss: 0.00033653, Global Avg Loss: 0.00578622, Time: 0.0646 Steps: 186200, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002362, Sample Num: 37792, Cur Loss: 0.00001882, Cur Avg Loss: 0.00024165, Log Avg loss: 0.00028722, Global Avg Loss: 0.00578032, Time: 0.0891 Steps: 186400, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002562, Sample Num: 40992, Cur Loss: 0.00002478, Cur Avg Loss: 0.00023439, Log Avg loss: 0.00014872, Global Avg Loss: 0.00577428, Time: 0.0693 Steps: 186600, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002762, Sample Num: 44192, Cur Loss: 0.00007876, Cur Avg Loss: 0.00024465, Log Avg loss: 0.00037601, Global Avg Loss: 0.00576850, Time: 0.0576 Steps: 186800, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002962, Sample Num: 47392, Cur Loss: 0.00002361, Cur Avg Loss: 0.00024715, Log Avg loss: 0.00028176, Global Avg Loss: 0.00576264, Time: 0.0610 Steps: 187000, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003162, Sample Num: 50592, Cur Loss: 0.00000988, Cur Avg Loss: 0.00024383, Log Avg loss: 0.00019460, Global Avg Loss: 0.00575669, Time: 0.0267 Steps: 187200, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003362, Sample Num: 53792, Cur Loss: 0.00003125, Cur Avg Loss: 0.00024200, Log Avg loss: 0.00021312, Global Avg Loss: 0.00575077, Time: 0.0683 Steps: 187400, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003562, Sample Num: 56992, Cur Loss: 0.00015521, Cur Avg Loss: 0.00024209, Log Avg loss: 0.00024361, Global Avg Loss: 0.00574490, Time: 0.1124 Steps: 187600, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003762, Sample Num: 60192, Cur Loss: 0.00000432, Cur Avg Loss: 0.00024112, Log Avg loss: 0.00022381, Global Avg Loss: 0.00573902, Time: 0.0535 Steps: 187800, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003962, Sample Num: 63392, Cur Loss: 0.00002960, Cur Avg Loss: 0.00024037, Log Avg loss: 0.00022627, Global Avg Loss: 0.00573315, Time: 0.0603 Steps: 188000, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 004162, Sample Num: 66592, Cur Loss: 0.00001883, Cur Avg Loss: 0.00023919, Log Avg loss: 0.00021581, Global Avg Loss: 0.00572729, Time: 0.0684 Steps: 188200, Updated lr: 0.000024
Training, Epoch: 0038, Batch: 004362, Sample Num: 69792, Cur Loss: 0.00004232, Cur Avg Loss: 0.00023824, Log Avg loss: 0.00021850, Global Avg Loss: 0.00572144, Time: 0.0609 Steps: 188400, Updated lr: 0.000024
Training, Epoch: 0038, Batch: 004562, Sample Num: 72992, Cur Loss: 0.00000347, Cur Avg Loss: 0.00024295, Log Avg loss: 0.00034565, Global Avg Loss: 0.00571574, Time: 0.0574 Steps: 188600, Updated lr: 0.000024
Training, Epoch: 0038, Batch: 004762, Sample Num: 76192, Cur Loss: 0.00054049, Cur Avg Loss: 0.00024274, Log Avg loss: 0.00023786, Global Avg Loss: 0.00570994, Time: 0.1608 Steps: 188800, Updated lr: 0.000024
Training, Epoch: 0038, Batch: 004962, Sample Num: 79392, Cur Loss: 0.00001737, Cur Avg Loss: 0.00024571, Log Avg loss: 0.00031654, Global Avg Loss: 0.00570423, Time: 0.0606 Steps: 189000, Updated lr: 0.000024
***** Running evaluation checkpoint-189012 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-189012 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 381.243289, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.00141, "eval_total_loss": 1.503273, "eval_acc": 0.999684, "eval_jaccard": 0.989072, "eval_prec": 0.990837, "eval_recall": 0.990539, "eval_f1": 0.990176, "eval_pr_auc": 0.995764, "eval_roc_auc": 0.99942, "eval_fmax": 0.994967, "eval_pmax": 0.997065, "eval_rmax": 0.992878, "eval_tmax": 0.22, "update_flag": false, "test_avg_loss": 0.001515, "test_total_loss": 1.614496, "test_acc": 0.999693, "test_jaccard": 0.988281, "test_prec": 0.9898, "test_recall": 0.990109, "test_f1": 0.989484, "test_pr_auc": 0.995036, "test_roc_auc": 0.999354, "test_fmax": 0.994585, "test_pmax": 0.99646, "test_rmax": 0.992717, "test_tmax": 0.18, "lr": 2.4096891400888173e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005703882417452362, "train_cur_epoch_loss": 1.2214919196715002, "train_cur_epoch_avg_loss": 0.0002455753758889224, "train_cur_epoch_time": 381.2432894706726, "train_cur_epoch_avg_time": 0.07664722345610628, "epoch": 38, "step": 189012}
##################################################
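"update_flag" stays false throughout this excerpt. Judging from its placement next to the dev metrics, it appears to record whether the epoch improved on the best dev score seen so far, which would mean the best checkpoint predates epoch 35; dev F1 has been flat around 0.990 for this whole stretch. A toy tracker with the same observable behavior, assuming dev F1 is the tracked quantity (the seed value 0.9904 is hypothetical, standing in for whichever earlier epoch set the best score):

```python
class BestTracker:
    """Keeps the best dev metric; `update` mirrors the log's update_flag."""
    def __init__(self):
        self.best = float("-inf")
        self.best_step = None

    def update(self, metric: float, step: int) -> bool:
        if metric > self.best:
            self.best, self.best_step = metric, step
            return True   # this checkpoint would become the new best
        return False

tracker = BestTracker()
tracker.best = 0.9904   # hypothetical best set by some epoch before 35
for step, f1 in [(174090, 0.99003), (179064, 0.990301), (184038, 0.990245)]:
    print(step, tracker.update(f1, step))   # False, False, False
```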
Training, Epoch: 0039, Batch: 000188, Sample Num: 3008, Cur Loss: 0.00001745, Cur Avg Loss: 0.00029687, Log Avg loss: 0.00029044, Global Avg Loss: 0.00569851, Time: 0.0608 Steps: 189200, Updated lr: 0.000024
Training, Epoch: 0039, Batch: 000388, Sample Num: 6208, Cur Loss: 0.00000254, Cur Avg Loss: 0.00026631, Log Avg loss: 0.00023758, Global Avg Loss: 0.00569274, Time: 0.0602 Steps: 189400, Updated lr: 0.000024
Training, Epoch: 0039, Batch: 000588, Sample Num: 9408, Cur Loss: 0.00005056, Cur Avg Loss: 0.00031703, Log Avg loss: 0.00041543, Global Avg Loss: 0.00568718, Time: 0.1196 Steps: 189600, Updated lr: 0.000024
Training, Epoch: 0039, Batch: 000788, Sample Num: 12608, Cur Loss: 0.00006034, Cur Avg Loss: 0.00031059, Log Avg loss: 0.00029165, Global Avg Loss: 0.00568149, Time: 0.2241 Steps: 189800, Updated lr: 0.000024
Training, Epoch: 0039, Batch: 000988, Sample Num: 15808, Cur Loss: 0.00001653, Cur Avg Loss: 0.00028746, Log Avg loss: 0.00019634, Global Avg Loss: 0.00567572, Time: 0.0610 Steps: 190000, Updated lr: 0.000024
Training, Epoch: 0039, Batch: 001188, Sample Num: 19008, Cur Loss: 0.00002265, Cur Avg Loss: 0.00027515, Log Avg loss: 0.00021431, Global Avg Loss: 0.00566997, Time: 0.1232 Steps: 190200, Updated lr: 0.000024
Training, Epoch: 0039, Batch: 001388, Sample Num: 22208, Cur Loss: 0.00012560, Cur Avg Loss: 0.00025758, Log Avg loss: 0.00015322, Global Avg Loss: 0.00566418, Time: 0.0621 Steps: 190400, Updated lr: 0.000024
Training, Epoch: 0039, Batch: 001588, Sample Num: 25408, Cur Loss: 0.00001529, Cur Avg Loss: 0.00023846, Log Avg loss: 0.00010577, Global Avg Loss: 0.00565835, Time: 0.0934 Steps: 190600, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 001788, Sample Num: 28608, Cur Loss: 0.00001319, Cur Avg Loss: 0.00022993, Log Avg loss: 0.00016223, Global Avg Loss: 0.00565259, Time: 0.0609 Steps: 190800, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 001988, Sample Num: 31808, Cur Loss: 0.00014578, Cur Avg Loss: 0.00022481, Log Avg loss: 0.00017904, Global Avg Loss: 0.00564685, Time: 0.0646 Steps: 191000, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 002188, Sample Num: 35008, Cur Loss: 0.00008845, Cur Avg Loss: 0.00023849, Log Avg loss: 0.00037448, Global Avg Loss: 0.00564134, Time: 0.0671 Steps: 191200, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 002388, Sample Num: 38208, Cur Loss: 0.00001430, Cur Avg Loss: 0.00024424, Log Avg loss: 0.00030714, Global Avg Loss: 0.00563577, Time: 0.0590 Steps: 191400, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 002588, Sample Num: 41408, Cur Loss: 0.00002653, Cur Avg Loss: 0.00023946, Log Avg loss: 0.00018241, Global Avg Loss: 0.00563007, Time: 0.0790 Steps: 191600, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 002788, Sample Num: 44608, Cur Loss: 0.00075745, Cur Avg Loss: 0.00024485, Log Avg loss: 0.00031460, Global Avg Loss: 0.00562453, Time: 0.1114 Steps: 191800, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 002988, Sample Num: 47808, Cur Loss: 0.00003711, Cur Avg Loss: 0.00024924, Log Avg loss: 0.00031040, Global Avg Loss: 0.00561899, Time: 0.1187 Steps: 192000, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 003188, Sample Num: 51008, Cur Loss: 0.00002446, Cur Avg Loss: 0.00024558, Log Avg loss: 0.00019091, Global Avg Loss: 0.00561335, Time: 0.0624 Steps: 192200, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 003388, Sample Num: 54208, Cur Loss: 0.00001911, Cur Avg Loss: 0.00024224, Log Avg loss: 0.00018900, Global Avg Loss: 0.00560771, Time: 0.0649 Steps: 192400, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 003588, Sample Num: 57408, Cur Loss: 0.00003790, Cur Avg Loss: 0.00024319, Log Avg loss: 0.00025927, Global Avg Loss: 0.00560215, Time: 0.0608 Steps: 192600, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 003788, Sample Num: 60608, Cur Loss: 0.00001367, Cur Avg Loss: 0.00024234, Log Avg loss: 0.00022713, Global Avg Loss: 0.00559658, Time: 0.1356 Steps: 192800, Updated lr: 0.000023
Training, Epoch: 0039, Batch: 003988, Sample Num: 63808, Cur Loss: 0.00000732, Cur Avg Loss: 0.00024086, Log Avg loss: 0.00021270, Global Avg Loss: 0.00559100, Time: 0.0621 Steps: 193000, Updated lr: 0.000022
Training, Epoch: 0039, Batch: 004188, Sample Num: 67008, Cur Loss: 0.00006199, Cur Avg Loss: 0.00024102, Log Avg loss: 0.00024427, Global Avg Loss: 0.00558546, Time: 0.0694 Steps: 193200, Updated lr: 0.000022
Training, Epoch: 0039, Batch: 004388, Sample Num: 70208, Cur Loss: 0.00000666, Cur Avg Loss: 0.00023994, Log Avg loss: 0.00021727, Global Avg Loss: 0.00557991, Time: 0.1442 Steps: 193400, Updated lr: 0.000022
Training, Epoch: 0039, Batch: 004588, Sample Num: 73408, Cur Loss: 0.00001093, Cur Avg Loss: 0.00024341, Log Avg loss: 0.00031957, Global Avg Loss: 0.00557448, Time: 0.0611 Steps: 193600, Updated lr: 0.000022
Training, Epoch: 0039, Batch: 004788, Sample Num: 76608, Cur Loss: 0.00000104, Cur Avg Loss: 0.00024222, Log Avg loss: 0.00021488, Global Avg Loss: 0.00556895, Time: 0.0602 Steps: 193800, Updated lr: 0.000022
***** Running evaluation checkpoint-193986 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-193986 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 392.496329, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001407, "eval_total_loss": 1.500322, "eval_acc": 0.999684, "eval_jaccard": 0.989027, "eval_prec": 0.990822, "eval_recall": 0.99051, "eval_f1": 0.990143, "eval_pr_auc": 0.995791, "eval_roc_auc": 0.999421, "eval_fmax": 0.994958, "eval_pmax": 0.996555, "eval_rmax": 0.993366, "eval_tmax": 0.15, "update_flag": false, "test_avg_loss": 0.001526, "test_total_loss": 1.627237, "test_acc": 0.999692, "test_jaccard": 0.988334, "test_prec": 0.989879, "test_recall": 0.990156, "test_f1": 0.989548, "test_pr_auc": 0.995032, "test_roc_auc": 0.999346, "test_fmax": 0.994513, "test_pmax": 0.996041, "test_rmax": 0.99299, "test_tmax": 0.16, "lr": 2.2088817117480825e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005563907395121173, "train_cur_epoch_loss": 1.217916462470356, "train_cur_epoch_avg_loss": 0.0002448565465360587, "train_cur_epoch_time": 392.4963290691376, "train_cur_epoch_avg_time": 0.07890959571152746, "epoch": 39, "step": 193986}
##################################################
Training, Epoch: 0040, Batch: 000014, Sample Num: 224, Cur Loss: 0.00037209, Cur Avg Loss: 0.00006713, Log Avg loss: 0.00029561, Global Avg Loss: 0.00556351, Time: 0.0595 Steps: 194000, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 000214, Sample Num: 3424, Cur Loss: 0.00002103, Cur Avg Loss: 0.00025796, Log Avg loss: 0.00027132, Global Avg Loss: 0.00555806, Time: 0.1499 Steps: 194200, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 000414, Sample Num: 6624, Cur Loss: 0.00001001, Cur Avg Loss: 0.00025427, Log Avg loss: 0.00025033, Global Avg Loss: 0.00555260, Time: 0.0610 Steps: 194400, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 000614, Sample Num: 9824, Cur Loss: 0.00001612, Cur Avg Loss: 0.00029825, Log Avg loss: 0.00038928, Global Avg Loss: 0.00554729, Time: 0.0590 Steps: 194600, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 000814, Sample Num: 13024, Cur Loss: 0.00011120, Cur Avg Loss: 0.00030857, Log Avg loss: 0.00034025, Global Avg Loss: 0.00554195, Time: 0.1157 Steps: 194800, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 001014, Sample Num: 16224, Cur Loss: 0.00176775, Cur Avg Loss: 0.00028622, Log Avg loss: 0.00019524, Global Avg Loss: 0.00553646, Time: 0.1345 Steps: 195000, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 001214, Sample Num: 19424, Cur Loss: 0.00034625, Cur Avg Loss: 0.00027174, Log Avg loss: 0.00019832, Global Avg Loss: 0.00553099, Time: 0.0933 Steps: 195200, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 001414, Sample Num: 22624, Cur Loss: 0.00009938, Cur Avg Loss: 0.00025417, Log Avg loss: 0.00014753, Global Avg Loss: 0.00552548, Time: 0.2037 Steps: 195400, Updated lr: 0.000022
Training, Epoch: 0040, Batch: 001614, Sample Num: 25824, Cur Loss: 0.00000613, Cur Avg Loss: 0.00023622, Log Avg loss: 0.00010932, Global Avg Loss: 0.00551995, Time: 0.0578 Steps: 195600, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 001814, Sample Num: 29024, Cur Loss: 0.00330941, Cur Avg Loss: 0.00023046, Log Avg loss: 0.00018400, Global Avg Loss: 0.00551450, Time: 0.0632 Steps: 195800, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 002014, Sample Num: 32224, Cur Loss: 0.00001371, Cur Avg Loss: 0.00022546, Log Avg loss: 0.00018014, Global Avg Loss: 0.00550905, Time: 0.1161 Steps: 196000, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 002214, Sample Num: 35424, Cur Loss: 0.00005645, Cur Avg Loss: 0.00023937, Log Avg loss: 0.00037936, Global Avg Loss: 0.00550382, Time: 0.0684 Steps: 196200, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 002414, Sample Num: 38624, Cur Loss: 0.00004930, Cur Avg Loss: 0.00023980, Log Avg loss: 0.00024462, Global Avg Loss: 0.00549847, Time: 0.0606 Steps: 196400, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 002614, Sample Num: 41824, Cur Loss: 0.00001516, Cur Avg Loss: 0.00023941, Log Avg loss: 0.00023463, Global Avg Loss: 0.00549311, Time: 0.0616 Steps: 196600, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 002814, Sample Num: 45024, Cur Loss: 0.00000409, Cur Avg Loss: 0.00024463, Log Avg loss: 0.00031290, Global Avg Loss: 0.00548785, Time: 0.0607 Steps: 196800, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 003014, Sample Num: 48224, Cur Loss: 0.00000435, Cur Avg Loss: 0.00024843, Log Avg loss: 0.00030183, Global Avg Loss: 0.00548258, Time: 0.0645 Steps: 197000, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 003214, Sample Num: 51424, Cur Loss: 0.00002249, Cur Avg Loss: 0.00024235, Log Avg loss: 0.00015080, Global Avg Loss: 0.00547718, Time: 0.0598 Steps: 197200, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 003414, Sample Num: 54624, Cur Loss: 0.00000886, Cur Avg Loss: 0.00023898, Log Avg loss: 0.00018485, Global Avg Loss: 0.00547181, Time: 0.1114 Steps: 197400, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 003614, Sample Num: 57824, Cur Loss: 0.00003420, Cur Avg Loss: 0.00023800, Log Avg loss: 0.00022115, Global Avg Loss: 0.00546650, Time: 0.0601 Steps: 197600, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 003814, Sample Num: 61024, Cur Loss: 0.00010039, Cur Avg Loss: 0.00023493, Log Avg loss: 0.00017963, Global Avg Loss: 0.00546115, Time: 0.0622 Steps: 197800, Updated lr: 0.000021
Training, Epoch: 0040, Batch: 004014, Sample Num: 64224, Cur Loss: 0.00000459, Cur Avg Loss: 0.00023421, Log Avg loss: 0.00022046, Global Avg Loss: 0.00545586, Time: 0.0335 Steps: 198000, Updated lr: 0.000020
Training, Epoch: 0040, Batch: 004214, Sample Num: 67424, Cur Loss: 0.00012854, Cur Avg Loss: 0.00023371, Log Avg loss: 0.00022352, Global Avg Loss: 0.00545058, Time: 0.0507 Steps: 198200, Updated lr: 0.000020
Training, Epoch: 0040, Batch: 004414, Sample Num: 70624, Cur Loss: 0.00000246, Cur Avg Loss: 0.00023235, Log Avg loss: 0.00020382, Global Avg Loss: 0.00544529, Time: 0.0237 Steps: 198400, Updated lr: 0.000020
Training, Epoch: 0040, Batch: 004614, Sample Num: 73824, Cur Loss: 0.00116630, Cur Avg Loss: 0.00023813, Log Avg loss: 0.00036568, Global Avg Loss: 0.00544018, Time: 0.0931 Steps: 198600, Updated lr: 0.000020
Training, Epoch: 0040, Batch: 004814, Sample Num: 77024, Cur Loss: 0.00019573, Cur Avg Loss: 0.00024171, Log Avg loss: 0.00032430, Global Avg Loss: 0.00543503, Time: 0.0588 Steps: 198800, Updated lr: 0.000020
***** Running evaluation checkpoint-198960 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-198960 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 367.685806, Avg time per batch (s): 0.070000
{"eval_avg_loss": 0.001402, "eval_total_loss": 1.494605, "eval_acc": 0.999683, "eval_jaccard": 0.989074, "eval_prec": 0.990863, "eval_recall": 0.990617, "eval_f1": 0.99022, "eval_pr_auc": 0.995824, "eval_roc_auc": 0.999426, "eval_fmax": 0.995072, "eval_pmax": 0.996309, "eval_rmax": 0.993837, "eval_tmax": 0.12, "update_flag": false, "test_avg_loss": 0.001526, "test_total_loss": 1.627002, "test_acc": 0.999688, "test_jaccard": 0.98818, "test_prec": 0.989703, "test_recall": 0.990122, "test_f1": 0.98942, "test_pr_auc": 0.995171, "test_roc_auc": 0.999362, "test_fmax": 0.994602, "test_pmax": 0.997336, "test_rmax": 0.991884, "test_tmax": 0.32, "lr": 2.0080742834073476e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005430861490668808, "train_cur_epoch_loss": 1.2040622334903048, "train_cur_epoch_avg_loss": 0.00024207121702659928, "train_cur_epoch_time": 367.68580627441406, "train_cur_epoch_avg_time": 0.0739215533322103, "epoch": 40, "step": 198960}
##################################################
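Records at this density are easier to study as a table than inline. The sketch below parses exactly the two record shapes in this log, the per-200-step "Training" lines and the end-of-epoch JSON; parse_train_log is a hypothetical helper, not part of the training code:

```python
import json
import re

TRAIN_RE = re.compile(
    r"Training, Epoch: (\d+), Batch: (\d+), Sample Num: (\d+), "
    r"Cur Loss: ([\d.]+), Cur Avg Loss: ([\d.]+), Log Avg loss: ([\d.]+), "
    r"Global Avg Loss: ([\d.]+), Time: ([\d.]+) Steps: (\d+), Updated lr: ([\d.]+)"
)
# the end-of-epoch summary lines are plain JSON objects
EPOCH_RE = re.compile(r"^\{.*\"cur_epoch_step\".*\}$")

def parse_train_log(path):
    steps, epochs = [], []
    with open(path) as fh:
        for line in fh:
            m = TRAIN_RE.search(line)
            if m:
                e, b, n, cur, cavg, lavg, gavg, t, s, lr = m.groups()
                steps.append({"epoch": int(e), "step": int(s),
                              "log_avg_loss": float(lavg), "lr": float(lr)})
            elif EPOCH_RE.match(line.strip()):
                epochs.append(json.loads(line))
    return steps, epochs

# e.g. plot [r["log_avg_loss"] for r in steps] against [r["step"] for r in steps]
```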
Sample Num: 26240, Cur Loss: 0.00003649, Cur Avg Loss: 0.00022877, Log Avg loss: 0.00012085, Global Avg Loss: 0.00538833, Time: 0.0249 Steps: 200600, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 001840, Sample Num: 29440, Cur Loss: 0.00003274, Cur Avg Loss: 0.00022352, Log Avg loss: 0.00018046, Global Avg Loss: 0.00538314, Time: 0.0619 Steps: 200800, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00002239, Cur Avg Loss: 0.00022151, Log Avg loss: 0.00020302, Global Avg Loss: 0.00537799, Time: 0.1080 Steps: 201000, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 002240, Sample Num: 35840, Cur Loss: 0.00006180, Cur Avg Loss: 0.00022917, Log Avg loss: 0.00030728, Global Avg Loss: 0.00537295, Time: 0.1453 Steps: 201200, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 002440, Sample Num: 39040, Cur Loss: 0.00000265, Cur Avg Loss: 0.00022803, Log Avg loss: 0.00021521, Global Avg Loss: 0.00536783, Time: 0.0918 Steps: 201400, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 002640, Sample Num: 42240, Cur Loss: 0.00003815, Cur Avg Loss: 0.00022756, Log Avg loss: 0.00022188, Global Avg Loss: 0.00536272, Time: 0.0312 Steps: 201600, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00000486, Cur Avg Loss: 0.00022896, Log Avg loss: 0.00024744, Global Avg Loss: 0.00535765, Time: 0.0607 Steps: 201800, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00010483, Cur Avg Loss: 0.00023508, Log Avg loss: 0.00032196, Global Avg Loss: 0.00535267, Time: 0.0695 Steps: 202000, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 003240, Sample Num: 51840, Cur Loss: 0.00000169, Cur Avg Loss: 0.00022890, Log Avg loss: 0.00013496, Global Avg Loss: 0.00534751, Time: 0.0602 Steps: 202200, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 003440, Sample Num: 55040, Cur Loss: 0.00000661, Cur Avg Loss: 0.00022789, Log Avg loss: 0.00021162, Global Avg Loss: 0.00534243, Time: 0.0605 Steps: 202400, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 003640, Sample Num: 58240, Cur Loss: 0.00002214, Cur Avg Loss: 0.00022609, Log Avg loss: 0.00019503, Global Avg Loss: 0.00533735, Time: 0.0605 Steps: 202600, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 003840, Sample Num: 61440, Cur Loss: 0.00027971, Cur Avg Loss: 0.00022335, Log Avg loss: 0.00017344, Global Avg Loss: 0.00533226, Time: 0.0693 Steps: 202800, Updated lr: 0.000019 Training, Epoch: 0041, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00020641, Cur Avg Loss: 0.00022349, Log Avg loss: 0.00022618, Global Avg Loss: 0.00532723, Time: 0.0911 Steps: 203000, Updated lr: 0.000018 Training, Epoch: 0041, Batch: 004240, Sample Num: 67840, Cur Loss: 0.00001650, Cur Avg Loss: 0.00022398, Log Avg loss: 0.00023398, Global Avg Loss: 0.00532221, Time: 0.0607 Steps: 203200, Updated lr: 0.000018 Training, Epoch: 0041, Batch: 004440, Sample Num: 71040, Cur Loss: 0.00006349, Cur Avg Loss: 0.00022458, Log Avg loss: 0.00023732, Global Avg Loss: 0.00531721, Time: 0.0626 Steps: 203400, Updated lr: 0.000018 Training, Epoch: 0041, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00002734, Cur Avg Loss: 0.00022854, Log Avg loss: 0.00031649, Global Avg Loss: 0.00531230, Time: 0.1312 Steps: 203600, Updated lr: 0.000018 Training, Epoch: 0041, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00003282, Cur Avg Loss: 0.00023072, Log Avg loss: 0.00028118, Global Avg Loss: 0.00530736, Time: 0.0624 Steps: 203800, Updated lr: 0.000018 ***** Running evaluation 
checkpoint-203934 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-203934 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 384.560972, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001418, "eval_total_loss": 1.511538, "eval_acc": 0.99969, "eval_jaccard": 0.989175, "eval_prec": 0.990903, "eval_recall": 0.990684, "eval_f1": 0.990283, "eval_pr_auc": 0.995818, "eval_roc_auc": 0.999424, "eval_fmax": 0.995065, "eval_pmax": 0.9976, "eval_rmax": 0.992544, "eval_tmax": 0.28, "update_flag": false, "test_avg_loss": 0.001521, "test_total_loss": 1.621341, "test_acc": 0.99969, "test_jaccard": 0.988244, "test_prec": 0.989769, "test_recall": 0.990088, "test_f1": 0.989456, "test_pr_auc": 0.995164, "test_roc_auc": 0.999354, "test_fmax": 0.994626, "test_pmax": 0.996502, "test_rmax": 0.992756, "test_tmax": 0.17, "lr": 1.807266855066613e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005304020368270529, "train_cur_epoch_loss": 1.1458875994160849, "train_cur_epoch_avg_loss": 0.00023037547233938175, "train_cur_epoch_time": 384.5609722137451, "train_cur_epoch_avg_time": 0.07731422843058808, "epoch": 41, "step": 203934} ################################################## Training, Epoch: 0042, Batch: 000066, Sample Num: 1056, Cur Loss: 0.00010547, Cur Avg Loss: 0.00022484, Log Avg loss: 0.00022024, Global Avg Loss: 0.00530238, Time: 0.0412 Steps: 204000, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000266, Sample Num: 4256, Cur Loss: 0.00002054, Cur Avg Loss: 0.00024757, Log Avg loss: 0.00025508, Global Avg Loss: 0.00529743, Time: 0.0597 Steps: 204200, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000466, Sample Num: 7456, Cur Loss: 0.00025317, Cur Avg Loss: 0.00028872, Log Avg loss: 0.00034345, Global Avg Loss: 0.00529259, Time: 0.1122 Steps: 204400, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000666, Sample Num: 10656, Cur Loss: 0.00025637, Cur Avg Loss: 0.00028439, Log Avg loss: 0.00027430, Global Avg Loss: 0.00528768, Time: 0.1083 Steps: 204600, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000866, Sample Num: 13856, Cur Loss: 0.00067289, Cur Avg Loss: 0.00028337, Log Avg loss: 0.00027998, Global Avg Loss: 0.00528279, Time: 0.0667 Steps: 204800, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 001066, Sample Num: 17056, Cur Loss: 0.00008793, Cur Avg Loss: 0.00025875, Log Avg loss: 0.00015213, Global Avg Loss: 0.00527778, Time: 0.0599 Steps: 205000, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 001266, Sample Num: 20256, Cur Loss: 0.00000389, Cur Avg Loss: 0.00024759, Log Avg loss: 0.00018809, Global Avg Loss: 0.00527282, Time: 0.0627 Steps: 205200, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 001466, Sample Num: 23456, Cur Loss: 0.00000382, Cur Avg Loss: 0.00023361, Log Avg loss: 0.00014516, Global Avg Loss: 0.00526783, Time: 0.1082 Steps: 205400, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 001666, Sample Num: 26656, Cur Loss: 0.00002874, Cur Avg Loss: 0.00021773, Log Avg loss: 0.00010132, Global Avg Loss: 0.00526281, Time: 0.0687 Steps: 205600, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 001866, Sample Num: 29856, Cur Loss: 0.00000219, Cur Avg Loss: 0.00021403, Log Avg loss: 0.00018319, Global Avg Loss: 0.00525787, Time: 0.1163 Steps: 205800, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002066, Sample Num: 33056, 
Cur Loss: 0.00308379, Cur Avg Loss: 0.00021658, Log Avg loss: 0.00024042, Global Avg Loss: 0.00525300, Time: 0.0861 Steps: 206000, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002266, Sample Num: 36256, Cur Loss: 0.00002495, Cur Avg Loss: 0.00022043, Log Avg loss: 0.00026013, Global Avg Loss: 0.00524816, Time: 0.1163 Steps: 206200, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002466, Sample Num: 39456, Cur Loss: 0.00004952, Cur Avg Loss: 0.00021954, Log Avg loss: 0.00020950, Global Avg Loss: 0.00524327, Time: 0.0680 Steps: 206400, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002666, Sample Num: 42656, Cur Loss: 0.00000880, Cur Avg Loss: 0.00022696, Log Avg loss: 0.00031849, Global Avg Loss: 0.00523851, Time: 0.1123 Steps: 206600, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002866, Sample Num: 45856, Cur Loss: 0.00001022, Cur Avg Loss: 0.00022935, Log Avg loss: 0.00026119, Global Avg Loss: 0.00523369, Time: 0.0616 Steps: 206800, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003066, Sample Num: 49056, Cur Loss: 0.00000526, Cur Avg Loss: 0.00022956, Log Avg loss: 0.00023247, Global Avg Loss: 0.00522886, Time: 0.1127 Steps: 207000, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003266, Sample Num: 52256, Cur Loss: 0.00000421, Cur Avg Loss: 0.00022417, Log Avg loss: 0.00014158, Global Avg Loss: 0.00522395, Time: 0.0618 Steps: 207200, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003466, Sample Num: 55456, Cur Loss: 0.00010219, Cur Avg Loss: 0.00022337, Log Avg loss: 0.00021027, Global Avg Loss: 0.00521911, Time: 0.0593 Steps: 207400, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003666, Sample Num: 58656, Cur Loss: 0.00002123, Cur Avg Loss: 0.00022392, Log Avg loss: 0.00023354, Global Avg Loss: 0.00521431, Time: 0.0440 Steps: 207600, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003866, Sample Num: 61856, Cur Loss: 0.00005621, Cur Avg Loss: 0.00022065, Log Avg loss: 0.00016078, Global Avg Loss: 0.00520945, Time: 0.0823 Steps: 207800, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 004066, Sample Num: 65056, Cur Loss: 0.00012745, Cur Avg Loss: 0.00022145, Log Avg loss: 0.00023691, Global Avg Loss: 0.00520467, Time: 0.0608 Steps: 208000, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004266, Sample Num: 68256, Cur Loss: 0.00014283, Cur Avg Loss: 0.00022257, Log Avg loss: 0.00024524, Global Avg Loss: 0.00519990, Time: 0.1350 Steps: 208200, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004466, Sample Num: 71456, Cur Loss: 0.00002250, Cur Avg Loss: 0.00022144, Log Avg loss: 0.00019735, Global Avg Loss: 0.00519510, Time: 0.1189 Steps: 208400, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004666, Sample Num: 74656, Cur Loss: 0.00002947, Cur Avg Loss: 0.00022466, Log Avg loss: 0.00029667, Global Avg Loss: 0.00519040, Time: 0.0989 Steps: 208600, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004866, Sample Num: 77856, Cur Loss: 0.00005423, Cur Avg Loss: 0.00022659, Log Avg loss: 0.00027163, Global Avg Loss: 0.00518569, Time: 0.1228 Steps: 208800, Updated lr: 0.000016 ***** Running evaluation checkpoint-208908 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-208908 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 389.315001, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001409, "eval_total_loss": 
1.501725, "eval_acc": 0.999686, "eval_jaccard": 0.989088, "eval_prec": 0.990759, "eval_recall": 0.990653, "eval_f1": 0.990198, "eval_pr_auc": 0.9958, "eval_roc_auc": 0.999419, "eval_fmax": 0.995073, "eval_pmax": 0.996704, "eval_rmax": 0.993447, "eval_tmax": 0.16, "update_flag": false, "test_avg_loss": 0.001533, "test_total_loss": 1.634588, "test_acc": 0.999688, "test_jaccard": 0.988157, "test_prec": 0.989644, "test_recall": 0.990032, "test_f1": 0.989374, "test_pr_auc": 0.995042, "test_roc_auc": 0.999355, "test_fmax": 0.99459, "test_pmax": 0.996519, "test_rmax": 0.992668, "test_tmax": 0.18, "lr": 1.6064594267258782e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005183155083348365, "train_cur_epoch_loss": 1.1324723692591903, "train_cur_epoch_avg_loss": 0.00022767840153984525, "train_cur_epoch_time": 389.3150005340576, "train_cur_epoch_avg_time": 0.07827000412827857, "epoch": 42, "step": 208908} ################################################## Training, Epoch: 0043, Batch: 000092, Sample Num: 1472, Cur Loss: 0.00026579, Cur Avg Loss: 0.00028008, Log Avg loss: 0.00027816, Global Avg Loss: 0.00518100, Time: 0.1283 Steps: 209000, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 000292, Sample Num: 4672, Cur Loss: 0.00031815, Cur Avg Loss: 0.00023559, Log Avg loss: 0.00021512, Global Avg Loss: 0.00517625, Time: 0.0609 Steps: 209200, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 000492, Sample Num: 7872, Cur Loss: 0.00005224, Cur Avg Loss: 0.00031079, Log Avg loss: 0.00042057, Global Avg Loss: 0.00517171, Time: 0.0600 Steps: 209400, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 000692, Sample Num: 11072, Cur Loss: 0.01032716, Cur Avg Loss: 0.00029351, Log Avg loss: 0.00025101, Global Avg Loss: 0.00516701, Time: 0.0615 Steps: 209600, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 000892, Sample Num: 14272, Cur Loss: 0.00006124, Cur Avg Loss: 0.00029016, Log Avg loss: 0.00027857, Global Avg Loss: 0.00516235, Time: 0.0607 Steps: 209800, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 001092, Sample Num: 17472, Cur Loss: 0.00001717, Cur Avg Loss: 0.00026637, Log Avg loss: 0.00016030, Global Avg Loss: 0.00515759, Time: 0.0637 Steps: 210000, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 001292, Sample Num: 20672, Cur Loss: 0.00047945, Cur Avg Loss: 0.00024775, Log Avg loss: 0.00014607, Global Avg Loss: 0.00515282, Time: 0.1157 Steps: 210200, Updated lr: 0.000016 Training, Epoch: 0043, Batch: 001492, Sample Num: 23872, Cur Loss: 0.00003996, Cur Avg Loss: 0.00023032, Log Avg loss: 0.00011774, Global Avg Loss: 0.00514803, Time: 0.0618 Steps: 210400, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 001692, Sample Num: 27072, Cur Loss: 0.00004403, Cur Avg Loss: 0.00021882, Log Avg loss: 0.00013299, Global Avg Loss: 0.00514327, Time: 0.0620 Steps: 210600, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 001892, Sample Num: 30272, Cur Loss: 0.00014409, Cur Avg Loss: 0.00021363, Log Avg loss: 0.00016977, Global Avg Loss: 0.00513855, Time: 0.0619 Steps: 210800, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 002092, Sample Num: 33472, Cur Loss: 0.00004866, Cur Avg Loss: 0.00021603, Log Avg loss: 0.00023867, Global Avg Loss: 0.00513391, Time: 0.1069 Steps: 211000, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 002292, Sample Num: 36672, Cur Loss: 0.00002737, Cur Avg Loss: 0.00022246, Log Avg loss: 0.00028975, Global Avg Loss: 0.00512932, Time: 0.0992 Steps: 211200, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 002492, Sample Num: 39872, Cur Loss: 0.00001778, Cur 
Avg Loss: 0.00022023, Log Avg loss: 0.00019462, Global Avg Loss: 0.00512465, Time: 0.0827 Steps: 211400, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 002692, Sample Num: 43072, Cur Loss: 0.00003356, Cur Avg Loss: 0.00022231, Log Avg loss: 0.00024828, Global Avg Loss: 0.00512004, Time: 0.0684 Steps: 211600, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 002892, Sample Num: 46272, Cur Loss: 0.00001143, Cur Avg Loss: 0.00022867, Log Avg loss: 0.00031421, Global Avg Loss: 0.00511550, Time: 0.0206 Steps: 211800, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 003092, Sample Num: 49472, Cur Loss: 0.00000143, Cur Avg Loss: 0.00022610, Log Avg loss: 0.00018904, Global Avg Loss: 0.00511086, Time: 0.0598 Steps: 212000, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 003292, Sample Num: 52672, Cur Loss: 0.00001893, Cur Avg Loss: 0.00022128, Log Avg loss: 0.00014671, Global Avg Loss: 0.00510618, Time: 0.0601 Steps: 212200, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 003492, Sample Num: 55872, Cur Loss: 0.00005784, Cur Avg Loss: 0.00021906, Log Avg loss: 0.00018261, Global Avg Loss: 0.00510154, Time: 0.1232 Steps: 212400, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 003692, Sample Num: 59072, Cur Loss: 0.00002114, Cur Avg Loss: 0.00022215, Log Avg loss: 0.00027610, Global Avg Loss: 0.00509700, Time: 0.1221 Steps: 212600, Updated lr: 0.000015 Training, Epoch: 0043, Batch: 003892, Sample Num: 62272, Cur Loss: 0.00001222, Cur Avg Loss: 0.00021550, Log Avg loss: 0.00009274, Global Avg Loss: 0.00509230, Time: 0.0606 Steps: 212800, Updated lr: 0.000014 Training, Epoch: 0043, Batch: 004092, Sample Num: 65472, Cur Loss: 0.00010921, Cur Avg Loss: 0.00021754, Log Avg loss: 0.00025723, Global Avg Loss: 0.00508776, Time: 0.0946 Steps: 213000, Updated lr: 0.000014 Training, Epoch: 0043, Batch: 004292, Sample Num: 68672, Cur Loss: 0.00002639, Cur Avg Loss: 0.00021753, Log Avg loss: 0.00021735, Global Avg Loss: 0.00508319, Time: 0.0600 Steps: 213200, Updated lr: 0.000014 Training, Epoch: 0043, Batch: 004492, Sample Num: 71872, Cur Loss: 0.00000483, Cur Avg Loss: 0.00021959, Log Avg loss: 0.00026365, Global Avg Loss: 0.00507867, Time: 0.0578 Steps: 213400, Updated lr: 0.000014 Training, Epoch: 0043, Batch: 004692, Sample Num: 75072, Cur Loss: 0.00000111, Cur Avg Loss: 0.00022275, Log Avg loss: 0.00029387, Global Avg Loss: 0.00507419, Time: 0.0348 Steps: 213600, Updated lr: 0.000014 Training, Epoch: 0043, Batch: 004892, Sample Num: 78272, Cur Loss: 0.00000745, Cur Avg Loss: 0.00022324, Log Avg loss: 0.00023472, Global Avg Loss: 0.00506967, Time: 0.1197 Steps: 213800, Updated lr: 0.000014 ***** Running evaluation checkpoint-213882 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-213882 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 368.107065, Avg time per batch (s): 0.070000 {"eval_avg_loss": 0.001415, "eval_total_loss": 1.508842, "eval_acc": 0.999684, "eval_jaccard": 0.989069, "eval_prec": 0.990756, "eval_recall": 0.99064, "eval_f1": 0.990187, "eval_pr_auc": 0.995767, "eval_roc_auc": 0.999423, "eval_fmax": 0.995072, "eval_pmax": 0.997634, "eval_rmax": 0.992523, "eval_tmax": 0.3, "update_flag": false, "test_avg_loss": 0.001533, "test_total_loss": 1.633696, "test_acc": 0.999693, "test_jaccard": 0.988272, "test_prec": 0.98973, "test_recall": 0.99009, "test_f1": 
0.989451, "test_pr_auc": 0.995159, "test_roc_auc": 0.99936, "test_fmax": 0.994558, "test_pmax": 0.997503, "test_rmax": 0.99163, "test_tmax": 0.32, "lr": 1.4056519983851435e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005067835624190239, "train_cur_epoch_loss": 1.1162568209168882, "train_cur_epoch_avg_loss": 0.0002244183395490326, "train_cur_epoch_time": 368.1070647239685, "train_cur_epoch_avg_time": 0.07400624542098282, "epoch": 43, "step": 213882} ################################################## Training, Epoch: 0044, Batch: 000118, Sample Num: 1888, Cur Loss: 0.00001014, Cur Avg Loss: 0.00028340, Log Avg loss: 0.00028796, Global Avg Loss: 0.00506520, Time: 0.0614 Steps: 214000, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000318, Sample Num: 5088, Cur Loss: 0.00000326, Cur Avg Loss: 0.00024582, Log Avg loss: 0.00022364, Global Avg Loss: 0.00506068, Time: 0.1198 Steps: 214200, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000518, Sample Num: 8288, Cur Loss: 0.00002633, Cur Avg Loss: 0.00030443, Log Avg loss: 0.00039763, Global Avg Loss: 0.00505633, Time: 0.0628 Steps: 214400, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000718, Sample Num: 11488, Cur Loss: 0.00000115, Cur Avg Loss: 0.00028959, Log Avg loss: 0.00025117, Global Avg Loss: 0.00505185, Time: 0.0598 Steps: 214600, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000918, Sample Num: 14688, Cur Loss: 0.00010623, Cur Avg Loss: 0.00028649, Log Avg loss: 0.00027536, Global Avg Loss: 0.00504740, Time: 0.0632 Steps: 214800, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 001118, Sample Num: 17888, Cur Loss: 0.00008992, Cur Avg Loss: 0.00025744, Log Avg loss: 0.00012410, Global Avg Loss: 0.00504282, Time: 0.0966 Steps: 215000, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 001318, Sample Num: 21088, Cur Loss: 0.00001141, Cur Avg Loss: 0.00024033, Log Avg loss: 0.00014470, Global Avg Loss: 0.00503827, Time: 0.1200 Steps: 215200, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 001518, Sample Num: 24288, Cur Loss: 0.00001848, Cur Avg Loss: 0.00022475, Log Avg loss: 0.00012210, Global Avg Loss: 0.00503370, Time: 0.0599 Steps: 215400, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 001718, Sample Num: 27488, Cur Loss: 0.00000575, Cur Avg Loss: 0.00021807, Log Avg loss: 0.00016736, Global Avg Loss: 0.00502919, Time: 0.0604 Steps: 215600, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 001918, Sample Num: 30688, Cur Loss: 0.00004024, Cur Avg Loss: 0.00020786, Log Avg loss: 0.00012010, Global Avg Loss: 0.00502464, Time: 0.0601 Steps: 215800, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002118, Sample Num: 33888, Cur Loss: 0.00002165, Cur Avg Loss: 0.00021173, Log Avg loss: 0.00024884, Global Avg Loss: 0.00502022, Time: 0.0627 Steps: 216000, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002318, Sample Num: 37088, Cur Loss: 0.00001131, Cur Avg Loss: 0.00022351, Log Avg loss: 0.00034827, Global Avg Loss: 0.00501590, Time: 0.1786 Steps: 216200, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002518, Sample Num: 40288, Cur Loss: 0.00026743, Cur Avg Loss: 0.00021814, Log Avg loss: 0.00015592, Global Avg Loss: 0.00501141, Time: 0.1070 Steps: 216400, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002718, Sample Num: 43488, Cur Loss: 0.00006953, Cur Avg Loss: 0.00021981, Log Avg loss: 0.00024079, Global Avg Loss: 0.00500700, Time: 0.0603 Steps: 216600, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002918, Sample Num: 46688, Cur Loss: 0.00019521, Cur Avg Loss: 0.00022393, Log Avg 
loss: 0.00027999, Global Avg Loss: 0.00500264, Time: 0.0268 Steps: 216800, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003118, Sample Num: 49888, Cur Loss: 0.00010540, Cur Avg Loss: 0.00022367, Log Avg loss: 0.00021981, Global Avg Loss: 0.00499823, Time: 0.1155 Steps: 217000, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003318, Sample Num: 53088, Cur Loss: 0.00000589, Cur Avg Loss: 0.00021954, Log Avg loss: 0.00015521, Global Avg Loss: 0.00499377, Time: 0.1160 Steps: 217200, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003518, Sample Num: 56288, Cur Loss: 0.00014089, Cur Avg Loss: 0.00021948, Log Avg loss: 0.00021854, Global Avg Loss: 0.00498938, Time: 0.0604 Steps: 217400, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003718, Sample Num: 59488, Cur Loss: 0.00001866, Cur Avg Loss: 0.00021843, Log Avg loss: 0.00019983, Global Avg Loss: 0.00498498, Time: 0.0615 Steps: 217600, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003918, Sample Num: 62688, Cur Loss: 0.00008575, Cur Avg Loss: 0.00021359, Log Avg loss: 0.00012361, Global Avg Loss: 0.00498051, Time: 0.0618 Steps: 217800, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004118, Sample Num: 65888, Cur Loss: 0.00000533, Cur Avg Loss: 0.00021399, Log Avg loss: 0.00022193, Global Avg Loss: 0.00497615, Time: 0.1176 Steps: 218000, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004318, Sample Num: 69088, Cur Loss: 0.00003085, Cur Avg Loss: 0.00021356, Log Avg loss: 0.00020467, Global Avg Loss: 0.00497177, Time: 0.0596 Steps: 218200, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004518, Sample Num: 72288, Cur Loss: 0.00006343, Cur Avg Loss: 0.00021578, Log Avg loss: 0.00026383, Global Avg Loss: 0.00496746, Time: 0.0622 Steps: 218400, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004718, Sample Num: 75488, Cur Loss: 0.00004868, Cur Avg Loss: 0.00021718, Log Avg loss: 0.00024871, Global Avg Loss: 0.00496314, Time: 0.0597 Steps: 218600, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004918, Sample Num: 78688, Cur Loss: 0.00094771, Cur Avg Loss: 0.00021952, Log Avg loss: 0.00027477, Global Avg Loss: 0.00495886, Time: 0.0598 Steps: 218800, Updated lr: 0.000012 ***** Running evaluation checkpoint-218856 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-218856 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 397.542562, Avg time per batch (s): 0.080000 {"eval_avg_loss": 0.001426, "eval_total_loss": 1.520569, "eval_acc": 0.999686, "eval_jaccard": 0.989033, "eval_prec": 0.990774, "eval_recall": 0.990603, "eval_f1": 0.990153, "eval_pr_auc": 0.995821, "eval_roc_auc": 0.999423, "eval_fmax": 0.995063, "eval_pmax": 0.997542, "eval_rmax": 0.992597, "eval_tmax": 0.27, "update_flag": false, "test_avg_loss": 0.001537, "test_total_loss": 1.638798, "test_acc": 0.99969, "test_jaccard": 0.988243, "test_prec": 0.98975, "test_recall": 0.99006, "test_f1": 0.98944, "test_pr_auc": 0.995145, "test_roc_auc": 0.999357, "test_fmax": 0.994603, "test_pmax": 0.996605, "test_rmax": 0.992609, "test_tmax": 0.18, "lr": 1.2048445700444087e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004957663106630356, "train_cur_epoch_loss": 1.0954978916362421, "train_cur_epoch_avg_loss": 0.00022024485155533617, "train_cur_epoch_time": 397.54256200790405, "train_cur_epoch_avg_time": 0.07992411781421473, "epoch": 
44, "step": 218856} ################################################## Training, Epoch: 0045, Batch: 000144, Sample Num: 2304, Cur Loss: 0.00000523, Cur Avg Loss: 0.00023775, Log Avg loss: 0.00025060, Global Avg Loss: 0.00495456, Time: 0.1335 Steps: 219000, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 000344, Sample Num: 5504, Cur Loss: 0.00141846, Cur Avg Loss: 0.00022777, Log Avg loss: 0.00022058, Global Avg Loss: 0.00495024, Time: 0.0610 Steps: 219200, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 000544, Sample Num: 8704, Cur Loss: 0.00005581, Cur Avg Loss: 0.00028212, Log Avg loss: 0.00037560, Global Avg Loss: 0.00494607, Time: 0.0694 Steps: 219400, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 000744, Sample Num: 11904, Cur Loss: 0.00022287, Cur Avg Loss: 0.00027996, Log Avg loss: 0.00027410, Global Avg Loss: 0.00494182, Time: 0.0641 Steps: 219600, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 000944, Sample Num: 15104, Cur Loss: 0.00024639, Cur Avg Loss: 0.00026610, Log Avg loss: 0.00021452, Global Avg Loss: 0.00493751, Time: 0.0603 Steps: 219800, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 001144, Sample Num: 18304, Cur Loss: 0.00371303, Cur Avg Loss: 0.00025134, Log Avg loss: 0.00018170, Global Avg Loss: 0.00493319, Time: 0.0610 Steps: 220000, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 001344, Sample Num: 21504, Cur Loss: 0.00001252, Cur Avg Loss: 0.00023554, Log Avg loss: 0.00014514, Global Avg Loss: 0.00492884, Time: 0.0690 Steps: 220200, Updated lr: 0.000012 Training, Epoch: 0045, Batch: 001544, Sample Num: 24704, Cur Loss: 0.00003152, Cur Avg Loss: 0.00021928, Log Avg loss: 0.00010999, Global Avg Loss: 0.00492447, Time: 0.0613 Steps: 220400, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 001744, Sample Num: 27904, Cur Loss: 0.00000569, Cur Avg Loss: 0.00021145, Log Avg loss: 0.00015099, Global Avg Loss: 0.00492014, Time: 0.0296 Steps: 220600, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 001944, Sample Num: 31104, Cur Loss: 0.00026139, Cur Avg Loss: 0.00020041, Log Avg loss: 0.00010413, Global Avg Loss: 0.00491578, Time: 0.2003 Steps: 220800, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 002144, Sample Num: 34304, Cur Loss: 0.00000752, Cur Avg Loss: 0.00020634, Log Avg loss: 0.00026398, Global Avg Loss: 0.00491157, Time: 0.0617 Steps: 221000, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 002344, Sample Num: 37504, Cur Loss: 0.00213455, Cur Avg Loss: 0.00021710, Log Avg loss: 0.00033245, Global Avg Loss: 0.00490743, Time: 0.0608 Steps: 221200, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 002544, Sample Num: 40704, Cur Loss: 0.00007361, Cur Avg Loss: 0.00020863, Log Avg loss: 0.00010944, Global Avg Loss: 0.00490309, Time: 0.1100 Steps: 221400, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 002744, Sample Num: 43904, Cur Loss: 0.00004173, Cur Avg Loss: 0.00021583, Log Avg loss: 0.00030738, Global Avg Loss: 0.00489895, Time: 0.0538 Steps: 221600, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 002944, Sample Num: 47104, Cur Loss: 0.00029064, Cur Avg Loss: 0.00021543, Log Avg loss: 0.00020993, Global Avg Loss: 0.00489472, Time: 0.0691 Steps: 221800, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 003144, Sample Num: 50304, Cur Loss: 0.00012662, Cur Avg Loss: 0.00021483, Log Avg loss: 0.00020602, Global Avg Loss: 0.00489049, Time: 0.1199 Steps: 222000, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 003344, Sample Num: 53504, Cur Loss: 0.00000457, Cur Avg Loss: 0.00021261, Log Avg loss: 0.00017770, Global Avg 
Loss: 0.00488625, Time: 0.0584 Steps: 222200, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 003544, Sample Num: 56704, Cur Loss: 0.00007135, Cur Avg Loss: 0.00021160, Log Avg loss: 0.00019472, Global Avg Loss: 0.00488203, Time: 0.0607 Steps: 222400, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 003744, Sample Num: 59904, Cur Loss: 0.00002521, Cur Avg Loss: 0.00021168, Log Avg loss: 0.00021314, Global Avg Loss: 0.00487784, Time: 0.0703 Steps: 222600, Updated lr: 0.000011 Training, Epoch: 0045, Batch: 003944, Sample Num: 63104, Cur Loss: 0.00372254, Cur Avg Loss: 0.00020963, Log Avg loss: 0.00017117, Global Avg Loss: 0.00487361, Time: 0.1123 Steps: 222800, Updated lr: 0.000010 Training, Epoch: 0045, Batch: 004144, Sample Num: 66304, Cur Loss: 0.00334352, Cur Avg Loss: 0.00020943, Log Avg loss: 0.00020550, Global Avg Loss: 0.00486943, Time: 0.1119 Steps: 223000, Updated lr: 0.000010 Training, Epoch: 0045, Batch: 004344, Sample Num: 69504, Cur Loss: 0.00180215, Cur Avg Loss: 0.00020898, Log Avg loss: 0.00019968, Global Avg Loss: 0.00486524, Time: 0.1490 Steps: 223200, Updated lr: 0.000010 Training, Epoch: 0045, Batch: 004544, Sample Num: 72704, Cur Loss: 0.00009100, Cur Avg Loss: 0.00021407, Log Avg loss: 0.00032460, Global Avg Loss: 0.00486118, Time: 0.0701 Steps: 223400, Updated lr: 0.000010 Training, Epoch: 0045, Batch: 004744, Sample Num: 75904, Cur Loss: 0.00007342, Cur Avg Loss: 0.00021364, Log Avg loss: 0.00020392, Global Avg Loss: 0.00485701, Time: 0.1146 Steps: 223600, Updated lr: 0.000010 Training, Epoch: 0045, Batch: 004944, Sample Num: 79104, Cur Loss: 0.00004385, Cur Avg Loss: 0.00021585, Log Avg loss: 0.00026829, Global Avg Loss: 0.00485291, Time: 0.0629 Steps: 223800, Updated lr: 0.000010 ***** Running evaluation checkpoint-223830 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-223830 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 370.695940, Avg time per batch (s): 0.070000 {"eval_avg_loss": 0.001424, "eval_total_loss": 1.517602, "eval_acc": 0.999688, "eval_jaccard": 0.989036, "eval_prec": 0.990649, "eval_recall": 0.990559, "eval_f1": 0.990105, "eval_pr_auc": 0.995793, "eval_roc_auc": 0.999424, "eval_fmax": 0.995055, "eval_pmax": 0.99752, "eval_rmax": 0.992602, "eval_tmax": 0.27, "update_flag": false, "test_avg_loss": 0.001537, "test_total_loss": 1.63821, "test_acc": 0.999696, "test_jaccard": 0.988336, "test_prec": 0.989798, "test_recall": 0.990065, "test_f1": 0.989477, "test_pr_auc": 0.995239, "test_roc_auc": 0.999366, "test_fmax": 0.994602, "test_pmax": 0.996938, "test_rmax": 0.992277, "test_tmax": 0.24, "lr": 1.0040371417036738e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004852313606758406, "train_cur_epoch_loss": 1.0790377360400072, "train_cur_epoch_avg_loss": 0.00021693561239244214, "train_cur_epoch_time": 370.6959402561188, "train_cur_epoch_avg_time": 0.07452672703178906, "epoch": 45, "step": 223830} ################################################## Training, Epoch: 0046, Batch: 000170, Sample Num: 2720, Cur Loss: 0.00006330, Cur Avg Loss: 0.00018242, Log Avg loss: 0.00021438, Global Avg Loss: 0.00484877, Time: 0.0608 Steps: 224000, Updated lr: 0.000010 Training, Epoch: 0046, Batch: 000370, Sample Num: 5920, Cur Loss: 0.00003111, Cur Avg Loss: 0.00024712, Log Avg loss: 0.00030212, Global Avg Loss: 0.00484471, Time: 
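The lr field in these epoch summaries drops by exactly 2.008074e-06 per 4,974-step epoch and reaches 0.0 at the final step 248700, consistent with a linear decay to zero after warmup. A sketch that reproduces the logged values, assuming a 1e-4 peak learning rate and 1,000 warmup steps (both inferred from the numbers in this log, not read from the trainer's code):

def lr_at(step: int, peak_lr: float = 1e-4, warmup_steps: int = 1000,
          total_steps: int = 248700) -> float:
    """Linear warmup followed by linear decay to zero at total_steps."""
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    return peak_lr * (total_steps - step) / (total_steps - warmup_steps)

# Spot-checks against the epoch summaries in this log:
assert abs(lr_at(218856) - 1.2048445700444087e-05) < 1e-15
assert abs(lr_at(223830) - 1.0040371417036738e-05) < 1e-15
assert lr_at(248700) == 0.0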
Training, Epoch: 0046, Batch: 000170, Sample Num: 2720, Cur Loss: 0.00006330, Cur Avg Loss: 0.00018242, Log Avg loss: 0.00021438, Global Avg Loss: 0.00484877, Time: 0.0608 Steps: 224000, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000370, Sample Num: 5920, Cur Loss: 0.00003111, Cur Avg Loss: 0.00024712, Log Avg loss: 0.00030212, Global Avg Loss: 0.00484471, Time: 0.1279 Steps: 224200, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000570, Sample Num: 9120, Cur Loss: 0.00033934, Cur Avg Loss: 0.00028349, Log Avg loss: 0.00035076, Global Avg Loss: 0.00484071, Time: 0.0823 Steps: 224400, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000770, Sample Num: 12320, Cur Loss: 0.00006219, Cur Avg Loss: 0.00027915, Log Avg loss: 0.00026679, Global Avg Loss: 0.00483664, Time: 0.0313 Steps: 224600, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000970, Sample Num: 15520, Cur Loss: 0.00001608, Cur Avg Loss: 0.00026020, Log Avg loss: 0.00018725, Global Avg Loss: 0.00483250, Time: 0.0612 Steps: 224800, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 001170, Sample Num: 18720, Cur Loss: 0.00001244, Cur Avg Loss: 0.00024193, Log Avg loss: 0.00015332, Global Avg Loss: 0.00482834, Time: 0.0603 Steps: 225000, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 001370, Sample Num: 21920, Cur Loss: 0.00005598, Cur Avg Loss: 0.00022825, Log Avg loss: 0.00014825, Global Avg Loss: 0.00482418, Time: 0.0607 Steps: 225200, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 001570, Sample Num: 25120, Cur Loss: 0.00010134, Cur Avg Loss: 0.00021045, Log Avg loss: 0.00008853, Global Avg Loss: 0.00481998, Time: 0.1116 Steps: 225400, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 001770, Sample Num: 28320, Cur Loss: 0.00002228, Cur Avg Loss: 0.00020101, Log Avg loss: 0.00012683, Global Avg Loss: 0.00481582, Time: 0.0624 Steps: 225600, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 001970, Sample Num: 31520, Cur Loss: 0.00040107, Cur Avg Loss: 0.00019147, Log Avg loss: 0.00010706, Global Avg Loss: 0.00481165, Time: 0.0603 Steps: 225800, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002170, Sample Num: 34720, Cur Loss: 0.00006220, Cur Avg Loss: 0.00020202, Log Avg loss: 0.00030597, Global Avg Loss: 0.00480766, Time: 0.0623 Steps: 226000, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002370, Sample Num: 37920, Cur Loss: 0.00000384, Cur Avg Loss: 0.00020971, Log Avg loss: 0.00029318, Global Avg Loss: 0.00480367, Time: 0.0271 Steps: 226200, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002570, Sample Num: 41120, Cur Loss: 0.00005650, Cur Avg Loss: 0.00020426, Log Avg loss: 0.00013965, Global Avg Loss: 0.00479955, Time: 0.0351 Steps: 226400, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002770, Sample Num: 44320, Cur Loss: 0.00001222, Cur Avg Loss: 0.00021054, Log Avg loss: 0.00029123, Global Avg Loss: 0.00479557, Time: 0.0596 Steps: 226600, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002970, Sample Num: 47520, Cur Loss: 0.00007058, Cur Avg Loss: 0.00021347, Log Avg loss: 0.00025408, Global Avg Loss: 0.00479157, Time: 0.0549 Steps: 226800, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003170, Sample Num: 50720, Cur Loss: 0.00001389, Cur Avg Loss: 0.00021013, Log Avg loss: 0.00016048, Global Avg Loss: 0.00478749, Time: 0.0797 Steps: 227000, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003370, Sample Num: 53920, Cur Loss: 0.00180547, Cur Avg Loss: 0.00020889, Log Avg loss: 0.00018919, Global Avg Loss: 0.00478344, Time: 0.0986 Steps: 227200, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003570, Sample Num: 57120, Cur Loss: 0.00006347, Cur Avg Loss: 0.00020751, Log Avg loss: 0.00018436, Global Avg Loss: 0.00477939, Time: 0.0608 Steps: 227400, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003770, Sample Num: 60320, Cur Loss: 0.00004092, Cur Avg Loss: 0.00020665, Log Avg loss: 0.00019118, Global Avg Loss: 0.00477536, Time: 0.0594 Steps: 227600, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003970, Sample Num: 63520, Cur Loss: 0.00001633, Cur Avg Loss: 0.00020490, Log Avg loss: 0.00017208, Global Avg Loss: 0.00477132, Time: 0.0684 Steps: 227800, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004170, Sample Num: 66720, Cur Loss: 0.00009043, Cur Avg Loss: 0.00020460, Log Avg loss: 0.00019853, Global Avg Loss: 0.00476731, Time: 0.0605 Steps: 228000, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004370, Sample Num: 69920, Cur Loss: 0.00000724, Cur Avg Loss: 0.00020363, Log Avg loss: 0.00018344, Global Avg Loss: 0.00476329, Time: 0.1134 Steps: 228200, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004570, Sample Num: 73120, Cur Loss: 0.00012862, Cur Avg Loss: 0.00020858, Log Avg loss: 0.00031670, Global Avg Loss: 0.00475940, Time: 0.0498 Steps: 228400, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004770, Sample Num: 76320, Cur Loss: 0.00054028, Cur Avg Loss: 0.00020707, Log Avg loss: 0.00017266, Global Avg Loss: 0.00475539, Time: 0.0602 Steps: 228600, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004970, Sample Num: 79520, Cur Loss: 0.00000348, Cur Avg Loss: 0.00021119, Log Avg loss: 0.00030934, Global Avg Loss: 0.00475150, Time: 0.1170 Steps: 228800, Updated lr: 0.000008
***** Running evaluation checkpoint-228804 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-228804 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 405.170144, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001422, "eval_total_loss": 1.515623, "eval_acc": 0.99969, "eval_jaccard": 0.989069, "eval_prec": 0.990705, "eval_recall": 0.990593, "eval_f1": 0.990151, "eval_pr_auc": 0.995762, "eval_roc_auc": 0.999424, "eval_fmax": 0.995086, "eval_pmax": 0.996578, "eval_rmax": 0.993598, "eval_tmax": 0.14, "update_flag": false, "test_avg_loss": 0.001524, "test_total_loss": 1.6244, "test_acc": 0.999693, "test_jaccard": 0.988272, "test_prec": 0.98971, "test_recall": 0.990062, "test_f1": 0.989425, "test_pr_auc": 0.995192, "test_roc_auc": 0.999358, "test_fmax": 0.99465, "test_pmax": 0.997095, "test_rmax": 0.992216, "test_tmax": 0.25, "lr": 8.032297133629391e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00475143912405958, "train_cur_epoch_loss": 1.054922740595707, "train_cur_epoch_avg_loss": 0.00021208740261272757, "train_cur_epoch_time": 405.1701443195343, "train_cur_epoch_avg_time": 0.08145760842773106, "epoch": 46, "step": 228804}
##################################################
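fmax, pmax, rmax and tmax in these metric dumps are the threshold-swept counterparts of f1, prec and recall: predictions are binarized at every threshold on a grid, micro-averaged F1 is computed at each, and the best F1 together with its precision, recall and threshold is reported. The tmax values in this log (0.13 to 0.32) are consistent with a 0.01-step grid; a sketch under that assumption:

import numpy as np

def fmax_metrics(y_true: np.ndarray, y_prob: np.ndarray):
    """y_true: (n, n_labels) in {0, 1}; y_prob: (n, n_labels) sigmoid scores.
    Returns (fmax, pmax, rmax, tmax) over a 0.01-step threshold grid."""
    best = (0.0, 0.0, 0.0, 0.0)
    for t in np.arange(0.0, 1.0, 0.01):
        y_pred = (y_prob > t).astype(int)
        tp = int((y_pred * y_true).sum())
        if tp == 0:
            continue  # also guards against division by zero below
        p = tp / y_pred.sum()   # micro precision at threshold t
        r = tp / y_true.sum()   # micro recall at threshold t
        f = 2 * p * r / (p + r)
        if f > best[0]:
            best = (f, p, r, round(float(t), 2))
    return best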
Training, Epoch: 0047, Batch: 000196, Sample Num: 3136, Cur Loss: 0.00012638, Cur Avg Loss: 0.00022346, Log Avg loss: 0.00024558, Global Avg Loss: 0.00474756, Time: 0.1054 Steps: 229000, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000396, Sample Num: 6336, Cur Loss: 0.00001116, Cur Avg Loss: 0.00021906, Log Avg loss: 0.00021475, Global Avg Loss: 0.00474361, Time: 0.0630 Steps: 229200, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000596, Sample Num: 9536, Cur Loss: 0.00000223, Cur Avg Loss: 0.00026340, Log Avg loss: 0.00035119, Global Avg Loss: 0.00473978, Time: 0.0609 Steps: 229400, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000796, Sample Num: 12736, Cur Loss: 0.00002403, Cur Avg Loss: 0.00027326, Log Avg loss: 0.00030263, Global Avg Loss: 0.00473591, Time: 0.1067 Steps: 229600, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000996, Sample Num: 15936, Cur Loss: 0.00002267, Cur Avg Loss: 0.00025137, Log Avg loss: 0.00016428, Global Avg Loss: 0.00473193, Time: 0.0603 Steps: 229800, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 001196, Sample Num: 19136, Cur Loss: 0.00005808, Cur Avg Loss: 0.00024291, Log Avg loss: 0.00020078, Global Avg Loss: 0.00472799, Time: 0.0536 Steps: 230000, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 001396, Sample Num: 22336, Cur Loss: 0.00003657, Cur Avg Loss: 0.00023006, Log Avg loss: 0.00015316, Global Avg Loss: 0.00472402, Time: 0.0604 Steps: 230200, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 001596, Sample Num: 25536, Cur Loss: 0.00006955, Cur Avg Loss: 0.00022101, Log Avg loss: 0.00015787, Global Avg Loss: 0.00472006, Time: 0.0605 Steps: 230400, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 001796, Sample Num: 28736, Cur Loss: 0.00005521, Cur Avg Loss: 0.00021292, Log Avg loss: 0.00014835, Global Avg Loss: 0.00471609, Time: 0.1215 Steps: 230600, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 001996, Sample Num: 31936, Cur Loss: 0.00006884, Cur Avg Loss: 0.00020370, Log Avg loss: 0.00012091, Global Avg Loss: 0.00471211, Time: 0.1074 Steps: 230800, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002196, Sample Num: 35136, Cur Loss: 0.00003207, Cur Avg Loss: 0.00021924, Log Avg loss: 0.00037429, Global Avg Loss: 0.00470835, Time: 0.0597 Steps: 231000, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002396, Sample Num: 38336, Cur Loss: 0.00018546, Cur Avg Loss: 0.00021655, Log Avg loss: 0.00018701, Global Avg Loss: 0.00470444, Time: 0.0630 Steps: 231200, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002596, Sample Num: 41536, Cur Loss: 0.00000336, Cur Avg Loss: 0.00021330, Log Avg loss: 0.00017445, Global Avg Loss: 0.00470053, Time: 0.0600 Steps: 231400, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002796, Sample Num: 44736, Cur Loss: 0.00002478, Cur Avg Loss: 0.00021833, Log Avg loss: 0.00028360, Global Avg Loss: 0.00469671, Time: 0.0606 Steps: 231600, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002996, Sample Num: 47936, Cur Loss: 0.00010576, Cur Avg Loss: 0.00022252, Log Avg loss: 0.00028114, Global Avg Loss: 0.00469290, Time: 0.0978 Steps: 231800, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003196, Sample Num: 51136, Cur Loss: 0.00005700, Cur Avg Loss: 0.00021753, Log Avg loss: 0.00014276, Global Avg Loss: 0.00468898, Time: 0.0607 Steps: 232000, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003396, Sample Num: 54336, Cur Loss: 0.00001749, Cur Avg Loss: 0.00021498, Log Avg loss: 0.00017420, Global Avg Loss: 0.00468509, Time: 0.0677 Steps: 232200, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003596, Sample Num: 57536, Cur Loss: 0.00000813, Cur Avg Loss: 0.00021384, Log Avg loss: 0.00019453, Global Avg Loss: 0.00468123, Time: 0.0597 Steps: 232400, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003796, Sample Num: 60736, Cur Loss: 0.00001293, Cur Avg Loss: 0.00021235, Log Avg loss: 0.00018551, Global Avg Loss: 0.00467736, Time: 0.1012 Steps: 232600, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 003996, Sample Num: 63936, Cur Loss: 0.00008755, Cur Avg Loss: 0.00021169, Log Avg loss: 0.00019911, Global Avg Loss: 0.00467351, Time: 0.0625 Steps: 232800, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004196, Sample Num: 67136, Cur Loss: 0.00037854, Cur Avg Loss: 0.00021105, Log Avg loss: 0.00019828, Global Avg Loss: 0.00466967, Time: 0.0629 Steps: 233000, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004396, Sample Num: 70336, Cur Loss: 0.00018336, Cur Avg Loss: 0.00020938, Log Avg loss: 0.00017430, Global Avg Loss: 0.00466582, Time: 0.0606 Steps: 233200, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004596, Sample Num: 73536, Cur Loss: 0.00173999, Cur Avg Loss: 0.00021345, Log Avg loss: 0.00030305, Global Avg Loss: 0.00466208, Time: 0.1069 Steps: 233400, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004796, Sample Num: 76736, Cur Loss: 0.00553818, Cur Avg Loss: 0.00021459, Log Avg loss: 0.00024083, Global Avg Loss: 0.00465829, Time: 0.1116 Steps: 233600, Updated lr: 0.000006
***** Running evaluation checkpoint-233778 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-233778 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 367.009622, Avg time per batch (s): 0.070000
{"eval_avg_loss": 0.001425, "eval_total_loss": 1.51914, "eval_acc": 0.999688, "eval_jaccard": 0.989095, "eval_prec": 0.990668, "eval_recall": 0.990715, "eval_f1": 0.990189, "eval_pr_auc": 0.995801, "eval_roc_auc": 0.999425, "eval_fmax": 0.995052, "eval_pmax": 0.997619, "eval_rmax": 0.992498, "eval_tmax": 0.28, "update_flag": false, "test_avg_loss": 0.001526, "test_total_loss": 1.627196, "test_acc": 0.999698, "test_jaccard": 0.988498, "test_prec": 0.989827, "test_recall": 0.990247, "test_f1": 0.989603, "test_pr_auc": 0.995171, "test_roc_auc": 0.999357, "test_fmax": 0.994628, "test_pmax": 0.996193, "test_rmax": 0.993069, "test_tmax": 0.15, "lr": 6.024222850222043e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004654910936455099, "train_cur_epoch_loss": 1.0674915612720781, "train_cur_epoch_avg_loss": 0.00021461430664899037, "train_cur_epoch_time": 367.00962233543396, "train_cur_epoch_avg_time": 0.07378560963720024, "epoch": 47, "step": 233778}
##################################################
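The step bookkeeping in these lines is internally consistent: Sample Num is always Batch * 16 (the per-GPU batch size), each epoch runs 4,974 optimizer steps (4974 * 16 = 79,584 training sequences), and every checkpoint id equals epoch * 4974. A quick worked check with values copied from the log:

per_gpu_train_batch_size = 16
steps_per_epoch = 4974

assert 796 * per_gpu_train_batch_size == 12736   # Batch: 000796 -> Sample Num: 12736
assert 47 * steps_per_epoch == 233778            # epoch 47 ends at checkpoint-233778
assert 50 * steps_per_epoch == 248700            # final step of the 50-epoch run
assert steps_per_epoch * per_gpu_train_batch_size == 79584  # samples per epoch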
Training, Epoch: 0048, Batch: 000022, Sample Num: 352, Cur Loss: 0.00002070, Cur Avg Loss: 0.00008372, Log Avg loss: 0.00020071, Global Avg Loss: 0.00465448, Time: 0.0622 Steps: 233800, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000222, Sample Num: 3552, Cur Loss: 0.00007637, Cur Avg Loss: 0.00022688, Log Avg loss: 0.00024262, Global Avg Loss: 0.00465071, Time: 0.1214 Steps: 234000, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000422, Sample Num: 6752, Cur Loss: 0.00001376, Cur Avg Loss: 0.00022253, Log Avg loss: 0.00021771, Global Avg Loss: 0.00464692, Time: 0.0587 Steps: 234200, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000622, Sample Num: 9952, Cur Loss: 0.00000585, Cur Avg Loss: 0.00026621, Log Avg loss: 0.00035839, Global Avg Loss: 0.00464327, Time: 0.0602 Steps: 234400, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000822, Sample Num: 13152, Cur Loss: 0.00000815, Cur Avg Loss: 0.00028442, Log Avg loss: 0.00034105, Global Avg Loss: 0.00463960, Time: 0.1232 Steps: 234600, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 001022, Sample Num: 16352, Cur Loss: 0.00000371, Cur Avg Loss: 0.00025531, Log Avg loss: 0.00013568, Global Avg Loss: 0.00463576, Time: 0.1434 Steps: 234800, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 001222, Sample Num: 19552, Cur Loss: 0.00007970, Cur Avg Loss: 0.00024233, Log Avg loss: 0.00017597, Global Avg Loss: 0.00463197, Time: 0.0615 Steps: 235000, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 001422, Sample Num: 22752, Cur Loss: 0.00003017, Cur Avg Loss: 0.00022709, Log Avg loss: 0.00013398, Global Avg Loss: 0.00462814, Time: 0.0273 Steps: 235200, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 001622, Sample Num: 25952, Cur Loss: 0.00001362, Cur Avg Loss: 0.00021149, Log Avg loss: 0.00010055, Global Avg Loss: 0.00462429, Time: 0.1184 Steps: 235400, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 001822, Sample Num: 29152, Cur Loss: 0.00001340, Cur Avg Loss: 0.00020367, Log Avg loss: 0.00014029, Global Avg Loss: 0.00462049, Time: 0.1184 Steps: 235600, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002022, Sample Num: 32352, Cur Loss: 0.00001416, Cur Avg Loss: 0.00019663, Log Avg loss: 0.00013247, Global Avg Loss: 0.00461668, Time: 0.0245 Steps: 235800, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002222, Sample Num: 35552, Cur Loss: 0.00022527, Cur Avg Loss: 0.00021091, Log Avg loss: 0.00035533, Global Avg Loss: 0.00461307, Time: 0.0669 Steps: 236000, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002422, Sample Num: 38752, Cur Loss: 0.00301742, Cur Avg Loss: 0.00021048, Log Avg loss: 0.00020573, Global Avg Loss: 0.00460934, Time: 0.1961 Steps: 236200, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002622, Sample Num: 41952, Cur Loss: 0.00000217, Cur Avg Loss: 0.00020958, Log Avg loss: 0.00019866, Global Avg Loss: 0.00460561, Time: 0.0642 Steps: 236400, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002822, Sample Num: 45152, Cur Loss: 0.00001777, Cur Avg Loss: 0.00021215, Log Avg loss: 0.00024577, Global Avg Loss: 0.00460192, Time: 0.1276 Steps: 236600, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003022, Sample Num: 48352, Cur Loss: 0.00008245, Cur Avg Loss: 0.00021746, Log Avg loss: 0.00029237, Global Avg Loss: 0.00459828, Time: 0.1158 Steps: 236800, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003222, Sample Num: 51552, Cur Loss: 0.00002703, Cur Avg Loss: 0.00021197, Log Avg loss: 0.00012909, Global Avg Loss: 0.00459451, Time: 0.0603 Steps: 237000, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003422, Sample Num: 54752, Cur Loss: 0.00000507, Cur Avg Loss: 0.00020865, Log Avg loss: 0.00015511, Global Avg Loss: 0.00459077, Time: 0.0611 Steps: 237200, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003622, Sample Num: 57952, Cur Loss: 0.00001536, Cur Avg Loss: 0.00020903, Log Avg loss: 0.00021566, Global Avg Loss: 0.00458708, Time: 0.0616 Steps: 237400, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003822, Sample Num: 61152, Cur Loss: 0.00002382, Cur Avg Loss: 0.00020776, Log Avg loss: 0.00018467, Global Avg Loss: 0.00458337, Time: 0.0660 Steps: 237600, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004022, Sample Num: 64352, Cur Loss: 0.00014656, Cur Avg Loss: 0.00020600, Log Avg loss: 0.00017228, Global Avg Loss: 0.00457966, Time: 0.0616 Steps: 237800, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004222, Sample Num: 67552, Cur Loss: 0.00003966, Cur Avg Loss: 0.00020585, Log Avg loss: 0.00020301, Global Avg Loss: 0.00457599, Time: 0.1546 Steps: 238000, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004422, Sample Num: 70752, Cur Loss: 0.00002752, Cur Avg Loss: 0.00020414, Log Avg loss: 0.00016788, Global Avg Loss: 0.00457229, Time: 0.1405 Steps: 238200, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004622, Sample Num: 73952, Cur Loss: 0.00002817, Cur Avg Loss: 0.00020807, Log Avg loss: 0.00029511, Global Avg Loss: 0.00456870, Time: 0.0603 Steps: 238400, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004822, Sample Num: 77152, Cur Loss: 0.00006739, Cur Avg Loss: 0.00020905, Log Avg loss: 0.00023157, Global Avg Loss: 0.00456506, Time: 0.0598 Steps: 238600, Updated lr: 0.000004
***** Running evaluation checkpoint-238752 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-238752 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 406.358717, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001422, "eval_total_loss": 1.516091, "eval_acc": 0.99969, "eval_jaccard": 0.989227, "eval_prec": 0.9908, "eval_recall": 0.990774, "eval_f1": 0.990292, "eval_pr_auc": 0.995819, "eval_roc_auc": 0.999426, "eval_fmax": 0.995081, "eval_pmax": 0.996464, "eval_rmax": 0.993701, "eval_tmax": 0.13, "update_flag": false, "test_avg_loss": 0.001526, "test_total_loss": 1.626429, "test_acc": 0.999698, "test_jaccard": 0.988515, "test_prec": 0.989837, "test_recall": 0.990344, "test_f1": 0.989642, "test_pr_auc": 0.995165, "test_roc_auc": 0.999357, "test_fmax": 0.994647, "test_pmax": 0.996231, "test_rmax": 0.993069, "test_tmax": 0.16, "lr": 4.0161485668146955e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004562279077744409, "train_cur_epoch_loss": 1.0374854670329796, "train_cur_epoch_avg_loss": 0.00020858171834197418, "train_cur_epoch_time": 406.35871744155884, "train_cur_epoch_avg_time": 0.08169656562958562, "epoch": 48, "step": 238752}
##################################################
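Each epoch summary is also self-consistent: train_cur_epoch_avg_loss equals train_cur_epoch_loss / cur_epoch_step, and train_cur_epoch_avg_time equals train_cur_epoch_time / cur_epoch_step. A check against the epoch-48 summary above:

# Values copied verbatim from the epoch-48 JSON above.
loss_sum, time_sum, steps = 1.0374854670329796, 406.35871744155884, 4974
assert abs(loss_sum / steps - 0.00020858171834197418) < 1e-15
assert abs(time_sum / steps - 0.08169656562958562) < 1e-12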
Training, Epoch: 0049, Batch: 000048, Sample Num: 768, Cur Loss: 0.00002416, Cur Avg Loss: 0.00007543, Log Avg loss: 0.00016540, Global Avg Loss: 0.00456138, Time: 0.0874 Steps: 238800, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000248, Sample Num: 3968, Cur Loss: 0.00020208, Cur Avg Loss: 0.00020790, Log Avg loss: 0.00023969, Global Avg Loss: 0.00455776, Time: 0.1678 Steps: 239000, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000448, Sample Num: 7168, Cur Loss: 0.00005313, Cur Avg Loss: 0.00023927, Log Avg loss: 0.00027818, Global Avg Loss: 0.00455418, Time: 0.0862 Steps: 239200, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000648, Sample Num: 10368, Cur Loss: 0.00000892, Cur Avg Loss: 0.00025530, Log Avg loss: 0.00029121, Global Avg Loss: 0.00455062, Time: 0.0623 Steps: 239400, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000848, Sample Num: 13568, Cur Loss: 0.00001426, Cur Avg Loss: 0.00027134, Log Avg loss: 0.00032329, Global Avg Loss: 0.00454709, Time: 0.0619 Steps: 239600, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 001048, Sample Num: 16768, Cur Loss: 0.00002076, Cur Avg Loss: 0.00024608, Log Avg loss: 0.00013898, Global Avg Loss: 0.00454342, Time: 0.0609 Steps: 239800, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 001248, Sample Num: 19968, Cur Loss: 0.00001896, Cur Avg Loss: 0.00023038, Log Avg loss: 0.00014811, Global Avg Loss: 0.00453975, Time: 0.0681 Steps: 240000, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 001448, Sample Num: 23168, Cur Loss: 0.00003891, Cur Avg Loss: 0.00021712, Log Avg loss: 0.00013441, Global Avg Loss: 0.00453609, Time: 0.1178 Steps: 240200, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 001648, Sample Num: 26368, Cur Loss: 0.00001799, Cur Avg Loss: 0.00020271, Log Avg loss: 0.00009835, Global Avg Loss: 0.00453239, Time: 0.0365 Steps: 240400, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 001848, Sample Num: 29568, Cur Loss: 0.00004444, Cur Avg Loss: 0.00019948, Log Avg loss: 0.00017291, Global Avg Loss: 0.00452877, Time: 0.1082 Steps: 240600, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002048, Sample Num: 32768, Cur Loss: 0.00003584, Cur Avg Loss: 0.00019895, Log Avg loss: 0.00019401, Global Avg Loss: 0.00452517, Time: 0.0631 Steps: 240800, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002248, Sample Num: 35968, Cur Loss: 0.00002618, Cur Avg Loss: 0.00020562, Log Avg loss: 0.00027394, Global Avg Loss: 0.00452164, Time: 0.0602 Steps: 241000, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002448, Sample Num: 39168, Cur Loss: 0.00008698, Cur Avg Loss: 0.00020441, Log Avg loss: 0.00019078, Global Avg Loss: 0.00451805, Time: 0.0600 Steps: 241200, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002648, Sample Num: 42368, Cur Loss: 0.00364823, Cur Avg Loss: 0.00020380, Log Avg loss: 0.00019634, Global Avg Loss: 0.00451447, Time: 0.1105 Steps: 241400, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002848, Sample Num: 45568, Cur Loss: 0.00001842, Cur Avg Loss: 0.00020329, Log Avg loss: 0.00019657, Global Avg Loss: 0.00451089, Time: 0.1223 Steps: 241600, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003048, Sample Num: 48768, Cur Loss: 0.00002708, Cur Avg Loss: 0.00020593, Log Avg loss: 0.00024348, Global Avg Loss: 0.00450737, Time: 0.1072 Steps: 241800, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003248, Sample Num: 51968, Cur Loss: 0.00002901, Cur Avg Loss: 0.00020141, Log Avg loss: 0.00013260, Global Avg Loss: 0.00450375, Time: 0.0619 Steps: 242000, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003448, Sample Num: 55168, Cur Loss: 0.00001240, Cur Avg Loss: 0.00020210, Log Avg loss: 0.00021323, Global Avg Loss: 0.00450021, Time: 0.0612 Steps: 242200, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003648, Sample Num: 58368, Cur Loss: 0.00027498, Cur Avg Loss: 0.00020354, Log Avg loss: 0.00022838, Global Avg Loss: 0.00449668, Time: 0.0818 Steps: 242400, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003848, Sample Num: 61568, Cur Loss: 0.00001119, Cur Avg Loss: 0.00020070, Log Avg loss: 0.00014881, Global Avg Loss: 0.00449310, Time: 0.1148 Steps: 242600, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004048, Sample Num: 64768, Cur Loss: 0.00000857, Cur Avg Loss: 0.00020138, Log Avg loss: 0.00021456, Global Avg Loss: 0.00448957, Time: 0.0611 Steps: 242800, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004248, Sample Num: 67968, Cur Loss: 0.00018542, Cur Avg Loss: 0.00020175, Log Avg loss: 0.00020916, Global Avg Loss: 0.00448605, Time: 0.0613 Steps: 243000, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004448, Sample Num: 71168, Cur Loss: 0.00003066, Cur Avg Loss: 0.00019964, Log Avg loss: 0.00015487, Global Avg Loss: 0.00448249, Time: 0.0703 Steps: 243200, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004648, Sample Num: 74368, Cur Loss: 0.00001302, Cur Avg Loss: 0.00020331, Log Avg loss: 0.00028504, Global Avg Loss: 0.00447904, Time: 0.0624 Steps: 243400, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004848, Sample Num: 77568, Cur Loss: 0.00016219, Cur Avg Loss: 0.00020503, Log Avg loss: 0.00024495, Global Avg Loss: 0.00447556, Time: 0.0608 Steps: 243600, Updated lr: 0.000002
***** Running evaluation checkpoint-243726 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-243726 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 364.984570, Avg time per batch (s): 0.070000
{"eval_avg_loss": 0.001419, "eval_total_loss": 1.512717, "eval_acc": 0.999684, "eval_jaccard": 0.989032, "eval_prec": 0.990585, "eval_recall": 0.990676, "eval_f1": 0.990124, "eval_pr_auc": 0.995843, "eval_roc_auc": 0.999427, "eval_fmax": 0.995126, "eval_pmax": 0.997502, "eval_rmax": 0.992762, "eval_tmax": 0.27, "update_flag": false, "test_avg_loss": 0.00152, "test_total_loss": 1.620044, "test_acc": 0.999698, "test_jaccard": 0.988471, "test_prec": 0.989759, "test_recall": 0.990319, "test_f1": 0.989599, "test_pr_auc": 0.995199, "test_roc_auc": 0.999362, "test_fmax": 0.994701, "test_pmax": 0.996624, "test_rmax": 0.992785, "test_tmax": 0.2, "lr": 2.0080742834073478e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004473361993925647, "train_cur_epoch_loss": 1.0213709618871292, "train_cur_epoch_avg_loss": 0.00020534197062467414, "train_cur_epoch_time": 364.98456954956055, "train_cur_epoch_avg_time": 0.07337848201639738, "epoch": 49, "step": 243726}
##################################################
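update_flag stays false through all of these final epochs because dev F1 never beats the 0.990349 recorded at epoch 32 (see the Best Metric block below); the checkpoint from that step is what the closing Validation/Testing passes reload. A sketch of that selection logic, inferred from the log rather than taken from the trainer's code:

best = {"eval_f1": -1.0, "global_step": None}

def on_epoch_end(eval_metrics: dict, global_step: int) -> bool:
    """Returns the update_flag printed in each epoch summary."""
    update_flag = eval_metrics["eval_f1"] > best["eval_f1"]
    if update_flag:
        best.update(eval_metrics)
        best["global_step"] = global_step  # checkpoint-{global_step} becomes best
    return update_flag

# Epoch 49 above: 0.990124 < 0.990349 (epoch 32) -> "update_flag": false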
Training, Epoch: 0050, Batch: 000074, Sample Num: 1184, Cur Loss: 0.00002322, Cur Avg Loss: 0.00017091, Log Avg loss: 0.00020013, Global Avg Loss: 0.00447206, Time: 0.1209 Steps: 243800, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000274, Sample Num: 4384, Cur Loss: 0.00002707, Cur Avg Loss: 0.00021364, Log Avg loss: 0.00022945, Global Avg Loss: 0.00446858, Time: 0.1507 Steps: 244000, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000474, Sample Num: 7584, Cur Loss: 0.00002644, Cur Avg Loss: 0.00028580, Log Avg loss: 0.00038466, Global Avg Loss: 0.00446523, Time: 0.0314 Steps: 244200, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000674, Sample Num: 10784, Cur Loss: 0.00006091, Cur Avg Loss: 0.00026413, Log Avg loss: 0.00021278, Global Avg Loss: 0.00446175, Time: 0.0608 Steps: 244400, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000874, Sample Num: 13984, Cur Loss: 0.00001814, Cur Avg Loss: 0.00026545, Log Avg loss: 0.00026989, Global Avg Loss: 0.00445833, Time: 0.0613 Steps: 244600, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 001074, Sample Num: 17184, Cur Loss: 0.00003635, Cur Avg Loss: 0.00024221, Log Avg loss: 0.00014065, Global Avg Loss: 0.00445480, Time: 0.0597 Steps: 244800, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 001274, Sample Num: 20384, Cur Loss: 0.00000265, Cur Avg Loss: 0.00022766, Log Avg loss: 0.00014955, Global Avg Loss: 0.00445128, Time: 0.0609 Steps: 245000, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 001474, Sample Num: 23584, Cur Loss: 0.00005522, Cur Avg Loss: 0.00021294, Log Avg loss: 0.00011918, Global Avg Loss: 0.00444775, Time: 0.1127 Steps: 245200, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 001674, Sample Num: 26784, Cur Loss: 0.00001097, Cur Avg Loss: 0.00019915, Log Avg loss: 0.00009749, Global Avg Loss: 0.00444421, Time: 0.1169 Steps: 245400, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 001874, Sample Num: 29984, Cur Loss: 0.00000459, Cur Avg Loss: 0.00019287, Log Avg loss: 0.00014030, Global Avg Loss: 0.00444070, Time: 0.0254 Steps: 245600, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002074, Sample Num: 33184, Cur Loss: 0.00000680, Cur Avg Loss: 0.00019429, Log Avg loss: 0.00020758, Global Avg Loss: 0.00443726, Time: 0.0610 Steps: 245800, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002274, Sample Num: 36384, Cur Loss: 0.00003301, Cur Avg Loss: 0.00020228, Log Avg loss: 0.00028511, Global Avg Loss: 0.00443388, Time: 0.1064 Steps: 246000, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002474, Sample Num: 39584, Cur Loss: 0.00007455, Cur Avg Loss: 0.00019707, Log Avg loss: 0.00013791, Global Avg Loss: 0.00443039, Time: 0.0395 Steps: 246200, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002674, Sample Num: 42784, Cur Loss: 0.00019593, Cur Avg Loss: 0.00019979, Log Avg loss: 0.00023335, Global Avg Loss: 0.00442698, Time: 0.0970 Steps: 246400, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002874, Sample Num: 45984, Cur Loss: 0.00003446, Cur Avg Loss: 0.00020550, Log Avg loss: 0.00028196, Global Avg Loss: 0.00442362, Time: 0.0593 Steps: 246600, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003074, Sample Num: 49184, Cur Loss: 0.00002649, Cur Avg Loss: 0.00020432, Log Avg loss: 0.00018726, Global Avg Loss: 0.00442019, Time: 0.0694 Steps: 246800, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003274, Sample Num: 52384, Cur Loss: 0.00001648, Cur Avg Loss: 0.00020083, Log Avg loss: 0.00014717, Global Avg Loss: 0.00441673, Time: 0.0610 Steps: 247000, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003474, Sample Num: 55584, Cur Loss: 0.00005168, Cur Avg Loss: 0.00020037, Log Avg loss: 0.00019294, Global Avg Loss: 0.00441331, Time: 0.0608 Steps: 247200, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003674, Sample Num: 58784, Cur Loss: 0.00003251, Cur Avg Loss: 0.00020200, Log Avg loss: 0.00023029, Global Avg Loss: 0.00440993, Time: 0.0627 Steps: 247400, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003874, Sample Num: 61984, Cur Loss: 0.00000319, Cur Avg Loss: 0.00019765, Log Avg loss: 0.00011770, Global Avg Loss: 0.00440646, Time: 0.1053 Steps: 247600, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004074, Sample Num: 65184, Cur Loss: 0.00000491, Cur Avg Loss: 0.00019901, Log Avg loss: 0.00022538, Global Avg Loss: 0.00440309, Time: 0.0784 Steps: 247800, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004274, Sample Num: 68384, Cur Loss: 0.00002204, Cur Avg Loss: 0.00019880, Log Avg loss: 0.00019445, Global Avg Loss: 0.00439969, Time: 0.0629 Steps: 248000, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004474, Sample Num: 71584, Cur Loss: 0.00197906, Cur Avg Loss: 0.00019779, Log Avg loss: 0.00017618, Global Avg Loss: 0.00439629, Time: 0.1009 Steps: 248200, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004674, Sample Num: 74784, Cur Loss: 0.00003500, Cur Avg Loss: 0.00019936, Log Avg loss: 0.00023464, Global Avg Loss: 0.00439294, Time: 0.0689 Steps: 248400, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004874, Sample Num: 77984, Cur Loss: 0.00006215, Cur Avg Loss: 0.00020156, Log Avg loss: 0.00025300, Global Avg Loss: 0.00438961, Time: 0.0608 Steps: 248600, Updated lr: 0.000000
***** Running evaluation checkpoint-248700 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-248700 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 393.016902, Avg time per batch (s): 0.080000
{"eval_avg_loss": 0.001418, "eval_total_loss": 1.511802, "eval_acc": 0.999686, "eval_jaccard": 0.989097, "eval_prec": 0.990688, "eval_recall": 0.990688, "eval_f1": 0.990186, "eval_pr_auc": 0.995865, "eval_roc_auc": 0.999432, "eval_fmax": 0.995131, "eval_pmax": 0.997512, "eval_rmax": 0.992762, "eval_tmax": 0.27, "update_flag": false, "test_avg_loss": 0.001518, "test_total_loss": 1.617903, "test_acc": 0.999698, "test_jaccard": 0.988456, "test_prec": 0.989788, "test_recall": 0.990285, "test_f1": 0.989583, "test_pr_auc": 0.995207, "test_roc_auc": 0.999363, "test_fmax": 0.99465, "test_pmax": 0.997006, "test_rmax": 0.992306, "test_tmax": 0.25, "lr": 0.0, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004387943434509547, "train_cur_epoch_loss": 1.006906831002972, "train_cur_epoch_avg_loss": 0.0002024340231208227, "train_cur_epoch_time": 393.0169024467468, "train_cur_epoch_avg_time": 0.07901425461333873, "epoch": 50, "step": 248700}
##################################################
#########################Best Metric#########################
{"epoch": 32, "global_step": 159168, "eval_avg_loss": 0.001375, "eval_total_loss": 1.466036, "eval_acc": 0.999687, "eval_jaccard": 0.989201, "eval_prec": 0.991054, "eval_recall": 0.990671, "eval_f1": 0.990349, "eval_pr_auc": 0.995735, "eval_roc_auc": 0.999415, "eval_fmax": 0.994909, "eval_pmax": 0.996898, "eval_rmax": 0.992929, "eval_tmax": 0.21, "update_flag": true, "test_avg_loss": 0.001485, "test_total_loss": 1.583407, "test_acc": 0.999683, "test_jaccard": 0.988019, "test_prec": 0.989767, "test_recall": 0.989882, "test_f1": 0.989306, "test_pr_auc": 0.995069, "test_roc_auc": 0.99934, "test_fmax": 0.994517, "test_pmax": 0.996798, "test_rmax": 0.992246, "test_tmax": 0.24}
##################################################
Total Time: 206881.107923, Avg time per epoch (50 epochs): 4137.620000
++++++++++++Validation+++++++++++++
best f1 global step: 159168
checkpoint path: ../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135319/checkpoint-159168
***** Running evaluation checkpoint-159168 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
{"evaluation_avg_loss_159168": 0.001375, "evaluation_total_loss_159168": 1.466036, "evaluation_acc_159168": 0.999687, "evaluation_jaccard_159168": 0.989201, "evaluation_prec_159168": 0.991054, "evaluation_recall_159168": 0.990671, "evaluation_f1_159168": 0.990349, "evaluation_pr_auc_159168": 0.995735, "evaluation_roc_auc_159168": 0.999415, "evaluation_fmax_159168": 0.994909, "evaluation_pmax_159168": 0.996898, "evaluation_rmax_159168": 0.992929, "evaluation_tmax_159168": 0.21}
++++++++++++Testing+++++++++++++
best f1 global step: 159168
checkpoint path: ../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135319/checkpoint-159168
***** Running testing checkpoint-159168 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
{"evaluation_avg_loss_159168": 0.001485, "evaluation_total_loss_159168": 1.583407, "evaluation_acc_159168": 0.999683, "evaluation_jaccard_159168": 0.988019, "evaluation_prec_159168": 0.989767, "evaluation_recall_159168": 0.989882, "evaluation_f1_159168": 0.989306, "evaluation_pr_auc_159168": 0.995069, "evaluation_roc_auc_159168": 0.99934, "evaluation_fmax_159168": 0.994517, "evaluation_pmax_159168": 0.996798, "evaluation_rmax_159168": 0.992246, "evaluation_tmax_159168": 0.24}