{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "f1", "beta1": 0.9, "beta2": 0.98, "buffer_size": 512, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "VirusEC4", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/VirusEC4/protein/multi_label/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 2560, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "matrix", "intermediate_size": 4096, "label_filepath": "../dataset/VirusEC4/protein/multi_label/label.txt", "label_size": 70, "label_type": "VirusEC4", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": "../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000", "llm_step": "3800000", "llm_task_level": "token_level,span_level,seq_level", "llm_time_str": "20240815023346", "llm_type": "lucaone_virus", "llm_version": "v1.0", "lmdb_path": null, "local_rank": -1, "log_dir": "../logs/VirusEC4/protein/multi_label/luca_base/matrix/20250501135254", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": true, "matrix_dirpath": "../matrices/VirusEC4/protein/multi_label/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "non_ignore": true, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 0, "num_hidden_layers": 0, "num_train_epochs": 50, "output_dir": "../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135254", "output_mode": "multi_label", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 1.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "null", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, 
"task_level_type": "seq_level", "task_type": "multi_label", "tb_log_dir": "../tb-logs/VirusEC4/protein/multi_label/luca_base/matrix/20250501135254", "test_data_dir": "../dataset/VirusEC4/protein/multi_label/test/", "time_str": "20250501135258", "train_data_dir": "../dataset/VirusEC4/protein/multi_label/train/", "trunc_type": "right", "vector_dirpath": "../vectors/VirusEC4/protein/multi_label/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 1000, "weight": [1749.35, 1715.049019607843, 1666.047619047619, 1521.1739130434783, 1445.7438016528927, 1356.0852713178294, 1305.4850746268658, 1258.525179856115, 1240.6737588652481, 1150.8881578947369, 993.9488636363636, 920.7105263157895, 874.675, 813.6511627906976, 777.4888888888889, 774.0486725663717, 763.9082969432314, 650.3159851301115, 631.5342960288808, 629.2625899280575, 620.3368794326241, 615.9683098591549, 585.0668896321071, 567.9707792207793, 544.9688473520249, 539.9228395061729, 519.0949554896142, 510.01457725947523, 484.584487534626, 366.74004192872115, 354.1194331983806, 343.0098039215686, 331.3162878787879, 328.8251879699248, 328.2082551594747, 314.0664272890485, 312.38392857142856, 199.01592718998862, 186.49786780383795, 181.84511434511435, 172.01081612586037, 170.66829268292682, 156.05263157894737, 149.0076660988075, 134.4619523443505, 128.62867647058823, 127.68978102189782, 127.4107793153678, 127.0406681190995, 100.07723112128147, 94.0510752688172, 80.20861989912883, 78.0611334225792, 74.85451433461704, 65.00743218134522, 63.7983223924143, 44.866632469864065, 42.24462690171456, 41.661109788044776, 29.93924353927777, 27.193377895227734, 27.004476690336524, 25.790210821170575, 22.048777413662716, 21.104475811316203, 17.876047414674023, 10.421482187537233, 9.041970331317517, 17.4935, 7.06893764900796], "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,embedding_matrix ################################################## Encoder Config: {'llm_type': 'lucaone_virus', 'llm_version': 'v1.0', 'llm_step': '3800000', 'llm_dirpath': '../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000', 'input_type': 'matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/VirusEC4/protein/multi_label/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'matrix_dirpath': '../matrices/VirusEC4/protein/multi_label/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': True, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 2560, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "id2label": {}, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "label2id": {}, "layer_norm_eps": 
1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4098, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "num_attention_heads": 8, "num_hidden_layers": 4, "pad_token_id": 0, "pos_weight": 1.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": null, "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.29.0", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39, "weight": [ 1749.35, 1715.049019607843, 1666.047619047619, 1521.1739130434783, 1445.7438016528927, 1356.0852713178294, 1305.4850746268658, 1258.525179856115, 1240.6737588652481, 1150.8881578947369, 993.9488636363636, 920.7105263157895, 874.675, 813.6511627906976, 777.4888888888889, 774.0486725663717, 763.9082969432314, 650.3159851301115, 631.5342960288808, 629.2625899280575, 620.3368794326241, 615.9683098591549, 585.0668896321071, 567.9707792207793, 544.9688473520249, 539.9228395061729, 519.0949554896142, 510.01457725947523, 484.584487534626, 366.74004192872115, 354.1194331983806, 343.0098039215686, 331.3162878787879, 328.8251879699248, 328.2082551594747, 314.0664272890485, 312.38392857142856, 199.01592718998862, 186.49786780383795, 181.84511434511435, 172.01081612586037, 170.66829268292682, 156.05263157894737, 149.0076660988075, 134.4619523443505, 128.62867647058823, 127.68978102189782, 127.4107793153678, 127.0406681190995, 100.07723112128147, 94.0510752688172, 80.20861989912883, 78.0611334225792, 74.85451433461704, 65.00743218134522, 63.7983223924143, 44.866632469864065, 42.24462690171456, 41.661109788044776, 29.93924353927777, 27.193377895227734, 27.004476690336524, 25.790210821170575, 22.048777413662716, 21.104475811316203, 17.876047414674023, 10.421482187537233, 9.041970331317517, 17.4935, 7.06893764900796 ] } ################################################## Mode Architecture: LucaBase( (matrix_pooler): GlobalMaskValueAttentionPooling1D (2560 -> 2560) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=2560, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=128, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=70, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 20014150 ################################################## {"total_num": "19.090000M", "total_size": "76.350000MB", "param_sum": "19.090000M", "param_size": "76.350000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "19.086981M", "trainable_size": "76.347923MB"} ################################################## Train dataset len: 79578, batch size: 16, batch num: 4974 Train dataset t_total: 248700, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 79578 Train Dataset Num Epochs = 50 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. 
Train dataset len: 79578, batch size: 16, batch num: 4974
Train dataset t_total: 248700, max_steps: -1
***** Running training *****
Train Dataset Num examples = 79578
Train Dataset Num Epochs = 50
Logging Steps = 200
Saving Steps = -1
Evaluating Strategy = epoch
Train Dataset Instantaneous batch size per GPU = 16
Train Dataset Total train batch size (w. parallel, distributed & accumulation) = 16
Train Dataset Gradient Accumulation steps = 1
Train Dataset Total optimization steps = 248700
##################################################
Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.68436402, Cur Avg Loss: 0.69114187, Log Avg loss: 0.69114187, Global Avg Loss: 0.69114187, Time: 0.3853 Steps: 200, Updated lr: 0.000020 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.38318467, Cur Avg Loss: 0.64461094, Log Avg loss: 0.59808000, Global Avg Loss: 0.64461094, Time: 0.3609 Steps: 400, Updated lr: 0.000040 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.08144163, Cur Avg Loss: 0.48210764, Log Avg loss: 0.15710105, Global Avg Loss: 0.48210764, Time: 0.2990 Steps: 600, Updated lr: 0.000060 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.08503893, Cur Avg Loss: 0.38265111, Log Avg loss: 0.08428153, Global Avg Loss: 0.38265111, Time: 0.2126 Steps: 800, Updated lr: 0.000080 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.08394705, Cur Avg Loss: 0.32210511, Log Avg loss: 0.07992109, Global Avg Loss: 0.32210511, Time: 0.2200 Steps: 1000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.05381764, Cur Avg Loss: 0.27998738, Log Avg loss: 0.06939876, Global Avg Loss: 0.27998738, Time: 0.2164 Steps: 1200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.04026909, Cur Avg Loss: 0.24765643, Log Avg loss: 0.05367072, Global Avg Loss: 0.24765643, Time: 0.1192 Steps: 1400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 0.04557987, Cur Avg Loss: 0.22230979, Log Avg loss: 0.04488333, Global Avg Loss: 0.22230979, Time: 0.3000 Steps: 1600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.04752219, Cur Avg Loss: 0.20191127, Log Avg loss: 0.03872309, Global Avg Loss: 0.20191127, Time: 0.2213 Steps: 1800, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.04121159, Cur Avg Loss: 0.18528947, Log Avg loss: 0.03569325, Global Avg Loss: 0.18528947, Time: 0.2166 Steps: 2000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.02605342, Cur Avg Loss: 0.17148075, Log Avg loss: 0.03339356, Global Avg Loss: 0.17148075, Time: 0.2198 Steps: 2200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.02992425, Cur Avg Loss: 0.15952492, Log Avg loss: 0.02801084, Global Avg Loss: 0.15952492, Time: 0.4001 Steps: 2400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.02721773, Cur Avg Loss: 0.14921534, Log Avg loss: 0.02550033, Global Avg Loss: 0.14921534, Time: 0.3119 Steps: 2600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.01602932, Cur Avg Loss: 0.14025444, Log Avg loss: 0.02376274, Global Avg Loss: 0.14025444, Time: 0.2862 Steps: 2800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.02590895, Cur Avg Loss: 0.13236061, Log Avg loss: 0.02184692, Global Avg Loss: 0.13236061, Time: 0.2558 Steps: 3000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.03724291, Cur Avg Loss: 0.12538277, Log Avg loss: 0.02071516, Global Avg Loss: 0.12538277, Time: 0.2491 Steps: 3200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur
Loss: 0.01917133, Cur Avg Loss: 0.11917651, Log Avg loss: 0.01987650, Global Avg Loss: 0.11917651, Time: 0.3960 Steps: 3400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.01556698, Cur Avg Loss: 0.11355661, Log Avg loss: 0.01801816, Global Avg Loss: 0.11355661, Time: 0.2820 Steps: 3600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.01219468, Cur Avg Loss: 0.10850631, Log Avg loss: 0.01760101, Global Avg Loss: 0.10850631, Time: 0.4620 Steps: 3800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00657699, Cur Avg Loss: 0.10390875, Log Avg loss: 0.01655502, Global Avg Loss: 0.10390875, Time: 0.2402 Steps: 4000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.01095279, Cur Avg Loss: 0.09971375, Log Avg loss: 0.01581391, Global Avg Loss: 0.09971375, Time: 0.3959 Steps: 4200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00805394, Cur Avg Loss: 0.09583435, Log Avg loss: 0.01436690, Global Avg Loss: 0.09583435, Time: 0.2730 Steps: 4400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.01780123, Cur Avg Loss: 0.09226139, Log Avg loss: 0.01365622, Global Avg Loss: 0.09226139, Time: 0.2952 Steps: 4600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.02541198, Cur Avg Loss: 0.08894705, Log Avg loss: 0.01271726, Global Avg Loss: 0.08894705, Time: 0.4438 Steps: 4800, Updated lr: 0.000098 ***** Running evaluation checkpoint-4974 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-4974 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1475.795128, Avg time per batch (s): 0.300000 {"eval_avg_loss": 0.010869, "eval_total_loss": 11.586304, "eval_acc": 0.996495, "eval_jaccard": 0.777114, "eval_prec": 0.7817, "eval_recall": 0.779048, "eval_f1": 0.779422, "eval_pr_auc": 0.933387, "eval_roc_auc": 0.990965, "eval_fmax": 0.933962, "eval_pmax": 0.969813, "eval_rmax": 0.900668, "eval_tmax": 0.18, "update_flag": true, "test_avg_loss": 0.011171, "test_total_loss": 11.908732, "test_acc": 0.996413, "test_jaccard": 0.769473, "test_prec": 0.774395, "test_recall": 0.771243, "test_f1": 0.771905, "test_pr_auc": 0.930486, "test_roc_auc": 0.990145, "test_fmax": 0.932814, "test_pmax": 0.963691, "test_rmax": 0.903855, "test_tmax": 0.17, "lr": 9.839563988696004e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.08625709811433555, "train_cur_epoch_loss": 429.04280602070503, "train_cur_epoch_avg_loss": 0.08625709811433555, "train_cur_epoch_time": 1475.7951278686523, "train_cur_epoch_avg_time": 0.2967018753254227, "epoch": 1, "step": 4974} ################################################## Training, Epoch: 0002, Batch: 000026, Sample Num: 416, Cur Loss: 0.00675655, Cur Avg Loss: 0.00977812, Log Avg loss: 0.01175596, Global Avg Loss: 0.08585941, Time: 0.1605 Steps: 5000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000226, Sample Num: 3616, Cur Loss: 0.00682156, Cur Avg Loss: 0.01110626, Log Avg loss: 0.01127892, Global Avg Loss: 0.08299093, Time: 0.4344 Steps: 5200, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000426, Sample Num: 6816, Cur Loss: 0.01538427, Cur Avg Loss: 0.01082492, Log Avg loss: 
0.01050701, Global Avg Loss: 0.08030634, Time: 0.3709 Steps: 5400, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000626, Sample Num: 10016, Cur Loss: 0.01309291, Cur Avg Loss: 0.01068992, Log Avg loss: 0.01040236, Global Avg Loss: 0.07780977, Time: 0.4182 Steps: 5600, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 000826, Sample Num: 13216, Cur Loss: 0.00608494, Cur Avg Loss: 0.01026578, Log Avg loss: 0.00893822, Global Avg Loss: 0.07543489, Time: 0.2527 Steps: 5800, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001026, Sample Num: 16416, Cur Loss: 0.00330667, Cur Avg Loss: 0.00996094, Log Avg loss: 0.00870195, Global Avg Loss: 0.07321045, Time: 0.4408 Steps: 6000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001226, Sample Num: 19616, Cur Loss: 0.01176735, Cur Avg Loss: 0.00970717, Log Avg loss: 0.00840532, Global Avg Loss: 0.07111997, Time: 0.1806 Steps: 6200, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001426, Sample Num: 22816, Cur Loss: 0.00756029, Cur Avg Loss: 0.00936031, Log Avg loss: 0.00723409, Global Avg Loss: 0.06912353, Time: 0.4455 Steps: 6400, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001626, Sample Num: 26016, Cur Loss: 0.00901971, Cur Avg Loss: 0.00908923, Log Avg loss: 0.00715642, Global Avg Loss: 0.06724574, Time: 0.4387 Steps: 6600, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 001826, Sample Num: 29216, Cur Loss: 0.00654550, Cur Avg Loss: 0.00880496, Log Avg loss: 0.00649380, Global Avg Loss: 0.06545892, Time: 0.5463 Steps: 6800, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 002026, Sample Num: 32416, Cur Loss: 0.00433186, Cur Avg Loss: 0.00859891, Log Avg loss: 0.00671770, Global Avg Loss: 0.06378060, Time: 0.3147 Steps: 7000, Updated lr: 0.000098 Training, Epoch: 0002, Batch: 002226, Sample Num: 35616, Cur Loss: 0.00214595, Cur Avg Loss: 0.00838863, Log Avg loss: 0.00625846, Global Avg Loss: 0.06218276, Time: 0.4398 Steps: 7200, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 002426, Sample Num: 38816, Cur Loss: 0.00108881, Cur Avg Loss: 0.00814547, Log Avg loss: 0.00543913, Global Avg Loss: 0.06064915, Time: 0.3868 Steps: 7400, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 002626, Sample Num: 42016, Cur Loss: 0.00458291, Cur Avg Loss: 0.00790476, Log Avg loss: 0.00498501, Global Avg Loss: 0.05918430, Time: 0.4547 Steps: 7600, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 002826, Sample Num: 45216, Cur Loss: 0.00242554, Cur Avg Loss: 0.00770721, Log Avg loss: 0.00511338, Global Avg Loss: 0.05779787, Time: 0.1380 Steps: 7800, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003026, Sample Num: 48416, Cur Loss: 0.00870422, Cur Avg Loss: 0.00752310, Log Avg loss: 0.00492152, Global Avg Loss: 0.05647596, Time: 0.4630 Steps: 8000, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003226, Sample Num: 51616, Cur Loss: 0.00687357, Cur Avg Loss: 0.00735396, Log Avg loss: 0.00479497, Global Avg Loss: 0.05521545, Time: 0.3596 Steps: 8200, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003426, Sample Num: 54816, Cur Loss: 0.00057565, Cur Avg Loss: 0.00719413, Log Avg loss: 0.00461611, Global Avg Loss: 0.05401070, Time: 0.2228 Steps: 8400, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003626, Sample Num: 58016, Cur Loss: 0.01529327, Cur Avg Loss: 0.00703930, Log Avg loss: 0.00438698, Global Avg Loss: 0.05285666, Time: 0.1305 Steps: 8600, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 003826, Sample Num: 61216, Cur Loss: 0.00450721, Cur Avg Loss: 0.00687994, Log Avg loss: 0.00399085, Global Avg Loss: 
0.05174608, Time: 0.2241 Steps: 8800, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004026, Sample Num: 64416, Cur Loss: 0.00108496, Cur Avg Loss: 0.00675470, Log Avg loss: 0.00435879, Global Avg Loss: 0.05069303, Time: 0.3366 Steps: 9000, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004226, Sample Num: 67616, Cur Loss: 0.00118820, Cur Avg Loss: 0.00663309, Log Avg loss: 0.00418513, Global Avg Loss: 0.04968198, Time: 0.2252 Steps: 9200, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004426, Sample Num: 70816, Cur Loss: 0.00197840, Cur Avg Loss: 0.00650688, Log Avg loss: 0.00384011, Global Avg Loss: 0.04870663, Time: 0.2191 Steps: 9400, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004626, Sample Num: 74016, Cur Loss: 0.00046250, Cur Avg Loss: 0.00637931, Log Avg loss: 0.00355598, Global Avg Loss: 0.04776599, Time: 0.2240 Steps: 9600, Updated lr: 0.000097 Training, Epoch: 0002, Batch: 004826, Sample Num: 77216, Cur Loss: 0.00313803, Cur Avg Loss: 0.00626369, Log Avg loss: 0.00358946, Global Avg Loss: 0.04686443, Time: 0.0858 Steps: 9800, Updated lr: 0.000096 ***** Running evaluation checkpoint-9948 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-9948 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1820.176401, Avg time per batch (s): 0.370000 {"eval_avg_loss": 0.003207, "eval_total_loss": 3.418378, "eval_acc": 0.998977, "eval_jaccard": 0.94377, "eval_prec": 0.946309, "eval_recall": 0.946886, "eval_f1": 0.945728, "eval_pr_auc": 0.987761, "eval_roc_auc": 0.998734, "eval_fmax": 0.98325, "eval_pmax": 0.988366, "eval_rmax": 0.978187, "eval_tmax": 0.21, "update_flag": true, "test_avg_loss": 0.003304, "test_total_loss": 3.522569, "test_acc": 0.998954, "test_jaccard": 0.939885, "test_prec": 0.942089, "test_recall": 0.942964, "test_f1": 0.941758, "test_pr_auc": 0.986318, "test_roc_auc": 0.998401, "test_fmax": 0.982443, "test_pmax": 0.985956, "test_rmax": 0.978955, "test_tmax": 0.19, "lr": 9.638756560355269e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0462163591722608, "train_cur_epoch_loss": 30.71753502494539, "train_cur_epoch_avg_loss": 0.006175620230186045, "train_cur_epoch_time": 1820.176400899887, "train_cur_epoch_avg_time": 0.36593815860472195, "epoch": 2, "step": 9948} ################################################## Training, Epoch: 0003, Batch: 000052, Sample Num: 832, Cur Loss: 0.00021376, Cur Avg Loss: 0.00319704, Log Avg loss: 0.00327610, Global Avg Loss: 0.04599266, Time: 0.2232 Steps: 10000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000252, Sample Num: 4032, Cur Loss: 0.00112688, Cur Avg Loss: 0.00330904, Log Avg loss: 0.00333816, Global Avg Loss: 0.04515630, Time: 0.5211 Steps: 10200, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000452, Sample Num: 7232, Cur Loss: 0.00960930, Cur Avg Loss: 0.00333276, Log Avg loss: 0.00336264, Global Avg Loss: 0.04435257, Time: 0.4723 Steps: 10400, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000652, Sample Num: 10432, Cur Loss: 0.00284966, Cur Avg Loss: 0.00336834, Log Avg loss: 0.00344874, Global Avg Loss: 0.04358080, Time: 0.3622 Steps: 10600, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 000852, Sample Num: 13632, Cur Loss: 0.00134361, Cur Avg Loss: 0.00325503, Log Avg loss: 0.00288567, Global Avg Loss: 0.04282719, Time: 0.2556 Steps: 10800, Updated lr: 0.000096 
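Note: the "Updated lr" values above match linear warmup to learning_rate = 1e-4 over warmup_steps = 1000, then linear decay toward 0 over the remaining t_total = 248700 steps (the shape produced by a transformers-style get_linear_schedule_with_warmup; an assumption, but the end-of-epoch "lr" entries agree to full precision). A quick check:

def lr_at(step, peak=1e-4, warmup=1000, t_total=248700):
    # Linear warmup, then linear decay to zero at t_total.
    if step < warmup:
        return peak * step / warmup
    return peak * (t_total - step) / (t_total - warmup)

print(lr_at(200))   # 2.0e-05     -> "Updated lr: 0.000020" at Steps: 200
print(lr_at(4974))  # 9.83956e-05 -> epoch-1 eval "lr": 9.839563988696004e-05
print(lr_at(9948))  # 9.63876e-05 -> epoch-2 eval "lr": 9.638756560355269e-05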
Training, Epoch: 0003, Batch: 001052, Sample Num: 16832, Cur Loss: 0.00711923, Cur Avg Loss: 0.00318025, Log Avg loss: 0.00286169, Global Avg Loss: 0.04210054, Time: 0.3065 Steps: 11000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001252, Sample Num: 20032, Cur Loss: 0.00047126, Cur Avg Loss: 0.00312097, Log Avg loss: 0.00280915, Global Avg Loss: 0.04139891, Time: 0.3116 Steps: 11200, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001452, Sample Num: 23232, Cur Loss: 0.00112872, Cur Avg Loss: 0.00303111, Log Avg loss: 0.00246860, Global Avg Loss: 0.04071592, Time: 0.2165 Steps: 11400, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001652, Sample Num: 26432, Cur Loss: 0.00144991, Cur Avg Loss: 0.00293959, Log Avg loss: 0.00227510, Global Avg Loss: 0.04005315, Time: 0.2116 Steps: 11600, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 001852, Sample Num: 29632, Cur Loss: 0.00990836, Cur Avg Loss: 0.00286642, Log Avg loss: 0.00226208, Global Avg Loss: 0.03941262, Time: 0.2280 Steps: 11800, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 002052, Sample Num: 32832, Cur Loss: 0.00312825, Cur Avg Loss: 0.00283355, Log Avg loss: 0.00252909, Global Avg Loss: 0.03879790, Time: 0.2951 Steps: 12000, Updated lr: 0.000096 Training, Epoch: 0003, Batch: 002252, Sample Num: 36032, Cur Loss: 0.00092381, Cur Avg Loss: 0.00277756, Log Avg loss: 0.00220320, Global Avg Loss: 0.03819798, Time: 0.2709 Steps: 12200, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 002452, Sample Num: 39232, Cur Loss: 0.00185985, Cur Avg Loss: 0.00270262, Log Avg loss: 0.00185880, Global Avg Loss: 0.03761187, Time: 0.2201 Steps: 12400, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 002652, Sample Num: 42432, Cur Loss: 0.00082354, Cur Avg Loss: 0.00265887, Log Avg loss: 0.00212238, Global Avg Loss: 0.03704854, Time: 0.2158 Steps: 12600, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 002852, Sample Num: 45632, Cur Loss: 0.00061234, Cur Avg Loss: 0.00259249, Log Avg loss: 0.00171241, Global Avg Loss: 0.03649642, Time: 0.2238 Steps: 12800, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003052, Sample Num: 48832, Cur Loss: 0.00054793, Cur Avg Loss: 0.00255053, Log Avg loss: 0.00195219, Global Avg Loss: 0.03596497, Time: 0.2426 Steps: 13000, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003252, Sample Num: 52032, Cur Loss: 0.00341766, Cur Avg Loss: 0.00250094, Log Avg loss: 0.00174413, Global Avg Loss: 0.03544647, Time: 0.2373 Steps: 13200, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003452, Sample Num: 55232, Cur Loss: 0.00933162, Cur Avg Loss: 0.00246727, Log Avg loss: 0.00191985, Global Avg Loss: 0.03494607, Time: 0.2390 Steps: 13400, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003652, Sample Num: 58432, Cur Loss: 0.00048865, Cur Avg Loss: 0.00242643, Log Avg loss: 0.00172138, Global Avg Loss: 0.03445747, Time: 0.3180 Steps: 13600, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 003852, Sample Num: 61632, Cur Loss: 0.00204877, Cur Avg Loss: 0.00238054, Log Avg loss: 0.00154265, Global Avg Loss: 0.03398045, Time: 0.2738 Steps: 13800, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004052, Sample Num: 64832, Cur Loss: 0.00196089, Cur Avg Loss: 0.00235767, Log Avg loss: 0.00191730, Global Avg Loss: 0.03352240, Time: 0.3562 Steps: 14000, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004252, Sample Num: 68032, Cur Loss: 0.00086555, Cur Avg Loss: 0.00232690, Log Avg loss: 0.00170335, Global Avg Loss: 0.03307425, Time: 0.3694 Steps: 14200, Updated lr: 0.000095 Training, Epoch: 
0003, Batch: 004452, Sample Num: 71232, Cur Loss: 0.00290496, Cur Avg Loss: 0.00229754, Log Avg loss: 0.00167337, Global Avg Loss: 0.03263812, Time: 0.2568 Steps: 14400, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004652, Sample Num: 74432, Cur Loss: 0.00008833, Cur Avg Loss: 0.00226422, Log Avg loss: 0.00152263, Global Avg Loss: 0.03221188, Time: 0.2113 Steps: 14600, Updated lr: 0.000095 Training, Epoch: 0003, Batch: 004852, Sample Num: 77632, Cur Loss: 0.00161792, Cur Avg Loss: 0.00223492, Log Avg loss: 0.00155340, Global Avg Loss: 0.03179758, Time: 0.2212 Steps: 14800, Updated lr: 0.000094 ***** Running evaluation checkpoint-14922 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-14922 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1329.598547, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001851, "eval_total_loss": 1.973658, "eval_acc": 0.999532, "eval_jaccard": 0.979468, "eval_prec": 0.981362, "eval_recall": 0.980989, "eval_f1": 0.980711, "eval_pr_auc": 0.993255, "eval_roc_auc": 0.999253, "eval_fmax": 0.991633, "eval_pmax": 0.994379, "eval_rmax": 0.988901, "eval_tmax": 0.19, "update_flag": true, "test_avg_loss": 0.001919, "test_total_loss": 2.045512, "test_acc": 0.999542, "test_jaccard": 0.978124, "test_prec": 0.979623, "test_recall": 0.979933, "test_f1": 0.979363, "test_pr_auc": 0.992467, "test_roc_auc": 0.998984, "test_fmax": 0.991272, "test_pmax": 0.996309, "test_rmax": 0.986285, "test_tmax": 0.28, "lr": 9.437949132014535e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.03154864726656286, "train_cur_epoch_loss": 11.008573466000598, "train_cur_epoch_avg_loss": 0.002213223455166988, "train_cur_epoch_time": 1329.59854722023, "train_cur_epoch_avg_time": 0.26730971998798353, "epoch": 3, "step": 14922} ################################################## Training, Epoch: 0004, Batch: 000078, Sample Num: 1248, Cur Loss: 0.00189275, Cur Avg Loss: 0.00122674, Log Avg loss: 0.00130208, Global Avg Loss: 0.03139097, Time: 0.2614 Steps: 15000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000278, Sample Num: 4448, Cur Loss: 0.00158418, Cur Avg Loss: 0.00143799, Log Avg loss: 0.00152037, Global Avg Loss: 0.03099794, Time: 0.4217 Steps: 15200, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000478, Sample Num: 7648, Cur Loss: 0.00489974, Cur Avg Loss: 0.00145967, Log Avg loss: 0.00148980, Global Avg Loss: 0.03061472, Time: 0.3961 Steps: 15400, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000678, Sample Num: 10848, Cur Loss: 0.00012026, Cur Avg Loss: 0.00152728, Log Avg loss: 0.00168889, Global Avg Loss: 0.03024387, Time: 0.2194 Steps: 15600, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 000878, Sample Num: 14048, Cur Loss: 0.00118458, Cur Avg Loss: 0.00147980, Log Avg loss: 0.00131885, Global Avg Loss: 0.02987773, Time: 0.2502 Steps: 15800, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001078, Sample Num: 17248, Cur Loss: 0.00052774, Cur Avg Loss: 0.00146763, Log Avg loss: 0.00141419, Global Avg Loss: 0.02952194, Time: 0.3108 Steps: 16000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001278, Sample Num: 20448, Cur Loss: 0.00081665, Cur Avg Loss: 0.00142491, Log Avg loss: 0.00119463, Global Avg Loss: 0.02917222, Time: 0.2212 Steps: 16200, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001478, Sample Num: 23648, Cur 
Loss: 0.00152000, Cur Avg Loss: 0.00136155, Log Avg loss: 0.00095670, Global Avg Loss: 0.02882813, Time: 0.2205 Steps: 16400, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001678, Sample Num: 26848, Cur Loss: 0.00155272, Cur Avg Loss: 0.00131287, Log Avg loss: 0.00095310, Global Avg Loss: 0.02849228, Time: 0.1305 Steps: 16600, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 001878, Sample Num: 30048, Cur Loss: 0.00177240, Cur Avg Loss: 0.00127299, Log Avg loss: 0.00093842, Global Avg Loss: 0.02816426, Time: 0.2202 Steps: 16800, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 002078, Sample Num: 33248, Cur Loss: 0.00049093, Cur Avg Loss: 0.00127322, Log Avg loss: 0.00127540, Global Avg Loss: 0.02784792, Time: 0.2173 Steps: 17000, Updated lr: 0.000094 Training, Epoch: 0004, Batch: 002278, Sample Num: 36448, Cur Loss: 0.00517408, Cur Avg Loss: 0.00125729, Log Avg loss: 0.00109178, Global Avg Loss: 0.02753680, Time: 0.2190 Steps: 17200, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 002478, Sample Num: 39648, Cur Loss: 0.00036762, Cur Avg Loss: 0.00121736, Log Avg loss: 0.00076257, Global Avg Loss: 0.02722905, Time: 0.2204 Steps: 17400, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 002678, Sample Num: 42848, Cur Loss: 0.00003194, Cur Avg Loss: 0.00120623, Log Avg loss: 0.00106826, Global Avg Loss: 0.02693177, Time: 0.1296 Steps: 17600, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 002878, Sample Num: 46048, Cur Loss: 0.00117283, Cur Avg Loss: 0.00118653, Log Avg loss: 0.00092281, Global Avg Loss: 0.02663954, Time: 0.2173 Steps: 17800, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003078, Sample Num: 49248, Cur Loss: 0.00049655, Cur Avg Loss: 0.00116653, Log Avg loss: 0.00087870, Global Avg Loss: 0.02635331, Time: 0.2231 Steps: 18000, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003278, Sample Num: 52448, Cur Loss: 0.00043120, Cur Avg Loss: 0.00114177, Log Avg loss: 0.00076074, Global Avg Loss: 0.02607207, Time: 0.2132 Steps: 18200, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003478, Sample Num: 55648, Cur Loss: 0.00094743, Cur Avg Loss: 0.00113500, Log Avg loss: 0.00102403, Global Avg Loss: 0.02579981, Time: 0.2717 Steps: 18400, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003678, Sample Num: 58848, Cur Loss: 0.00014735, Cur Avg Loss: 0.00112468, Log Avg loss: 0.00094511, Global Avg Loss: 0.02553255, Time: 0.2204 Steps: 18600, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 003878, Sample Num: 62048, Cur Loss: 0.00009820, Cur Avg Loss: 0.00110170, Log Avg loss: 0.00067917, Global Avg Loss: 0.02526815, Time: 0.2183 Steps: 18800, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004078, Sample Num: 65248, Cur Loss: 0.00014013, Cur Avg Loss: 0.00109473, Log Avg loss: 0.00095959, Global Avg Loss: 0.02501227, Time: 0.2479 Steps: 19000, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004278, Sample Num: 68448, Cur Loss: 0.00022107, Cur Avg Loss: 0.00108197, Log Avg loss: 0.00082176, Global Avg Loss: 0.02476029, Time: 0.2213 Steps: 19200, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004478, Sample Num: 71648, Cur Loss: 0.00025412, Cur Avg Loss: 0.00107011, Log Avg loss: 0.00081651, Global Avg Loss: 0.02451345, Time: 0.2199 Steps: 19400, Updated lr: 0.000093 Training, Epoch: 0004, Batch: 004678, Sample Num: 74848, Cur Loss: 0.00038906, Cur Avg Loss: 0.00105946, Log Avg loss: 0.00082091, Global Avg Loss: 0.02427169, Time: 0.2201 Steps: 19600, Updated lr: 0.000092 Training, Epoch: 0004, Batch: 004878, Sample Num: 78048, Cur Loss: 0.00030512, 
Cur Avg Loss: 0.00105157, Log Avg loss: 0.00086710, Global Avg Loss: 0.02403528, Time: 0.2222 Steps: 19800, Updated lr: 0.000092 ***** Running evaluation checkpoint-19896 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-19896 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1330.498955, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001566, "eval_total_loss": 1.669553, "eval_acc": 0.9996, "eval_jaccard": 0.983632, "eval_prec": 0.985882, "eval_recall": 0.985181, "eval_f1": 0.984972, "eval_pr_auc": 0.994299, "eval_roc_auc": 0.999345, "eval_fmax": 0.993288, "eval_pmax": 0.996071, "eval_rmax": 0.99052, "eval_tmax": 0.19, "update_flag": true, "test_avg_loss": 0.001635, "test_total_loss": 1.74271, "test_acc": 0.999623, "test_jaccard": 0.982624, "test_prec": 0.984318, "test_recall": 0.98403, "test_f1": 0.983739, "test_pr_auc": 0.993694, "test_roc_auc": 0.999112, "test_fmax": 0.992688, "test_pmax": 0.995993, "test_rmax": 0.989405, "test_tmax": 0.18, "lr": 9.237141703673799e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.023923227680563517, "train_cur_epoch_loss": 5.207623420840719, "train_cur_epoch_avg_loss": 0.0010469689225654842, "train_cur_epoch_time": 1330.498955488205, "train_cur_epoch_avg_time": 0.26749074296103836, "epoch": 4, "step": 19896} ################################################## Training, Epoch: 0005, Batch: 000104, Sample Num: 1664, Cur Loss: 0.00084881, Cur Avg Loss: 0.00081513, Log Avg loss: 0.00081416, Global Avg Loss: 0.02380307, Time: 0.4328 Steps: 20000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000304, Sample Num: 4864, Cur Loss: 0.00016029, Cur Avg Loss: 0.00083159, Log Avg loss: 0.00084014, Global Avg Loss: 0.02357571, Time: 0.4525 Steps: 20200, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000504, Sample Num: 8064, Cur Loss: 0.00008294, Cur Avg Loss: 0.00086505, Log Avg loss: 0.00091593, Global Avg Loss: 0.02335356, Time: 0.2191 Steps: 20400, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000704, Sample Num: 11264, Cur Loss: 0.00011155, Cur Avg Loss: 0.00087397, Log Avg loss: 0.00089644, Global Avg Loss: 0.02313552, Time: 0.3033 Steps: 20600, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 000904, Sample Num: 14464, Cur Loss: 0.00096535, Cur Avg Loss: 0.00085343, Log Avg loss: 0.00078113, Global Avg Loss: 0.02292058, Time: 0.2155 Steps: 20800, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001104, Sample Num: 17664, Cur Loss: 0.00020799, Cur Avg Loss: 0.00084185, Log Avg loss: 0.00078949, Global Avg Loss: 0.02270981, Time: 0.2160 Steps: 21000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001304, Sample Num: 20864, Cur Loss: 0.00074216, Cur Avg Loss: 0.00080454, Log Avg loss: 0.00059862, Global Avg Loss: 0.02250121, Time: 0.2199 Steps: 21200, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001504, Sample Num: 24064, Cur Loss: 0.00021350, Cur Avg Loss: 0.00074945, Log Avg loss: 0.00039025, Global Avg Loss: 0.02229457, Time: 0.4243 Steps: 21400, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001704, Sample Num: 27264, Cur Loss: 0.00007435, Cur Avg Loss: 0.00073116, Log Avg loss: 0.00059363, Global Avg Loss: 0.02209363, Time: 0.2651 Steps: 21600, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 001904, Sample Num: 30464, Cur Loss: 0.00013331, Cur Avg Loss: 0.00070927, Log Avg loss: 
0.00052273, Global Avg Loss: 0.02189573, Time: 0.2670 Steps: 21800, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 002104, Sample Num: 33664, Cur Loss: 0.00003376, Cur Avg Loss: 0.00071695, Log Avg loss: 0.00079008, Global Avg Loss: 0.02170386, Time: 0.2181 Steps: 22000, Updated lr: 0.000092 Training, Epoch: 0005, Batch: 002304, Sample Num: 36864, Cur Loss: 0.00049981, Cur Avg Loss: 0.00070465, Log Avg loss: 0.00057525, Global Avg Loss: 0.02151352, Time: 0.2544 Steps: 22200, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 002504, Sample Num: 40064, Cur Loss: 0.00008408, Cur Avg Loss: 0.00068391, Log Avg loss: 0.00044503, Global Avg Loss: 0.02132540, Time: 0.2616 Steps: 22400, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 002704, Sample Num: 43264, Cur Loss: 0.00030139, Cur Avg Loss: 0.00068591, Log Avg loss: 0.00071086, Global Avg Loss: 0.02114297, Time: 0.0999 Steps: 22600, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 002904, Sample Num: 46464, Cur Loss: 0.00006614, Cur Avg Loss: 0.00067558, Log Avg loss: 0.00053598, Global Avg Loss: 0.02096221, Time: 0.2492 Steps: 22800, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003104, Sample Num: 49664, Cur Loss: 0.00273199, Cur Avg Loss: 0.00066401, Log Avg loss: 0.00049605, Global Avg Loss: 0.02078424, Time: 0.2453 Steps: 23000, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003304, Sample Num: 52864, Cur Loss: 0.00021786, Cur Avg Loss: 0.00064887, Log Avg loss: 0.00041386, Global Avg Loss: 0.02060864, Time: 0.2180 Steps: 23200, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003504, Sample Num: 56064, Cur Loss: 0.00019912, Cur Avg Loss: 0.00064991, Log Avg loss: 0.00066706, Global Avg Loss: 0.02043820, Time: 0.2529 Steps: 23400, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003704, Sample Num: 59264, Cur Loss: 0.00514044, Cur Avg Loss: 0.00064952, Log Avg loss: 0.00064278, Global Avg Loss: 0.02027044, Time: 0.1448 Steps: 23600, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 003904, Sample Num: 62464, Cur Loss: 0.00014946, Cur Avg Loss: 0.00063683, Log Avg loss: 0.00040169, Global Avg Loss: 0.02010348, Time: 0.2173 Steps: 23800, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004104, Sample Num: 65664, Cur Loss: 0.00284563, Cur Avg Loss: 0.00064004, Log Avg loss: 0.00070274, Global Avg Loss: 0.01994180, Time: 0.3797 Steps: 24000, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004304, Sample Num: 68864, Cur Loss: 0.00002329, Cur Avg Loss: 0.00063135, Log Avg loss: 0.00045300, Global Avg Loss: 0.01978074, Time: 0.3250 Steps: 24200, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004504, Sample Num: 72064, Cur Loss: 0.00104907, Cur Avg Loss: 0.00063203, Log Avg loss: 0.00064673, Global Avg Loss: 0.01962390, Time: 0.3151 Steps: 24400, Updated lr: 0.000091 Training, Epoch: 0005, Batch: 004704, Sample Num: 75264, Cur Loss: 0.00010521, Cur Avg Loss: 0.00062407, Log Avg loss: 0.00044482, Global Avg Loss: 0.01946797, Time: 0.2406 Steps: 24600, Updated lr: 0.000090 Training, Epoch: 0005, Batch: 004904, Sample Num: 78464, Cur Loss: 0.00012154, Cur Avg Loss: 0.00062325, Log Avg loss: 0.00060399, Global Avg Loss: 0.01931585, Time: 0.2232 Steps: 24800, Updated lr: 0.000090 ***** Running evaluation checkpoint-24870 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-24870 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] 
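Note: alongside the fixed-threshold metrics, each eval block reports fmax/pmax/rmax/tmax. These are read here (an assumption, following the usual CAFA-style convention) as the best F1 over a sweep of decision thresholds, together with the precision, recall, and threshold at which it occurs; the logged tmax values are all multiples of 0.01, consistent with a 0.01-step grid. A sketch under that reading:

import numpy as np

def f_max(probs, labels, grid=np.arange(0.01, 1.0, 0.01)):
    # probs, labels: [N, 70] arrays of sigmoid outputs and 0/1 targets.
    best = (0.0, 0.0, 0.0, 0.0)            # (fmax, pmax, rmax, tmax)
    for t in grid:
        pred = probs >= t
        tp = (pred & (labels == 1)).sum()
        p = tp / max(pred.sum(), 1)        # micro precision at threshold t
        r = tp / max(labels.sum(), 1)      # micro recall at threshold t
        f = 2 * p * r / max(p + r, 1e-12)
        if f > best[0]:
            best = (f, p, r, t)
    return best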
################################################## Epoch Time: 1324.926834, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001481, "eval_total_loss": 1.578809, "eval_acc": 0.999646, "eval_jaccard": 0.985678, "eval_prec": 0.987412, "eval_recall": 0.98735, "eval_f1": 0.986899, "eval_pr_auc": 0.994897, "eval_roc_auc": 0.999418, "eval_fmax": 0.993474, "eval_pmax": 0.995809, "eval_rmax": 0.99115, "eval_tmax": 0.17, "update_flag": true, "test_avg_loss": 0.001586, "test_total_loss": 1.690353, "test_acc": 0.999657, "test_jaccard": 0.984473, "test_prec": 0.985711, "test_recall": 0.986115, "test_f1": 0.985538, "test_pr_auc": 0.994135, "test_roc_auc": 0.999139, "test_fmax": 0.993177, "test_pmax": 0.996557, "test_rmax": 0.98982, "test_tmax": 0.19, "lr": 9.036334275333064e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.019262928637092645, "train_cur_epoch_loss": 3.0924972720023334, "train_cur_epoch_avg_loss": 0.0006217324632091543, "train_cur_epoch_time": 1324.926834344864, "train_cur_epoch_avg_time": 0.26637049343483393, "epoch": 5, "step": 24870} ################################################## Training, Epoch: 0006, Batch: 000130, Sample Num: 2080, Cur Loss: 0.00003497, Cur Avg Loss: 0.00053313, Log Avg loss: 0.00052687, Global Avg Loss: 0.01916553, Time: 0.2211 Steps: 25000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000330, Sample Num: 5280, Cur Loss: 0.00032615, Cur Avg Loss: 0.00050650, Log Avg loss: 0.00048919, Global Avg Loss: 0.01901731, Time: 0.2734 Steps: 25200, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000530, Sample Num: 8480, Cur Loss: 0.00024152, Cur Avg Loss: 0.00059577, Log Avg loss: 0.00074306, Global Avg Loss: 0.01887342, Time: 0.2235 Steps: 25400, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000730, Sample Num: 11680, Cur Loss: 0.00009931, Cur Avg Loss: 0.00061181, Log Avg loss: 0.00065433, Global Avg Loss: 0.01873108, Time: 0.3896 Steps: 25600, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 000930, Sample Num: 14880, Cur Loss: 0.00005974, Cur Avg Loss: 0.00058296, Log Avg loss: 0.00047764, Global Avg Loss: 0.01858958, Time: 0.5385 Steps: 25800, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001130, Sample Num: 18080, Cur Loss: 0.00009683, Cur Avg Loss: 0.00056562, Log Avg loss: 0.00048500, Global Avg Loss: 0.01845031, Time: 0.1862 Steps: 26000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001330, Sample Num: 21280, Cur Loss: 0.00002771, Cur Avg Loss: 0.00053492, Log Avg loss: 0.00036147, Global Avg Loss: 0.01831223, Time: 0.3314 Steps: 26200, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001530, Sample Num: 24480, Cur Loss: 0.00012886, Cur Avg Loss: 0.00050392, Log Avg loss: 0.00029776, Global Avg Loss: 0.01817576, Time: 0.3253 Steps: 26400, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001730, Sample Num: 27680, Cur Loss: 0.00018173, Cur Avg Loss: 0.00048711, Log Avg loss: 0.00035849, Global Avg Loss: 0.01804179, Time: 0.2204 Steps: 26600, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 001930, Sample Num: 30880, Cur Loss: 0.00013967, Cur Avg Loss: 0.00047444, Log Avg loss: 0.00036488, Global Avg Loss: 0.01790988, Time: 0.2787 Steps: 26800, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 002130, Sample Num: 34080, Cur Loss: 0.00001862, Cur Avg Loss: 0.00048982, Log Avg loss: 0.00063827, Global Avg Loss: 0.01778194, Time: 0.3210 Steps: 27000, Updated lr: 0.000090 Training, Epoch: 0006, Batch: 002330, Sample Num: 37280, Cur Loss: 0.00003230, Cur Avg Loss: 0.00048498, Log Avg loss: 0.00043339, Global Avg Loss: 
0.01765438, Time: 0.2168 Steps: 27200, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 002530, Sample Num: 40480, Cur Loss: 0.00002436, Cur Avg Loss: 0.00047326, Log Avg loss: 0.00033669, Global Avg Loss: 0.01752797, Time: 0.1839 Steps: 27400, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 002730, Sample Num: 43680, Cur Loss: 0.00591727, Cur Avg Loss: 0.00047796, Log Avg loss: 0.00053749, Global Avg Loss: 0.01740485, Time: 0.2224 Steps: 27600, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 002930, Sample Num: 46880, Cur Loss: 0.00142272, Cur Avg Loss: 0.00047162, Log Avg loss: 0.00038511, Global Avg Loss: 0.01728241, Time: 0.5011 Steps: 27800, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003130, Sample Num: 50080, Cur Loss: 0.00020299, Cur Avg Loss: 0.00046831, Log Avg loss: 0.00041980, Global Avg Loss: 0.01716196, Time: 0.2808 Steps: 28000, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003330, Sample Num: 53280, Cur Loss: 0.00007606, Cur Avg Loss: 0.00046472, Log Avg loss: 0.00040851, Global Avg Loss: 0.01704314, Time: 0.2562 Steps: 28200, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003530, Sample Num: 56480, Cur Loss: 0.00004431, Cur Avg Loss: 0.00046802, Log Avg loss: 0.00052296, Global Avg Loss: 0.01692680, Time: 0.2202 Steps: 28400, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003730, Sample Num: 59680, Cur Loss: 0.00013359, Cur Avg Loss: 0.00046250, Log Avg loss: 0.00036500, Global Avg Loss: 0.01681098, Time: 0.4251 Steps: 28600, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 003930, Sample Num: 62880, Cur Loss: 0.00099509, Cur Avg Loss: 0.00045587, Log Avg loss: 0.00033233, Global Avg Loss: 0.01669655, Time: 0.3268 Steps: 28800, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004130, Sample Num: 66080, Cur Loss: 0.00020576, Cur Avg Loss: 0.00046030, Log Avg loss: 0.00054733, Global Avg Loss: 0.01658518, Time: 0.1515 Steps: 29000, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004330, Sample Num: 69280, Cur Loss: 0.00001896, Cur Avg Loss: 0.00045639, Log Avg loss: 0.00037561, Global Avg Loss: 0.01647415, Time: 0.2163 Steps: 29200, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004530, Sample Num: 72480, Cur Loss: 0.00002294, Cur Avg Loss: 0.00045519, Log Avg loss: 0.00042923, Global Avg Loss: 0.01636500, Time: 0.2991 Steps: 29400, Updated lr: 0.000089 Training, Epoch: 0006, Batch: 004730, Sample Num: 75680, Cur Loss: 0.00003143, Cur Avg Loss: 0.00045249, Log Avg loss: 0.00039134, Global Avg Loss: 0.01625707, Time: 0.4494 Steps: 29600, Updated lr: 0.000088 Training, Epoch: 0006, Batch: 004930, Sample Num: 78880, Cur Loss: 0.00006468, Cur Avg Loss: 0.00045283, Log Avg loss: 0.00046095, Global Avg Loss: 0.01615106, Time: 0.4368 Steps: 29800, Updated lr: 0.000088 ***** Running evaluation checkpoint-29844 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-29844 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1281.364184, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001587, "eval_total_loss": 1.691915, "eval_acc": 0.999653, "eval_jaccard": 0.985467, "eval_prec": 0.98729, "eval_recall": 0.986965, "eval_f1": 0.986668, "eval_pr_auc": 0.99539, "eval_roc_auc": 0.999427, "eval_fmax": 0.993921, "eval_pmax": 0.996121, "eval_rmax": 0.99173, "eval_tmax": 0.11, "update_flag": false, "test_avg_loss": 0.001703, 
"test_total_loss": 1.815086, "test_acc": 0.999665, "test_jaccard": 0.98429, "test_prec": 0.985656, "test_recall": 0.985693, "test_f1": 0.985325, "test_pr_auc": 0.994193, "test_roc_auc": 0.999185, "test_fmax": 0.993334, "test_pmax": 0.996491, "test_rmax": 0.990197, "test_tmax": 0.11, "lr": 8.83552684699233e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.016127899455355723, "train_cur_epoch_loss": 2.2519961411422145, "train_cur_epoch_avg_loss": 0.0004527535466711328, "train_cur_epoch_time": 1281.364184141159, "train_cur_epoch_avg_time": 0.2576124214196138, "epoch": 6, "step": 29844} ################################################## Training, Epoch: 0007, Batch: 000156, Sample Num: 2496, Cur Loss: 0.00003107, Cur Avg Loss: 0.00034547, Log Avg loss: 0.00036707, Global Avg Loss: 0.01604583, Time: 0.2456 Steps: 30000, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000356, Sample Num: 5696, Cur Loss: 0.00022698, Cur Avg Loss: 0.00037109, Log Avg loss: 0.00039107, Global Avg Loss: 0.01594216, Time: 0.2206 Steps: 30200, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000556, Sample Num: 8896, Cur Loss: 0.00013704, Cur Avg Loss: 0.00045047, Log Avg loss: 0.00059177, Global Avg Loss: 0.01584117, Time: 0.2887 Steps: 30400, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000756, Sample Num: 12096, Cur Loss: 0.00003313, Cur Avg Loss: 0.00048900, Log Avg loss: 0.00059610, Global Avg Loss: 0.01574153, Time: 0.2494 Steps: 30600, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 000956, Sample Num: 15296, Cur Loss: 0.00008794, Cur Avg Loss: 0.00045543, Log Avg loss: 0.00032855, Global Avg Loss: 0.01564144, Time: 0.3949 Steps: 30800, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001156, Sample Num: 18496, Cur Loss: 0.00002140, Cur Avg Loss: 0.00044955, Log Avg loss: 0.00042145, Global Avg Loss: 0.01554325, Time: 0.4367 Steps: 31000, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001356, Sample Num: 21696, Cur Loss: 0.00006640, Cur Avg Loss: 0.00042426, Log Avg loss: 0.00027808, Global Avg Loss: 0.01544540, Time: 0.2185 Steps: 31200, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001556, Sample Num: 24896, Cur Loss: 0.00008295, Cur Avg Loss: 0.00039516, Log Avg loss: 0.00019786, Global Avg Loss: 0.01534828, Time: 0.4043 Steps: 31400, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001756, Sample Num: 28096, Cur Loss: 0.00006310, Cur Avg Loss: 0.00038444, Log Avg loss: 0.00030102, Global Avg Loss: 0.01525304, Time: 0.4296 Steps: 31600, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 001956, Sample Num: 31296, Cur Loss: 0.00017412, Cur Avg Loss: 0.00037003, Log Avg loss: 0.00024356, Global Avg Loss: 0.01515864, Time: 0.3145 Steps: 31800, Updated lr: 0.000088 Training, Epoch: 0007, Batch: 002156, Sample Num: 34496, Cur Loss: 0.00006641, Cur Avg Loss: 0.00038137, Log Avg loss: 0.00049223, Global Avg Loss: 0.01506698, Time: 0.3275 Steps: 32000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 002356, Sample Num: 37696, Cur Loss: 0.00011725, Cur Avg Loss: 0.00038201, Log Avg loss: 0.00038894, Global Avg Loss: 0.01497581, Time: 0.2719 Steps: 32200, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 002556, Sample Num: 40896, Cur Loss: 0.00001220, Cur Avg Loss: 0.00037085, Log Avg loss: 0.00023938, Global Avg Loss: 0.01488484, Time: 0.2178 Steps: 32400, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 002756, Sample Num: 44096, Cur Loss: 0.00266923, Cur Avg Loss: 0.00038058, Log Avg loss: 0.00050497, Global Avg Loss: 0.01479662, Time: 0.4086 Steps: 32600, Updated lr: 
0.000087 Training, Epoch: 0007, Batch: 002956, Sample Num: 47296, Cur Loss: 0.00000936, Cur Avg Loss: 0.00037758, Log Avg loss: 0.00033620, Global Avg Loss: 0.01470845, Time: 0.2198 Steps: 32800, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003156, Sample Num: 50496, Cur Loss: 0.00396203, Cur Avg Loss: 0.00037434, Log Avg loss: 0.00032643, Global Avg Loss: 0.01462129, Time: 0.2229 Steps: 33000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003356, Sample Num: 53696, Cur Loss: 0.00004199, Cur Avg Loss: 0.00036866, Log Avg loss: 0.00027910, Global Avg Loss: 0.01453489, Time: 0.2804 Steps: 33200, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003556, Sample Num: 56896, Cur Loss: 0.00003706, Cur Avg Loss: 0.00036878, Log Avg loss: 0.00037078, Global Avg Loss: 0.01445007, Time: 0.2209 Steps: 33400, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003756, Sample Num: 60096, Cur Loss: 0.00607474, Cur Avg Loss: 0.00036658, Log Avg loss: 0.00032736, Global Avg Loss: 0.01436601, Time: 0.2201 Steps: 33600, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 003956, Sample Num: 63296, Cur Loss: 0.00005931, Cur Avg Loss: 0.00036473, Log Avg loss: 0.00033001, Global Avg Loss: 0.01428296, Time: 0.1188 Steps: 33800, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004156, Sample Num: 66496, Cur Loss: 0.00015752, Cur Avg Loss: 0.00036196, Log Avg loss: 0.00030712, Global Avg Loss: 0.01420074, Time: 0.3020 Steps: 34000, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004356, Sample Num: 69696, Cur Loss: 0.00001486, Cur Avg Loss: 0.00035776, Log Avg loss: 0.00027051, Global Avg Loss: 0.01411928, Time: 0.2778 Steps: 34200, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004556, Sample Num: 72896, Cur Loss: 0.00004601, Cur Avg Loss: 0.00036047, Log Avg loss: 0.00041959, Global Avg Loss: 0.01403963, Time: 0.2195 Steps: 34400, Updated lr: 0.000087 Training, Epoch: 0007, Batch: 004756, Sample Num: 76096, Cur Loss: 0.00003658, Cur Avg Loss: 0.00035491, Log Avg loss: 0.00022810, Global Avg Loss: 0.01395980, Time: 0.2146 Steps: 34600, Updated lr: 0.000086 Training, Epoch: 0007, Batch: 004956, Sample Num: 79296, Cur Loss: 0.00003465, Cur Avg Loss: 0.00035654, Log Avg loss: 0.00039538, Global Avg Loss: 0.01388184, Time: 0.2185 Steps: 34800, Updated lr: 0.000086 ***** Running evaluation checkpoint-34818 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-34818 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1289.286896, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001549, "eval_total_loss": 1.650765, "eval_acc": 0.999679, "eval_jaccard": 0.986562, "eval_prec": 0.987832, "eval_recall": 0.988133, "eval_f1": 0.987588, "eval_pr_auc": 0.995447, "eval_roc_auc": 0.999419, "eval_fmax": 0.993898, "eval_pmax": 0.996444, "eval_rmax": 0.991366, "eval_tmax": 0.18, "update_flag": true, "test_avg_loss": 0.001685, "test_total_loss": 1.796432, "test_acc": 0.999685, "test_jaccard": 0.985459, "test_prec": 0.986549, "test_recall": 0.987079, "test_f1": 0.986463, "test_pr_auc": 0.994349, "test_roc_auc": 0.999165, "test_fmax": 0.993354, "test_pmax": 0.996951, "test_rmax": 0.989783, "test_tmax": 0.2, "lr": 8.634719418651595e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.01387481044555292, "train_cur_epoch_loss": 1.7721187476253704, "train_cur_epoch_avg_loss": 
0.0003562763867361018, "train_cur_epoch_time": 1289.2868962287903, "train_cur_epoch_avg_time": 0.2592052465277021, "epoch": 7, "step": 34818} ################################################## Training, Epoch: 0008, Batch: 000182, Sample Num: 2912, Cur Loss: 0.00027408, Cur Avg Loss: 0.00036217, Log Avg loss: 0.00035513, Global Avg Loss: 0.01380454, Time: 0.2191 Steps: 35000, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000382, Sample Num: 6112, Cur Loss: 0.00842953, Cur Avg Loss: 0.00032247, Log Avg loss: 0.00028635, Global Avg Loss: 0.01372774, Time: 0.2162 Steps: 35200, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000582, Sample Num: 9312, Cur Loss: 0.00009024, Cur Avg Loss: 0.00038041, Log Avg loss: 0.00049107, Global Avg Loss: 0.01365295, Time: 0.3252 Steps: 35400, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000782, Sample Num: 12512, Cur Loss: 0.00001843, Cur Avg Loss: 0.00042128, Log Avg loss: 0.00054020, Global Avg Loss: 0.01357929, Time: 0.1086 Steps: 35600, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 000982, Sample Num: 15712, Cur Loss: 0.00000980, Cur Avg Loss: 0.00037662, Log Avg loss: 0.00020202, Global Avg Loss: 0.01350455, Time: 0.2162 Steps: 35800, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001182, Sample Num: 18912, Cur Loss: 0.00001866, Cur Avg Loss: 0.00037589, Log Avg loss: 0.00037231, Global Avg Loss: 0.01343160, Time: 0.2964 Steps: 36000, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001382, Sample Num: 22112, Cur Loss: 0.00002656, Cur Avg Loss: 0.00034534, Log Avg loss: 0.00016476, Global Avg Loss: 0.01335830, Time: 0.2170 Steps: 36200, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001582, Sample Num: 25312, Cur Loss: 0.00001707, Cur Avg Loss: 0.00032425, Log Avg loss: 0.00017857, Global Avg Loss: 0.01328588, Time: 0.2118 Steps: 36400, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001782, Sample Num: 28512, Cur Loss: 0.00003914, Cur Avg Loss: 0.00031267, Log Avg loss: 0.00022104, Global Avg Loss: 0.01321449, Time: 0.2192 Steps: 36600, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 001982, Sample Num: 31712, Cur Loss: 0.00026191, Cur Avg Loss: 0.00030349, Log Avg loss: 0.00022172, Global Avg Loss: 0.01314388, Time: 0.2240 Steps: 36800, Updated lr: 0.000086 Training, Epoch: 0008, Batch: 002182, Sample Num: 34912, Cur Loss: 0.00000231, Cur Avg Loss: 0.00031621, Log Avg loss: 0.00044229, Global Avg Loss: 0.01307522, Time: 0.2185 Steps: 37000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002382, Sample Num: 38112, Cur Loss: 0.00015835, Cur Avg Loss: 0.00031390, Log Avg loss: 0.00028863, Global Avg Loss: 0.01300647, Time: 0.2456 Steps: 37200, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002582, Sample Num: 41312, Cur Loss: 0.00008182, Cur Avg Loss: 0.00030902, Log Avg loss: 0.00025087, Global Avg Loss: 0.01293826, Time: 0.3949 Steps: 37400, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002782, Sample Num: 44512, Cur Loss: 0.00001767, Cur Avg Loss: 0.00031720, Log Avg loss: 0.00042287, Global Avg Loss: 0.01287169, Time: 0.2241 Steps: 37600, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 002982, Sample Num: 47712, Cur Loss: 0.00001586, Cur Avg Loss: 0.00032030, Log Avg loss: 0.00036346, Global Avg Loss: 0.01280551, Time: 0.3183 Steps: 37800, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003182, Sample Num: 50912, Cur Loss: 0.00000825, Cur Avg Loss: 0.00031688, Log Avg loss: 0.00026579, Global Avg Loss: 0.01273951, Time: 0.2645 Steps: 38000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 
003382, Sample Num: 54112, Cur Loss: 0.00003418, Cur Avg Loss: 0.00031589, Log Avg loss: 0.00030012, Global Avg Loss: 0.01267438, Time: 0.2192 Steps: 38200, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003582, Sample Num: 57312, Cur Loss: 0.00093365, Cur Avg Loss: 0.00031760, Log Avg loss: 0.00034660, Global Avg Loss: 0.01261018, Time: 0.2693 Steps: 38400, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003782, Sample Num: 60512, Cur Loss: 0.00001107, Cur Avg Loss: 0.00031324, Log Avg loss: 0.00023506, Global Avg Loss: 0.01254606, Time: 0.2180 Steps: 38600, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 003982, Sample Num: 63712, Cur Loss: 0.00001295, Cur Avg Loss: 0.00031604, Log Avg loss: 0.00036903, Global Avg Loss: 0.01248329, Time: 0.2218 Steps: 38800, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 004182, Sample Num: 66912, Cur Loss: 0.00000223, Cur Avg Loss: 0.00031495, Log Avg loss: 0.00029317, Global Avg Loss: 0.01242078, Time: 0.1241 Steps: 39000, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 004382, Sample Num: 70112, Cur Loss: 0.00000612, Cur Avg Loss: 0.00031647, Log Avg loss: 0.00034829, Global Avg Loss: 0.01235918, Time: 0.2198 Steps: 39200, Updated lr: 0.000085 Training, Epoch: 0008, Batch: 004582, Sample Num: 73312, Cur Loss: 0.00002859, Cur Avg Loss: 0.00031617, Log Avg loss: 0.00030976, Global Avg Loss: 0.01229802, Time: 0.4403 Steps: 39400, Updated lr: 0.000084 Training, Epoch: 0008, Batch: 004782, Sample Num: 76512, Cur Loss: 0.00008559, Cur Avg Loss: 0.00031435, Log Avg loss: 0.00027246, Global Avg Loss: 0.01223728, Time: 0.3572 Steps: 39600, Updated lr: 0.000084 ***** Running evaluation checkpoint-39792 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-39792 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1323.716450, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001532, "eval_total_loss": 1.633179, "eval_acc": 0.999688, "eval_jaccard": 0.986892, "eval_prec": 0.988345, "eval_recall": 0.988302, "eval_f1": 0.987907, "eval_pr_auc": 0.995572, "eval_roc_auc": 0.999451, "eval_fmax": 0.994386, "eval_pmax": 0.997559, "eval_rmax": 0.991233, "eval_tmax": 0.22, "update_flag": true, "test_avg_loss": 0.001693, "test_total_loss": 1.804457, "test_acc": 0.999695, "test_jaccard": 0.985981, "test_prec": 0.987106, "test_recall": 0.987528, "test_f1": 0.986951, "test_pr_auc": 0.994686, "test_roc_auc": 0.999196, "test_fmax": 0.993483, "test_pmax": 0.996018, "test_rmax": 0.99096, "test_tmax": 0.1, "lr": 8.43391199031086e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.012180251564930812, "train_cur_epoch_loss": 1.5834201784652606, "train_cur_epoch_avg_loss": 0.0003183394005760476, "train_cur_epoch_time": 1323.7164504528046, "train_cur_epoch_avg_time": 0.2661271512772024, "epoch": 8, "step": 39792} ################################################## Training, Epoch: 0009, Batch: 000008, Sample Num: 128, Cur Loss: 0.00002169, Cur Avg Loss: 0.00006878, Log Avg loss: 0.00040383, Global Avg Loss: 0.01217782, Time: 0.1021 Steps: 39800, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000208, Sample Num: 3328, Cur Loss: 0.00007568, Cur Avg Loss: 0.00028526, Log Avg loss: 0.00029392, Global Avg Loss: 0.01211840, Time: 0.3938 Steps: 40000, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000408, Sample Num: 6528, Cur Loss: 
0.00004443, Cur Avg Loss: 0.00027500, Log Avg loss: 0.00026432, Global Avg Loss: 0.01205942, Time: 0.2181 Steps: 40200, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000608, Sample Num: 9728, Cur Loss: 0.00002133, Cur Avg Loss: 0.00033862, Log Avg loss: 0.00046842, Global Avg Loss: 0.01200204, Time: 0.3348 Steps: 40400, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 000808, Sample Num: 12928, Cur Loss: 0.00002851, Cur Avg Loss: 0.00037069, Log Avg loss: 0.00046816, Global Avg Loss: 0.01194522, Time: 0.0886 Steps: 40600, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001008, Sample Num: 16128, Cur Loss: 0.00001934, Cur Avg Loss: 0.00033437, Log Avg loss: 0.00018767, Global Avg Loss: 0.01188759, Time: 0.2189 Steps: 40800, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001208, Sample Num: 19328, Cur Loss: 0.00002726, Cur Avg Loss: 0.00032489, Log Avg loss: 0.00027712, Global Avg Loss: 0.01183095, Time: 0.2733 Steps: 41000, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001408, Sample Num: 22528, Cur Loss: 0.00001920, Cur Avg Loss: 0.00030002, Log Avg loss: 0.00014980, Global Avg Loss: 0.01177425, Time: 0.2658 Steps: 41200, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001608, Sample Num: 25728, Cur Loss: 0.00001770, Cur Avg Loss: 0.00028514, Log Avg loss: 0.00018034, Global Avg Loss: 0.01171824, Time: 0.2183 Steps: 41400, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 001808, Sample Num: 28928, Cur Loss: 0.00001176, Cur Avg Loss: 0.00028075, Log Avg loss: 0.00024550, Global Avg Loss: 0.01166308, Time: 0.2085 Steps: 41600, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 002008, Sample Num: 32128, Cur Loss: 0.00001648, Cur Avg Loss: 0.00027090, Log Avg loss: 0.00018179, Global Avg Loss: 0.01160815, Time: 0.1472 Steps: 41800, Updated lr: 0.000084 Training, Epoch: 0009, Batch: 002208, Sample Num: 35328, Cur Loss: 0.00000194, Cur Avg Loss: 0.00029345, Log Avg loss: 0.00051984, Global Avg Loss: 0.01155535, Time: 0.2584 Steps: 42000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 002408, Sample Num: 38528, Cur Loss: 0.00003804, Cur Avg Loss: 0.00028781, Log Avg loss: 0.00022554, Global Avg Loss: 0.01150165, Time: 0.2561 Steps: 42200, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 002608, Sample Num: 41728, Cur Loss: 0.00001300, Cur Avg Loss: 0.00028102, Log Avg loss: 0.00019937, Global Avg Loss: 0.01144834, Time: 0.2518 Steps: 42400, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 002808, Sample Num: 44928, Cur Loss: 0.00001302, Cur Avg Loss: 0.00028499, Log Avg loss: 0.00033667, Global Avg Loss: 0.01139617, Time: 0.2105 Steps: 42600, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003008, Sample Num: 48128, Cur Loss: 0.00010067, Cur Avg Loss: 0.00029108, Log Avg loss: 0.00037668, Global Avg Loss: 0.01134468, Time: 0.1557 Steps: 42800, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003208, Sample Num: 51328, Cur Loss: 0.00001490, Cur Avg Loss: 0.00028615, Log Avg loss: 0.00021202, Global Avg Loss: 0.01129290, Time: 0.2161 Steps: 43000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003408, Sample Num: 54528, Cur Loss: 0.00002645, Cur Avg Loss: 0.00028450, Log Avg loss: 0.00025792, Global Avg Loss: 0.01124181, Time: 0.2145 Steps: 43200, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003608, Sample Num: 57728, Cur Loss: 0.00000716, Cur Avg Loss: 0.00028803, Log Avg loss: 0.00034816, Global Avg Loss: 0.01119161, Time: 0.2202 Steps: 43400, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 003808, Sample Num: 60928, Cur Loss: 0.00002292, Cur Avg 
Loss: 0.00028489, Log Avg loss: 0.00022832, Global Avg Loss: 0.01114132, Time: 0.2193 Steps: 43600, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004008, Sample Num: 64128, Cur Loss: 0.00393393, Cur Avg Loss: 0.00028577, Log Avg loss: 0.00030260, Global Avg Loss: 0.01109183, Time: 0.2261 Steps: 43800, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004208, Sample Num: 67328, Cur Loss: 0.00003363, Cur Avg Loss: 0.00028109, Log Avg loss: 0.00018714, Global Avg Loss: 0.01104226, Time: 0.3131 Steps: 44000, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004408, Sample Num: 70528, Cur Loss: 0.00004445, Cur Avg Loss: 0.00027919, Log Avg loss: 0.00023929, Global Avg Loss: 0.01099338, Time: 0.2717 Steps: 44200, Updated lr: 0.000083 Training, Epoch: 0009, Batch: 004608, Sample Num: 73728, Cur Loss: 0.00001254, Cur Avg Loss: 0.00027959, Log Avg loss: 0.00028843, Global Avg Loss: 0.01094516, Time: 0.2205 Steps: 44400, Updated lr: 0.000082 Training, Epoch: 0009, Batch: 004808, Sample Num: 76928, Cur Loss: 0.00000555, Cur Avg Loss: 0.00027950, Log Avg loss: 0.00027731, Global Avg Loss: 0.01089732, Time: 0.0874 Steps: 44600, Updated lr: 0.000082 ***** Running evaluation checkpoint-44766 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-44766 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1258.075704, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001567, "eval_total_loss": 1.670054, "eval_acc": 0.999683, "eval_jaccard": 0.986687, "eval_prec": 0.988012, "eval_recall": 0.988313, "eval_f1": 0.987724, "eval_pr_auc": 0.99566, "eval_roc_auc": 0.999437, "eval_fmax": 0.994497, "eval_pmax": 0.996609, "eval_rmax": 0.992394, "eval_tmax": 0.11, "update_flag": false, "test_avg_loss": 0.001806, "test_total_loss": 1.92569, "test_acc": 0.999687, "test_jaccard": 0.985888, "test_prec": 0.98695, "test_recall": 0.987478, "test_f1": 0.986843, "test_pr_auc": 0.994623, "test_roc_auc": 0.999189, "test_fmax": 0.993805, "test_pmax": 0.996741, "test_rmax": 0.990886, "test_tmax": 0.11, "lr": 8.233104561970125e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.010858034454638074, "train_cur_epoch_loss": 1.3942001246010136, "train_cur_epoch_avg_loss": 0.00028029757229614265, "train_cur_epoch_time": 1258.0757036209106, "train_cur_epoch_avg_time": 0.25293037869338775, "epoch": 9, "step": 44766} ################################################## Training, Epoch: 0010, Batch: 000034, Sample Num: 544, Cur Loss: 0.00000943, Cur Avg Loss: 0.00007573, Log Avg loss: 0.00026479, Global Avg Loss: 0.01084985, Time: 0.0906 Steps: 44800, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000234, Sample Num: 3744, Cur Loss: 0.00007425, Cur Avg Loss: 0.00018314, Log Avg loss: 0.00020140, Global Avg Loss: 0.01080253, Time: 0.3160 Steps: 45000, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000434, Sample Num: 6944, Cur Loss: 0.00001518, Cur Avg Loss: 0.00022528, Log Avg loss: 0.00027459, Global Avg Loss: 0.01075594, Time: 0.2925 Steps: 45200, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000634, Sample Num: 10144, Cur Loss: 0.00002895, Cur Avg Loss: 0.00025749, Log Avg loss: 0.00032737, Global Avg Loss: 0.01071000, Time: 0.2192 Steps: 45400, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 000834, Sample Num: 13344, Cur Loss: 0.00001823, Cur Avg Loss: 0.00028031, Log Avg loss: 0.00035267, 
Global Avg Loss: 0.01066457, Time: 0.2207 Steps: 45600, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001034, Sample Num: 16544, Cur Loss: 0.00007957, Cur Avg Loss: 0.00025875, Log Avg loss: 0.00016884, Global Avg Loss: 0.01061874, Time: 0.2704 Steps: 45800, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001234, Sample Num: 19744, Cur Loss: 0.00002332, Cur Avg Loss: 0.00025154, Log Avg loss: 0.00021426, Global Avg Loss: 0.01057350, Time: 0.4898 Steps: 46000, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001434, Sample Num: 22944, Cur Loss: 0.00000693, Cur Avg Loss: 0.00023839, Log Avg loss: 0.00015728, Global Avg Loss: 0.01052841, Time: 0.4356 Steps: 46200, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001634, Sample Num: 26144, Cur Loss: 0.00000705, Cur Avg Loss: 0.00023377, Log Avg loss: 0.00020064, Global Avg Loss: 0.01048390, Time: 0.2158 Steps: 46400, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 001834, Sample Num: 29344, Cur Loss: 0.00001686, Cur Avg Loss: 0.00022747, Log Avg loss: 0.00017599, Global Avg Loss: 0.01043966, Time: 0.2195 Steps: 46600, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 002034, Sample Num: 32544, Cur Loss: 0.00612012, Cur Avg Loss: 0.00022705, Log Avg loss: 0.00022318, Global Avg Loss: 0.01039600, Time: 0.2492 Steps: 46800, Updated lr: 0.000082 Training, Epoch: 0010, Batch: 002234, Sample Num: 35744, Cur Loss: 0.00004078, Cur Avg Loss: 0.00023774, Log Avg loss: 0.00034650, Global Avg Loss: 0.01035323, Time: 0.2446 Steps: 47000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 002434, Sample Num: 38944, Cur Loss: 0.00003090, Cur Avg Loss: 0.00024097, Log Avg loss: 0.00027703, Global Avg Loss: 0.01031054, Time: 0.2164 Steps: 47200, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 002634, Sample Num: 42144, Cur Loss: 0.00000344, Cur Avg Loss: 0.00024383, Log Avg loss: 0.00027869, Global Avg Loss: 0.01026821, Time: 0.2997 Steps: 47400, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 002834, Sample Num: 45344, Cur Loss: 0.00000817, Cur Avg Loss: 0.00024584, Log Avg loss: 0.00027230, Global Avg Loss: 0.01022621, Time: 0.2168 Steps: 47600, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003034, Sample Num: 48544, Cur Loss: 0.00408453, Cur Avg Loss: 0.00025522, Log Avg loss: 0.00038801, Global Avg Loss: 0.01018504, Time: 0.2175 Steps: 47800, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003234, Sample Num: 51744, Cur Loss: 0.00002953, Cur Avg Loss: 0.00024958, Log Avg loss: 0.00016417, Global Avg Loss: 0.01014329, Time: 0.2223 Steps: 48000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003434, Sample Num: 54944, Cur Loss: 0.00001544, Cur Avg Loss: 0.00025098, Log Avg loss: 0.00027360, Global Avg Loss: 0.01010234, Time: 0.2721 Steps: 48200, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003634, Sample Num: 58144, Cur Loss: 0.00003032, Cur Avg Loss: 0.00025243, Log Avg loss: 0.00027735, Global Avg Loss: 0.01006174, Time: 0.3153 Steps: 48400, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 003834, Sample Num: 61344, Cur Loss: 0.00000668, Cur Avg Loss: 0.00024639, Log Avg loss: 0.00013661, Global Avg Loss: 0.01002089, Time: 0.2186 Steps: 48600, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004034, Sample Num: 64544, Cur Loss: 0.00000553, Cur Avg Loss: 0.00025190, Log Avg loss: 0.00035751, Global Avg Loss: 0.00998129, Time: 0.2182 Steps: 48800, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004234, Sample Num: 67744, Cur Loss: 0.00008684, Cur Avg Loss: 0.00025288, Log Avg loss: 0.00027253, Global Avg Loss: 
0.00994166, Time: 0.2234 Steps: 49000, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004434, Sample Num: 70944, Cur Loss: 0.00015816, Cur Avg Loss: 0.00024727, Log Avg loss: 0.00012851, Global Avg Loss: 0.00990177, Time: 0.2179 Steps: 49200, Updated lr: 0.000081 Training, Epoch: 0010, Batch: 004634, Sample Num: 74144, Cur Loss: 0.00001265, Cur Avg Loss: 0.00025037, Log Avg loss: 0.00031921, Global Avg Loss: 0.00986298, Time: 0.2184 Steps: 49400, Updated lr: 0.000080 Training, Epoch: 0010, Batch: 004834, Sample Num: 77344, Cur Loss: 0.00000497, Cur Avg Loss: 0.00025029, Log Avg loss: 0.00024845, Global Avg Loss: 0.00982421, Time: 0.2981 Steps: 49600, Updated lr: 0.000080 ***** Running evaluation checkpoint-49740 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-49740 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1260.408430, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001561, "eval_total_loss": 1.663643, "eval_acc": 0.999694, "eval_jaccard": 0.987227, "eval_prec": 0.988719, "eval_recall": 0.988703, "eval_f1": 0.988281, "eval_pr_auc": 0.995632, "eval_roc_auc": 0.999404, "eval_fmax": 0.994474, "eval_pmax": 0.996279, "eval_rmax": 0.992676, "eval_tmax": 0.07, "update_flag": true, "test_avg_loss": 0.001801, "test_total_loss": 1.920198, "test_acc": 0.999693, "test_jaccard": 0.986079, "test_prec": 0.987103, "test_recall": 0.987574, "test_f1": 0.986981, "test_pr_auc": 0.994478, "test_roc_auc": 0.999116, "test_fmax": 0.993952, "test_pmax": 0.996799, "test_rmax": 0.991121, "test_tmax": 0.09, "lr": 8.03229713362939e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.009797730916086338, "train_cur_epoch_loss": 1.268365369806439, "train_cur_epoch_avg_loss": 0.0002549990691207155, "train_cur_epoch_time": 1260.4084296226501, "train_cur_epoch_avg_time": 0.25339936261010254, "epoch": 10, "step": 49740} ################################################## Training, Epoch: 0011, Batch: 000060, Sample Num: 960, Cur Loss: 0.00001438, Cur Avg Loss: 0.00024154, Log Avg loss: 0.00036473, Global Avg Loss: 0.00978622, Time: 0.3636 Steps: 49800, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000260, Sample Num: 4160, Cur Loss: 0.00004024, Cur Avg Loss: 0.00025871, Log Avg loss: 0.00026386, Global Avg Loss: 0.00974813, Time: 0.2301 Steps: 50000, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000460, Sample Num: 7360, Cur Loss: 0.00003468, Cur Avg Loss: 0.00029135, Log Avg loss: 0.00033378, Global Avg Loss: 0.00971062, Time: 0.0860 Steps: 50200, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000660, Sample Num: 10560, Cur Loss: 0.00045046, Cur Avg Loss: 0.00028341, Log Avg loss: 0.00026515, Global Avg Loss: 0.00967314, Time: 0.2279 Steps: 50400, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 000860, Sample Num: 13760, Cur Loss: 0.00001512, Cur Avg Loss: 0.00028879, Log Avg loss: 0.00030655, Global Avg Loss: 0.00963612, Time: 0.3285 Steps: 50600, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001060, Sample Num: 16960, Cur Loss: 0.00004188, Cur Avg Loss: 0.00027009, Log Avg loss: 0.00018967, Global Avg Loss: 0.00959893, Time: 0.2214 Steps: 50800, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001260, Sample Num: 20160, Cur Loss: 0.00000360, Cur Avg Loss: 0.00025484, Log Avg loss: 0.00017401, Global Avg Loss: 0.00956197, Time: 0.4787 Steps: 51000, 
Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001460, Sample Num: 23360, Cur Loss: 0.00000386, Cur Avg Loss: 0.00023841, Log Avg loss: 0.00013492, Global Avg Loss: 0.00952514, Time: 0.2723 Steps: 51200, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001660, Sample Num: 26560, Cur Loss: 0.00001554, Cur Avg Loss: 0.00022490, Log Avg loss: 0.00012628, Global Avg Loss: 0.00948857, Time: 0.2583 Steps: 51400, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 001860, Sample Num: 29760, Cur Loss: 0.00000960, Cur Avg Loss: 0.00022152, Log Avg loss: 0.00019345, Global Avg Loss: 0.00945254, Time: 0.2169 Steps: 51600, Updated lr: 0.000080 Training, Epoch: 0011, Batch: 002060, Sample Num: 32960, Cur Loss: 0.00005029, Cur Avg Loss: 0.00022298, Log Avg loss: 0.00023660, Global Avg Loss: 0.00941696, Time: 0.3157 Steps: 51800, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002260, Sample Num: 36160, Cur Loss: 0.00579973, Cur Avg Loss: 0.00023027, Log Avg loss: 0.00030527, Global Avg Loss: 0.00938191, Time: 0.4366 Steps: 52000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002460, Sample Num: 39360, Cur Loss: 0.00001276, Cur Avg Loss: 0.00022732, Log Avg loss: 0.00019400, Global Avg Loss: 0.00934671, Time: 0.3127 Steps: 52200, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002660, Sample Num: 42560, Cur Loss: 0.00000123, Cur Avg Loss: 0.00022662, Log Avg loss: 0.00021801, Global Avg Loss: 0.00931187, Time: 0.2477 Steps: 52400, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 002860, Sample Num: 45760, Cur Loss: 0.00016726, Cur Avg Loss: 0.00022983, Log Avg loss: 0.00027255, Global Avg Loss: 0.00927750, Time: 0.3192 Steps: 52600, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003060, Sample Num: 48960, Cur Loss: 0.00015515, Cur Avg Loss: 0.00023746, Log Avg loss: 0.00034650, Global Avg Loss: 0.00924367, Time: 0.2711 Steps: 52800, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003260, Sample Num: 52160, Cur Loss: 0.00000137, Cur Avg Loss: 0.00022946, Log Avg loss: 0.00010705, Global Avg Loss: 0.00920919, Time: 0.4236 Steps: 53000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003460, Sample Num: 55360, Cur Loss: 0.00000203, Cur Avg Loss: 0.00022949, Log Avg loss: 0.00023002, Global Avg Loss: 0.00917544, Time: 0.2903 Steps: 53200, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003660, Sample Num: 58560, Cur Loss: 0.00000259, Cur Avg Loss: 0.00023265, Log Avg loss: 0.00028738, Global Avg Loss: 0.00914215, Time: 0.3158 Steps: 53400, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 003860, Sample Num: 61760, Cur Loss: 0.00002204, Cur Avg Loss: 0.00022602, Log Avg loss: 0.00010458, Global Avg Loss: 0.00910842, Time: 0.2199 Steps: 53600, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004060, Sample Num: 64960, Cur Loss: 0.00000307, Cur Avg Loss: 0.00022815, Log Avg loss: 0.00026935, Global Avg Loss: 0.00907557, Time: 0.4302 Steps: 53800, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004260, Sample Num: 68160, Cur Loss: 0.00001438, Cur Avg Loss: 0.00022363, Log Avg loss: 0.00013185, Global Avg Loss: 0.00904244, Time: 0.5077 Steps: 54000, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004460, Sample Num: 71360, Cur Loss: 0.00025728, Cur Avg Loss: 0.00022183, Log Avg loss: 0.00018344, Global Avg Loss: 0.00900975, Time: 0.2432 Steps: 54200, Updated lr: 0.000079 Training, Epoch: 0011, Batch: 004660, Sample Num: 74560, Cur Loss: 0.00000131, Cur Avg Loss: 0.00022241, Log Avg loss: 0.00023534, Global Avg Loss: 0.00897749, Time: 0.2195 Steps: 54400, Updated lr: 
0.000078 Training, Epoch: 0011, Batch: 004860, Sample Num: 77760, Cur Loss: 0.00000862, Cur Avg Loss: 0.00022334, Log Avg loss: 0.00024497, Global Avg Loss: 0.00894550, Time: 0.2186 Steps: 54600, Updated lr: 0.000078 ***** Running evaluation checkpoint-54714 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-54714 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1294.933127, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001667, "eval_total_loss": 1.776498, "eval_acc": 0.999677, "eval_jaccard": 0.986691, "eval_prec": 0.988032, "eval_recall": 0.988328, "eval_f1": 0.987725, "eval_pr_auc": 0.995446, "eval_roc_auc": 0.999435, "eval_fmax": 0.994131, "eval_pmax": 0.996742, "eval_rmax": 0.991534, "eval_tmax": 0.17, "update_flag": false, "test_avg_loss": 0.001869, "test_total_loss": 1.992274, "test_acc": 0.999694, "test_jaccard": 0.986214, "test_prec": 0.98708, "test_recall": 0.987842, "test_f1": 0.987101, "test_pr_auc": 0.994647, "test_roc_auc": 0.999157, "test_fmax": 0.993587, "test_pmax": 0.997183, "test_rmax": 0.990016, "test_tmax": 0.22, "lr": 7.831489705288656e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.008927568747540145, "train_cur_epoch_loss": 1.1238606867770997, "train_cur_epoch_avg_loss": 0.00022594706207822672, "train_cur_epoch_time": 1294.9331266880035, "train_cur_epoch_avg_time": 0.26034039539364767, "epoch": 11, "step": 54714} ################################################## Training, Epoch: 0012, Batch: 000086, Sample Num: 1376, Cur Loss: 0.00008102, Cur Avg Loss: 0.00028375, Log Avg loss: 0.00031426, Global Avg Loss: 0.00891400, Time: 0.2198 Steps: 54800, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000286, Sample Num: 4576, Cur Loss: 0.00003136, Cur Avg Loss: 0.00020757, Log Avg loss: 0.00017481, Global Avg Loss: 0.00888222, Time: 0.2165 Steps: 55000, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000486, Sample Num: 7776, Cur Loss: 0.00003322, Cur Avg Loss: 0.00026194, Log Avg loss: 0.00033970, Global Avg Loss: 0.00885127, Time: 0.2903 Steps: 55200, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000686, Sample Num: 10976, Cur Loss: 0.00000064, Cur Avg Loss: 0.00026021, Log Avg loss: 0.00025599, Global Avg Loss: 0.00882024, Time: 0.3332 Steps: 55400, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 000886, Sample Num: 14176, Cur Loss: 0.00000854, Cur Avg Loss: 0.00025733, Log Avg loss: 0.00024746, Global Avg Loss: 0.00878941, Time: 0.2484 Steps: 55600, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001086, Sample Num: 17376, Cur Loss: 0.00000125, Cur Avg Loss: 0.00024188, Log Avg loss: 0.00017342, Global Avg Loss: 0.00875852, Time: 0.3724 Steps: 55800, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001286, Sample Num: 20576, Cur Loss: 0.00000796, Cur Avg Loss: 0.00021891, Log Avg loss: 0.00009419, Global Avg Loss: 0.00872758, Time: 0.2228 Steps: 56000, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001486, Sample Num: 23776, Cur Loss: 0.00001225, Cur Avg Loss: 0.00020415, Log Avg loss: 0.00010922, Global Avg Loss: 0.00869691, Time: 0.2206 Steps: 56200, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 001686, Sample Num: 26976, Cur Loss: 0.00018562, Cur Avg Loss: 0.00019483, Log Avg loss: 0.00012560, Global Avg Loss: 0.00866652, Time: 0.2096 Steps: 56400, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 
001886, Sample Num: 30176, Cur Loss: 0.00000520, Cur Avg Loss: 0.00019062, Log Avg loss: 0.00015514, Global Avg Loss: 0.00863644, Time: 0.2730 Steps: 56600, Updated lr: 0.000078 Training, Epoch: 0012, Batch: 002086, Sample Num: 33376, Cur Loss: 0.00001297, Cur Avg Loss: 0.00019765, Log Avg loss: 0.00026399, Global Avg Loss: 0.00860696, Time: 0.3948 Steps: 56800, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002286, Sample Num: 36576, Cur Loss: 0.00005702, Cur Avg Loss: 0.00020209, Log Avg loss: 0.00024835, Global Avg Loss: 0.00857763, Time: 0.3247 Steps: 57000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002486, Sample Num: 39776, Cur Loss: 0.00000312, Cur Avg Loss: 0.00019739, Log Avg loss: 0.00014364, Global Avg Loss: 0.00854814, Time: 0.2160 Steps: 57200, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002686, Sample Num: 42976, Cur Loss: 0.00004610, Cur Avg Loss: 0.00020007, Log Avg loss: 0.00023342, Global Avg Loss: 0.00851917, Time: 0.2183 Steps: 57400, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 002886, Sample Num: 46176, Cur Loss: 0.00000937, Cur Avg Loss: 0.00020422, Log Avg loss: 0.00025996, Global Avg Loss: 0.00849049, Time: 0.3149 Steps: 57600, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003086, Sample Num: 49376, Cur Loss: 0.00004108, Cur Avg Loss: 0.00020575, Log Avg loss: 0.00022778, Global Avg Loss: 0.00846190, Time: 0.2165 Steps: 57800, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003286, Sample Num: 52576, Cur Loss: 0.00001920, Cur Avg Loss: 0.00020077, Log Avg loss: 0.00012390, Global Avg Loss: 0.00843315, Time: 0.3402 Steps: 58000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003486, Sample Num: 55776, Cur Loss: 0.00000064, Cur Avg Loss: 0.00019995, Log Avg loss: 0.00018648, Global Avg Loss: 0.00840481, Time: 0.2205 Steps: 58200, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003686, Sample Num: 58976, Cur Loss: 0.00585655, Cur Avg Loss: 0.00020507, Log Avg loss: 0.00029429, Global Avg Loss: 0.00837704, Time: 0.2600 Steps: 58400, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 003886, Sample Num: 62176, Cur Loss: 0.00001989, Cur Avg Loss: 0.00019713, Log Avg loss: 0.00005095, Global Avg Loss: 0.00834862, Time: 0.2183 Steps: 58600, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004086, Sample Num: 65376, Cur Loss: 0.00002745, Cur Avg Loss: 0.00020135, Log Avg loss: 0.00028322, Global Avg Loss: 0.00832119, Time: 0.2164 Steps: 58800, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004286, Sample Num: 68576, Cur Loss: 0.00001102, Cur Avg Loss: 0.00019844, Log Avg loss: 0.00013903, Global Avg Loss: 0.00829345, Time: 0.2215 Steps: 59000, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004486, Sample Num: 71776, Cur Loss: 0.00025380, Cur Avg Loss: 0.00019972, Log Avg loss: 0.00022726, Global Avg Loss: 0.00826620, Time: 0.3665 Steps: 59200, Updated lr: 0.000077 Training, Epoch: 0012, Batch: 004686, Sample Num: 74976, Cur Loss: 0.00000066, Cur Avg Loss: 0.00019981, Log Avg loss: 0.00020176, Global Avg Loss: 0.00823905, Time: 0.3247 Steps: 59400, Updated lr: 0.000076 Training, Epoch: 0012, Batch: 004886, Sample Num: 78176, Cur Loss: 0.00461765, Cur Avg Loss: 0.00020336, Log Avg loss: 0.00028662, Global Avg Loss: 0.00821236, Time: 0.2432 Steps: 59600, Updated lr: 0.000076 ***** Running evaluation checkpoint-59688 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-59688 ***** Test Dataset Instantaneous 
batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1311.959716, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001635, "eval_total_loss": 1.74274, "eval_acc": 0.999688, "eval_jaccard": 0.987146, "eval_prec": 0.98847, "eval_recall": 0.98867, "eval_f1": 0.988151, "eval_pr_auc": 0.99578, "eval_roc_auc": 0.999421, "eval_fmax": 0.994407, "eval_pmax": 0.997219, "eval_rmax": 0.991611, "eval_tmax": 0.14, "update_flag": false, "test_avg_loss": 0.001929, "test_total_loss": 2.055896, "test_acc": 0.999696, "test_jaccard": 0.986035, "test_prec": 0.986904, "test_recall": 0.987671, "test_f1": 0.986927, "test_pr_auc": 0.994647, "test_roc_auc": 0.999146, "test_fmax": 0.993889, "test_pmax": 0.996268, "test_rmax": 0.991522, "test_tmax": 0.07, "lr": 7.630682276947921e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.008200725233424449, "train_cur_epoch_loss": 1.021891279726838, "train_cur_epoch_avg_loss": 0.00020544657815175675, "train_cur_epoch_time": 1311.959715604782, "train_cur_epoch_avg_time": 0.26376351339058746, "epoch": 12, "step": 59688} ################################################## Training, Epoch: 0013, Batch: 000112, Sample Num: 1792, Cur Loss: 0.00010194, Cur Avg Loss: 0.00018477, Log Avg loss: 0.00024474, Global Avg Loss: 0.00818571, Time: 0.2188 Steps: 59800, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000312, Sample Num: 4992, Cur Loss: 0.00001479, Cur Avg Loss: 0.00016880, Log Avg loss: 0.00015986, Global Avg Loss: 0.00815896, Time: 0.2248 Steps: 60000, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000512, Sample Num: 8192, Cur Loss: 0.00000142, Cur Avg Loss: 0.00022416, Log Avg loss: 0.00031052, Global Avg Loss: 0.00813288, Time: 0.0890 Steps: 60200, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000712, Sample Num: 11392, Cur Loss: 0.00000664, Cur Avg Loss: 0.00026104, Log Avg loss: 0.00035544, Global Avg Loss: 0.00810713, Time: 0.1760 Steps: 60400, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 000912, Sample Num: 14592, Cur Loss: 0.00002273, Cur Avg Loss: 0.00025325, Log Avg loss: 0.00022552, Global Avg Loss: 0.00808112, Time: 0.2153 Steps: 60600, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001112, Sample Num: 17792, Cur Loss: 0.00000804, Cur Avg Loss: 0.00023111, Log Avg loss: 0.00013019, Global Avg Loss: 0.00805497, Time: 0.0855 Steps: 60800, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001312, Sample Num: 20992, Cur Loss: 0.00000531, Cur Avg Loss: 0.00021268, Log Avg loss: 0.00011017, Global Avg Loss: 0.00802892, Time: 0.2792 Steps: 61000, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001512, Sample Num: 24192, Cur Loss: 0.00002141, Cur Avg Loss: 0.00019878, Log Avg loss: 0.00010764, Global Avg Loss: 0.00800303, Time: 0.2754 Steps: 61200, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001712, Sample Num: 27392, Cur Loss: 0.00000790, Cur Avg Loss: 0.00019938, Log Avg loss: 0.00020385, Global Avg Loss: 0.00797763, Time: 0.2243 Steps: 61400, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 001912, Sample Num: 30592, Cur Loss: 0.00003152, Cur Avg Loss: 0.00018966, Log Avg loss: 0.00010645, Global Avg Loss: 0.00795207, Time: 0.2208 Steps: 61600, Updated lr: 0.000076 Training, Epoch: 0013, Batch: 002112, Sample Num: 33792, Cur Loss: 0.00185414, Cur Avg Loss: 0.00019293, Log Avg loss: 0.00022426, Global Avg Loss: 0.00792706, Time: 0.4214 Steps: 61800, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002312, Sample Num: 36992, Cur Loss: 0.00000431, Cur Avg 
Loss: 0.00020892, Log Avg loss: 0.00037776, Global Avg Loss: 0.00790271, Time: 0.2147 Steps: 62000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002512, Sample Num: 40192, Cur Loss: 0.00000654, Cur Avg Loss: 0.00020090, Log Avg loss: 0.00010816, Global Avg Loss: 0.00787765, Time: 0.3169 Steps: 62200, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002712, Sample Num: 43392, Cur Loss: 0.00000759, Cur Avg Loss: 0.00020097, Log Avg loss: 0.00020180, Global Avg Loss: 0.00785304, Time: 0.2187 Steps: 62400, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 002912, Sample Num: 46592, Cur Loss: 0.00000027, Cur Avg Loss: 0.00020825, Log Avg loss: 0.00030700, Global Avg Loss: 0.00782893, Time: 0.2149 Steps: 62600, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003112, Sample Num: 49792, Cur Loss: 0.00000853, Cur Avg Loss: 0.00021013, Log Avg loss: 0.00023746, Global Avg Loss: 0.00780476, Time: 0.2187 Steps: 62800, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003312, Sample Num: 52992, Cur Loss: 0.00012917, Cur Avg Loss: 0.00020738, Log Avg loss: 0.00016458, Global Avg Loss: 0.00778050, Time: 0.2757 Steps: 63000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003512, Sample Num: 56192, Cur Loss: 0.00056718, Cur Avg Loss: 0.00020764, Log Avg loss: 0.00021199, Global Avg Loss: 0.00775655, Time: 0.2986 Steps: 63200, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003712, Sample Num: 59392, Cur Loss: 0.00000201, Cur Avg Loss: 0.00020526, Log Avg loss: 0.00016358, Global Avg Loss: 0.00773260, Time: 0.2493 Steps: 63400, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 003912, Sample Num: 62592, Cur Loss: 0.00000130, Cur Avg Loss: 0.00019987, Log Avg loss: 0.00009971, Global Avg Loss: 0.00770860, Time: 0.2606 Steps: 63600, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 004112, Sample Num: 65792, Cur Loss: 0.00002165, Cur Avg Loss: 0.00020094, Log Avg loss: 0.00022191, Global Avg Loss: 0.00768513, Time: 0.1692 Steps: 63800, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 004312, Sample Num: 68992, Cur Loss: 0.00000309, Cur Avg Loss: 0.00019909, Log Avg loss: 0.00016096, Global Avg Loss: 0.00766161, Time: 0.2599 Steps: 64000, Updated lr: 0.000075 Training, Epoch: 0013, Batch: 004512, Sample Num: 72192, Cur Loss: 0.00000245, Cur Avg Loss: 0.00019840, Log Avg loss: 0.00018361, Global Avg Loss: 0.00763832, Time: 0.2717 Steps: 64200, Updated lr: 0.000074 Training, Epoch: 0013, Batch: 004712, Sample Num: 75392, Cur Loss: 0.00000156, Cur Avg Loss: 0.00019669, Log Avg loss: 0.00015805, Global Avg Loss: 0.00761509, Time: 0.2703 Steps: 64400, Updated lr: 0.000074 Training, Epoch: 0013, Batch: 004912, Sample Num: 78592, Cur Loss: 0.00000513, Cur Avg Loss: 0.00019704, Log Avg loss: 0.00020547, Global Avg Loss: 0.00759215, Time: 0.2230 Steps: 64600, Updated lr: 0.000074 ***** Running evaluation checkpoint-64662 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-64662 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1311.838043, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001752, "eval_total_loss": 1.86787, "eval_acc": 0.999683, "eval_jaccard": 0.986681, "eval_prec": 0.987937, "eval_recall": 0.988277, "eval_f1": 0.987686, "eval_pr_auc": 0.995641, "eval_roc_auc": 0.999408, "eval_fmax": 0.994263, "eval_pmax": 0.997165, "eval_rmax": 0.991378, "eval_tmax": 
0.15, "update_flag": false, "test_avg_loss": 0.001978, "test_total_loss": 2.109014, "test_acc": 0.99969, "test_jaccard": 0.985889, "test_prec": 0.986705, "test_recall": 0.987778, "test_f1": 0.986852, "test_pr_auc": 0.994746, "test_roc_auc": 0.999166, "test_fmax": 0.99379, "test_pmax": 0.997473, "test_rmax": 0.990134, "test_tmax": 0.2, "lr": 7.429874848607187e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.007585293238015371, "train_cur_epoch_loss": 0.9953436239114417, "train_cur_epoch_avg_loss": 0.00020010929310644183, "train_cur_epoch_time": 1311.838042974472, "train_cur_epoch_avg_time": 0.26373905166354483, "epoch": 13, "step": 64662} ################################################## Training, Epoch: 0014, Batch: 000138, Sample Num: 2208, Cur Loss: 0.00000676, Cur Avg Loss: 0.00010450, Log Avg loss: 0.00020941, Global Avg Loss: 0.00756936, Time: 0.2652 Steps: 64800, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000338, Sample Num: 5408, Cur Loss: 0.00000314, Cur Avg Loss: 0.00011728, Log Avg loss: 0.00012609, Global Avg Loss: 0.00754646, Time: 0.2183 Steps: 65000, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000538, Sample Num: 8608, Cur Loss: 0.00001716, Cur Avg Loss: 0.00017921, Log Avg loss: 0.00028388, Global Avg Loss: 0.00752418, Time: 0.2195 Steps: 65200, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000738, Sample Num: 11808, Cur Loss: 0.00001489, Cur Avg Loss: 0.00018150, Log Avg loss: 0.00018765, Global Avg Loss: 0.00750175, Time: 0.2898 Steps: 65400, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 000938, Sample Num: 15008, Cur Loss: 0.00000098, Cur Avg Loss: 0.00017717, Log Avg loss: 0.00016120, Global Avg Loss: 0.00747937, Time: 0.4731 Steps: 65600, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001138, Sample Num: 18208, Cur Loss: 0.00407679, Cur Avg Loss: 0.00017845, Log Avg loss: 0.00018446, Global Avg Loss: 0.00745719, Time: 0.3217 Steps: 65800, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001338, Sample Num: 21408, Cur Loss: 0.00000158, Cur Avg Loss: 0.00016291, Log Avg loss: 0.00007450, Global Avg Loss: 0.00743482, Time: 0.2246 Steps: 66000, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001538, Sample Num: 24608, Cur Loss: 0.00000305, Cur Avg Loss: 0.00016140, Log Avg loss: 0.00015130, Global Avg Loss: 0.00741282, Time: 0.2183 Steps: 66200, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001738, Sample Num: 27808, Cur Loss: 0.00000122, Cur Avg Loss: 0.00015816, Log Avg loss: 0.00013319, Global Avg Loss: 0.00739089, Time: 0.2190 Steps: 66400, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 001938, Sample Num: 31008, Cur Loss: 0.00002694, Cur Avg Loss: 0.00015635, Log Avg loss: 0.00014067, Global Avg Loss: 0.00736912, Time: 0.2045 Steps: 66600, Updated lr: 0.000074 Training, Epoch: 0014, Batch: 002138, Sample Num: 34208, Cur Loss: 0.00000144, Cur Avg Loss: 0.00016716, Log Avg loss: 0.00027187, Global Avg Loss: 0.00734787, Time: 0.2178 Steps: 66800, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 002338, Sample Num: 37408, Cur Loss: 0.00001044, Cur Avg Loss: 0.00016894, Log Avg loss: 0.00018794, Global Avg Loss: 0.00732650, Time: 0.2174 Steps: 67000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 002538, Sample Num: 40608, Cur Loss: 0.00001608, Cur Avg Loss: 0.00016213, Log Avg loss: 0.00008256, Global Avg Loss: 0.00730494, Time: 0.4313 Steps: 67200, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 002738, Sample Num: 43808, Cur Loss: 0.00412017, Cur Avg Loss: 0.00017225, Log Avg loss: 0.00030064, Global Avg Loss: 
0.00728415, Time: 0.2195 Steps: 67400, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 002938, Sample Num: 47008, Cur Loss: 0.00005775, Cur Avg Loss: 0.00017580, Log Avg loss: 0.00022449, Global Avg Loss: 0.00726327, Time: 0.2166 Steps: 67600, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003138, Sample Num: 50208, Cur Loss: 0.00001227, Cur Avg Loss: 0.00017909, Log Avg loss: 0.00022733, Global Avg Loss: 0.00724251, Time: 0.2209 Steps: 67800, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003338, Sample Num: 53408, Cur Loss: 0.00001157, Cur Avg Loss: 0.00017843, Log Avg loss: 0.00016816, Global Avg Loss: 0.00722170, Time: 0.2195 Steps: 68000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003538, Sample Num: 56608, Cur Loss: 0.00203663, Cur Avg Loss: 0.00018053, Log Avg loss: 0.00021554, Global Avg Loss: 0.00720116, Time: 0.2023 Steps: 68200, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003738, Sample Num: 59808, Cur Loss: 0.00001556, Cur Avg Loss: 0.00018239, Log Avg loss: 0.00021521, Global Avg Loss: 0.00718073, Time: 0.2587 Steps: 68400, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 003938, Sample Num: 63008, Cur Loss: 0.00005607, Cur Avg Loss: 0.00017822, Log Avg loss: 0.00010045, Global Avg Loss: 0.00716009, Time: 0.2400 Steps: 68600, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 004138, Sample Num: 66208, Cur Loss: 0.00001641, Cur Avg Loss: 0.00017669, Log Avg loss: 0.00014648, Global Avg Loss: 0.00713970, Time: 0.2205 Steps: 68800, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 004338, Sample Num: 69408, Cur Loss: 0.00001743, Cur Avg Loss: 0.00017429, Log Avg loss: 0.00012473, Global Avg Loss: 0.00711937, Time: 0.2723 Steps: 69000, Updated lr: 0.000073 Training, Epoch: 0014, Batch: 004538, Sample Num: 72608, Cur Loss: 0.00000285, Cur Avg Loss: 0.00017658, Log Avg loss: 0.00022625, Global Avg Loss: 0.00709944, Time: 0.2481 Steps: 69200, Updated lr: 0.000072 Training, Epoch: 0014, Batch: 004738, Sample Num: 75808, Cur Loss: 0.00006411, Cur Avg Loss: 0.00017563, Log Avg loss: 0.00015399, Global Avg Loss: 0.00707943, Time: 0.2174 Steps: 69400, Updated lr: 0.000072 Training, Epoch: 0014, Batch: 004938, Sample Num: 79008, Cur Loss: 0.00000051, Cur Avg Loss: 0.00017714, Log Avg loss: 0.00021297, Global Avg Loss: 0.00705970, Time: 0.2121 Steps: 69600, Updated lr: 0.000072 ***** Running evaluation checkpoint-69636 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-69636 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1337.698095, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001647, "eval_total_loss": 1.755633, "eval_acc": 0.999703, "eval_jaccard": 0.987545, "eval_prec": 0.988671, "eval_recall": 0.989295, "eval_f1": 0.988567, "eval_pr_auc": 0.995695, "eval_roc_auc": 0.999429, "eval_fmax": 0.994498, "eval_pmax": 0.996315, "eval_rmax": 0.992687, "eval_tmax": 0.09, "update_flag": true, "test_avg_loss": 0.001957, "test_total_loss": 2.08663, "test_acc": 0.9997, "test_jaccard": 0.986278, "test_prec": 0.987111, "test_recall": 0.988009, "test_f1": 0.987181, "test_pr_auc": 0.994706, "test_roc_auc": 0.999182, "test_fmax": 0.993672, "test_pmax": 0.997256, "test_rmax": 0.990114, "test_tmax": 0.18, "lr": 7.229067420266452e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.007056390243956146, "train_cur_epoch_loss": 
0.8985596715805801, "train_cur_epoch_avg_loss": 0.0001806513211862847, "train_cur_epoch_time": 1337.6980953216553, "train_cur_epoch_avg_time": 0.26893809716961303, "epoch": 14, "step": 69636} ################################################## Training, Epoch: 0015, Batch: 000164, Sample Num: 2624, Cur Loss: 0.00000761, Cur Avg Loss: 0.00009124, Log Avg loss: 0.00019395, Global Avg Loss: 0.00704003, Time: 0.3130 Steps: 69800, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000364, Sample Num: 5824, Cur Loss: 0.00000170, Cur Avg Loss: 0.00014127, Log Avg loss: 0.00018229, Global Avg Loss: 0.00702043, Time: 0.2724 Steps: 70000, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000564, Sample Num: 9024, Cur Loss: 0.00050846, Cur Avg Loss: 0.00020445, Log Avg loss: 0.00031945, Global Avg Loss: 0.00700134, Time: 0.2200 Steps: 70200, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000764, Sample Num: 12224, Cur Loss: 0.00000232, Cur Avg Loss: 0.00020768, Log Avg loss: 0.00021680, Global Avg Loss: 0.00698207, Time: 0.3469 Steps: 70400, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 000964, Sample Num: 15424, Cur Loss: 0.00001861, Cur Avg Loss: 0.00019009, Log Avg loss: 0.00012286, Global Avg Loss: 0.00696264, Time: 0.2751 Steps: 70600, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001164, Sample Num: 18624, Cur Loss: 0.00000077, Cur Avg Loss: 0.00019272, Log Avg loss: 0.00020541, Global Avg Loss: 0.00694355, Time: 0.2178 Steps: 70800, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001364, Sample Num: 21824, Cur Loss: 0.00001126, Cur Avg Loss: 0.00018461, Log Avg loss: 0.00013740, Global Avg Loss: 0.00692437, Time: 0.2144 Steps: 71000, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001564, Sample Num: 25024, Cur Loss: 0.00000492, Cur Avg Loss: 0.00017408, Log Avg loss: 0.00010225, Global Avg Loss: 0.00690521, Time: 0.2166 Steps: 71200, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001764, Sample Num: 28224, Cur Loss: 0.00000611, Cur Avg Loss: 0.00016869, Log Avg loss: 0.00012659, Global Avg Loss: 0.00688622, Time: 0.2186 Steps: 71400, Updated lr: 0.000072 Training, Epoch: 0015, Batch: 001964, Sample Num: 31424, Cur Loss: 0.00000245, Cur Avg Loss: 0.00016003, Log Avg loss: 0.00008361, Global Avg Loss: 0.00686722, Time: 0.3470 Steps: 71600, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002164, Sample Num: 34624, Cur Loss: 0.00002702, Cur Avg Loss: 0.00016363, Log Avg loss: 0.00019896, Global Avg Loss: 0.00684865, Time: 0.2175 Steps: 71800, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002364, Sample Num: 37824, Cur Loss: 0.00002845, Cur Avg Loss: 0.00016605, Log Avg loss: 0.00019232, Global Avg Loss: 0.00683016, Time: 0.3176 Steps: 72000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002564, Sample Num: 41024, Cur Loss: 0.00000118, Cur Avg Loss: 0.00015604, Log Avg loss: 0.00003766, Global Avg Loss: 0.00681134, Time: 0.4721 Steps: 72200, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002764, Sample Num: 44224, Cur Loss: 0.00000408, Cur Avg Loss: 0.00017021, Log Avg loss: 0.00035193, Global Avg Loss: 0.00679350, Time: 0.2224 Steps: 72400, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 002964, Sample Num: 47424, Cur Loss: 0.00000762, Cur Avg Loss: 0.00017343, Log Avg loss: 0.00021794, Global Avg Loss: 0.00677538, Time: 0.2196 Steps: 72600, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003164, Sample Num: 50624, Cur Loss: 0.00002397, Cur Avg Loss: 0.00017493, Log Avg loss: 0.00019707, Global Avg Loss: 0.00675731, Time: 0.2196 Steps: 72800, Updated 
lr: 0.000071 Training, Epoch: 0015, Batch: 003364, Sample Num: 53824, Cur Loss: 0.00000296, Cur Avg Loss: 0.00017302, Log Avg loss: 0.00014283, Global Avg Loss: 0.00673919, Time: 0.2474 Steps: 73000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003564, Sample Num: 57024, Cur Loss: 0.00000376, Cur Avg Loss: 0.00017173, Log Avg loss: 0.00014996, Global Avg Loss: 0.00672119, Time: 0.2184 Steps: 73200, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003764, Sample Num: 60224, Cur Loss: 0.00001664, Cur Avg Loss: 0.00016908, Log Avg loss: 0.00012197, Global Avg Loss: 0.00670320, Time: 0.2185 Steps: 73400, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 003964, Sample Num: 63424, Cur Loss: 0.00000124, Cur Avg Loss: 0.00016987, Log Avg loss: 0.00018469, Global Avg Loss: 0.00668549, Time: 0.2424 Steps: 73600, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 004164, Sample Num: 66624, Cur Loss: 0.00001839, Cur Avg Loss: 0.00016689, Log Avg loss: 0.00010787, Global Avg Loss: 0.00666767, Time: 0.2190 Steps: 73800, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 004364, Sample Num: 69824, Cur Loss: 0.00000878, Cur Avg Loss: 0.00016741, Log Avg loss: 0.00017815, Global Avg Loss: 0.00665013, Time: 0.3835 Steps: 74000, Updated lr: 0.000071 Training, Epoch: 0015, Batch: 004564, Sample Num: 73024, Cur Loss: 0.00000254, Cur Avg Loss: 0.00016783, Log Avg loss: 0.00017714, Global Avg Loss: 0.00663268, Time: 0.2215 Steps: 74200, Updated lr: 0.000070 Training, Epoch: 0015, Batch: 004764, Sample Num: 76224, Cur Loss: 0.00000111, Cur Avg Loss: 0.00016680, Log Avg loss: 0.00014325, Global Avg Loss: 0.00661523, Time: 0.2179 Steps: 74400, Updated lr: 0.000070 Training, Epoch: 0015, Batch: 004964, Sample Num: 79424, Cur Loss: 0.00002571, Cur Avg Loss: 0.00017146, Log Avg loss: 0.00028248, Global Avg Loss: 0.00659826, Time: 0.4536 Steps: 74600, Updated lr: 0.000070 ***** Running evaluation checkpoint-74610 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-74610 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1286.397534, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001649, "eval_total_loss": 1.757983, "eval_acc": 0.9997, "eval_jaccard": 0.987791, "eval_prec": 0.988846, "eval_recall": 0.989718, "eval_f1": 0.988835, "eval_pr_auc": 0.995877, "eval_roc_auc": 0.999445, "eval_fmax": 0.994459, "eval_pmax": 0.996763, "eval_rmax": 0.992165, "eval_tmax": 0.13, "update_flag": true, "test_avg_loss": 0.00195, "test_total_loss": 2.078627, "test_acc": 0.999713, "test_jaccard": 0.987096, "test_prec": 0.987877, "test_recall": 0.988814, "test_f1": 0.987972, "test_pr_auc": 0.994808, "test_roc_auc": 0.999162, "test_fmax": 0.994015, "test_pmax": 0.996214, "test_rmax": 0.991825, "test_tmax": 0.08, "lr": 7.028259991925716e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.006597451249421812, "train_cur_epoch_loss": 0.857046691230984, "train_cur_epoch_avg_loss": 0.00017230532594109045, "train_cur_epoch_time": 1286.3975336551666, "train_cur_epoch_avg_time": 0.2586243533685498, "epoch": 15, "step": 74610} ################################################## Training, Epoch: 0016, Batch: 000190, Sample Num: 3040, Cur Loss: 0.00009151, Cur Avg Loss: 0.00009742, Log Avg loss: 0.00012208, Global Avg Loss: 0.00658094, Time: 0.3158 Steps: 74800, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 
000390, Sample Num: 6240, Cur Loss: 0.00001924, Cur Avg Loss: 0.00012794, Log Avg loss: 0.00015693, Global Avg Loss: 0.00656381, Time: 0.3421 Steps: 75000, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 000590, Sample Num: 9440, Cur Loss: 0.00003538, Cur Avg Loss: 0.00016119, Log Avg loss: 0.00022601, Global Avg Loss: 0.00654695, Time: 0.2178 Steps: 75200, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 000790, Sample Num: 12640, Cur Loss: 0.00001049, Cur Avg Loss: 0.00017697, Log Avg loss: 0.00022354, Global Avg Loss: 0.00653018, Time: 0.2163 Steps: 75400, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 000990, Sample Num: 15840, Cur Loss: 0.00001200, Cur Avg Loss: 0.00015841, Log Avg loss: 0.00008508, Global Avg Loss: 0.00651313, Time: 0.2084 Steps: 75600, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001190, Sample Num: 19040, Cur Loss: 0.00004416, Cur Avg Loss: 0.00014961, Log Avg loss: 0.00010605, Global Avg Loss: 0.00649623, Time: 0.4132 Steps: 75800, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001390, Sample Num: 22240, Cur Loss: 0.00001398, Cur Avg Loss: 0.00013907, Log Avg loss: 0.00007636, Global Avg Loss: 0.00647933, Time: 0.6075 Steps: 76000, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001590, Sample Num: 25440, Cur Loss: 0.00000287, Cur Avg Loss: 0.00013390, Log Avg loss: 0.00009796, Global Avg Loss: 0.00646258, Time: 0.4340 Steps: 76200, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001790, Sample Num: 28640, Cur Loss: 0.00000036, Cur Avg Loss: 0.00013648, Log Avg loss: 0.00015704, Global Avg Loss: 0.00644608, Time: 0.4912 Steps: 76400, Updated lr: 0.000070 Training, Epoch: 0016, Batch: 001990, Sample Num: 31840, Cur Loss: 0.00002289, Cur Avg Loss: 0.00013163, Log Avg loss: 0.00008821, Global Avg Loss: 0.00642948, Time: 0.0884 Steps: 76600, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002190, Sample Num: 35040, Cur Loss: 0.00000338, Cur Avg Loss: 0.00014576, Log Avg loss: 0.00028637, Global Avg Loss: 0.00641348, Time: 0.2714 Steps: 76800, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002390, Sample Num: 38240, Cur Loss: 0.00000546, Cur Avg Loss: 0.00014442, Log Avg loss: 0.00012967, Global Avg Loss: 0.00639716, Time: 0.2725 Steps: 77000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002590, Sample Num: 41440, Cur Loss: 0.00000605, Cur Avg Loss: 0.00014153, Log Avg loss: 0.00010701, Global Avg Loss: 0.00638086, Time: 0.3826 Steps: 77200, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002790, Sample Num: 44640, Cur Loss: 0.00000623, Cur Avg Loss: 0.00015265, Log Avg loss: 0.00029673, Global Avg Loss: 0.00636514, Time: 0.4054 Steps: 77400, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 002990, Sample Num: 47840, Cur Loss: 0.00003778, Cur Avg Loss: 0.00015848, Log Avg loss: 0.00023974, Global Avg Loss: 0.00634935, Time: 0.2162 Steps: 77600, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003190, Sample Num: 51040, Cur Loss: 0.00000410, Cur Avg Loss: 0.00015747, Log Avg loss: 0.00014235, Global Avg Loss: 0.00633340, Time: 0.2143 Steps: 77800, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003390, Sample Num: 54240, Cur Loss: 0.00000120, Cur Avg Loss: 0.00015818, Log Avg loss: 0.00016954, Global Avg Loss: 0.00631759, Time: 0.2172 Steps: 78000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003590, Sample Num: 57440, Cur Loss: 0.00006339, Cur Avg Loss: 0.00015998, Log Avg loss: 0.00019051, Global Avg Loss: 0.00630192, Time: 0.3026 Steps: 78200, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003790, Sample Num: 
60640, Cur Loss: 0.00000406, Cur Avg Loss: 0.00015924, Log Avg loss: 0.00014586, Global Avg Loss: 0.00628622, Time: 0.3997 Steps: 78400, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 003990, Sample Num: 63840, Cur Loss: 0.00000013, Cur Avg Loss: 0.00015825, Log Avg loss: 0.00013964, Global Avg Loss: 0.00627058, Time: 0.2187 Steps: 78600, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 004190, Sample Num: 67040, Cur Loss: 0.00003258, Cur Avg Loss: 0.00015866, Log Avg loss: 0.00016671, Global Avg Loss: 0.00625508, Time: 0.1382 Steps: 78800, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 004390, Sample Num: 70240, Cur Loss: 0.00000971, Cur Avg Loss: 0.00015764, Log Avg loss: 0.00013634, Global Avg Loss: 0.00623959, Time: 0.2183 Steps: 79000, Updated lr: 0.000069 Training, Epoch: 0016, Batch: 004590, Sample Num: 73440, Cur Loss: 0.00003111, Cur Avg Loss: 0.00016143, Log Avg loss: 0.00024469, Global Avg Loss: 0.00622445, Time: 0.4184 Steps: 79200, Updated lr: 0.000068 Training, Epoch: 0016, Batch: 004790, Sample Num: 76640, Cur Loss: 0.00309900, Cur Avg Loss: 0.00016051, Log Avg loss: 0.00013922, Global Avg Loss: 0.00620913, Time: 0.6842 Steps: 79400, Updated lr: 0.000068 ***** Running evaluation checkpoint-79584 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-79584 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1304.014933, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001692, "eval_total_loss": 1.803361, "eval_acc": 0.999713, "eval_jaccard": 0.988007, "eval_prec": 0.989004, "eval_recall": 0.989726, "eval_f1": 0.988966, "eval_pr_auc": 0.995771, "eval_roc_auc": 0.99945, "eval_fmax": 0.99469, "eval_pmax": 0.997481, "eval_rmax": 0.991914, "eval_tmax": 0.18, "update_flag": true, "test_avg_loss": 0.002002, "test_total_loss": 2.134459, "test_acc": 0.999711, "test_jaccard": 0.986953, "test_prec": 0.987746, "test_recall": 0.988663, "test_f1": 0.987847, "test_pr_auc": 0.994815, "test_roc_auc": 0.999175, "test_fmax": 0.993864, "test_pmax": 0.996187, "test_rmax": 0.991552, "test_tmax": 0.07, "lr": 6.827452563584982e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00619552088903951, "train_cur_epoch_loss": 0.8284967139590886, "train_cur_epoch_avg_loss": 0.00016656548330500374, "train_cur_epoch_time": 1304.014933347702, "train_cur_epoch_avg_time": 0.2621662511756538, "epoch": 16, "step": 79584} ################################################## Training, Epoch: 0017, Batch: 000016, Sample Num: 256, Cur Loss: 0.00006669, Cur Avg Loss: 0.00016419, Log Avg loss: 0.00031149, Global Avg Loss: 0.00619431, Time: 0.4999 Steps: 79600, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000216, Sample Num: 3456, Cur Loss: 0.00000951, Cur Avg Loss: 0.00012150, Log Avg loss: 0.00011808, Global Avg Loss: 0.00617908, Time: 0.2793 Steps: 79800, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000416, Sample Num: 6656, Cur Loss: 0.00000256, Cur Avg Loss: 0.00012732, Log Avg loss: 0.00013361, Global Avg Loss: 0.00616397, Time: 0.2419 Steps: 80000, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000616, Sample Num: 9856, Cur Loss: 0.00000248, Cur Avg Loss: 0.00020758, Log Avg loss: 0.00037452, Global Avg Loss: 0.00614953, Time: 0.2176 Steps: 80200, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 000816, Sample Num: 13056, Cur Loss: 0.00000622, Cur Avg Loss: 
0.00021505, Log Avg loss: 0.00023804, Global Avg Loss: 0.00613482, Time: 0.3407 Steps: 80400, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001016, Sample Num: 16256, Cur Loss: 0.00000529, Cur Avg Loss: 0.00018565, Log Avg loss: 0.00006570, Global Avg Loss: 0.00611976, Time: 0.0835 Steps: 80600, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001216, Sample Num: 19456, Cur Loss: 0.00000054, Cur Avg Loss: 0.00017923, Log Avg loss: 0.00014664, Global Avg Loss: 0.00610498, Time: 0.2711 Steps: 80800, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001416, Sample Num: 22656, Cur Loss: 0.00000077, Cur Avg Loss: 0.00015951, Log Avg loss: 0.00003960, Global Avg Loss: 0.00609000, Time: 0.4232 Steps: 81000, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001616, Sample Num: 25856, Cur Loss: 0.00000132, Cur Avg Loss: 0.00015961, Log Avg loss: 0.00016028, Global Avg Loss: 0.00607540, Time: 0.1873 Steps: 81200, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 001816, Sample Num: 29056, Cur Loss: 0.00000317, Cur Avg Loss: 0.00015246, Log Avg loss: 0.00009467, Global Avg Loss: 0.00606070, Time: 0.4097 Steps: 81400, Updated lr: 0.000068 Training, Epoch: 0017, Batch: 002016, Sample Num: 32256, Cur Loss: 0.00000521, Cur Avg Loss: 0.00014839, Log Avg loss: 0.00011144, Global Avg Loss: 0.00604612, Time: 0.2483 Steps: 81600, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002216, Sample Num: 35456, Cur Loss: 0.00001108, Cur Avg Loss: 0.00015665, Log Avg loss: 0.00023991, Global Avg Loss: 0.00603193, Time: 0.2215 Steps: 81800, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002416, Sample Num: 38656, Cur Loss: 0.00000264, Cur Avg Loss: 0.00015849, Log Avg loss: 0.00017894, Global Avg Loss: 0.00601765, Time: 0.2174 Steps: 82000, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002616, Sample Num: 41856, Cur Loss: 0.00000306, Cur Avg Loss: 0.00015381, Log Avg loss: 0.00009730, Global Avg Loss: 0.00600324, Time: 0.2196 Steps: 82200, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 002816, Sample Num: 45056, Cur Loss: 0.00000324, Cur Avg Loss: 0.00015849, Log Avg loss: 0.00021958, Global Avg Loss: 0.00598921, Time: 0.2201 Steps: 82400, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003016, Sample Num: 48256, Cur Loss: 0.00000787, Cur Avg Loss: 0.00016822, Log Avg loss: 0.00030530, Global Avg Loss: 0.00597544, Time: 0.2241 Steps: 82600, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003216, Sample Num: 51456, Cur Loss: 0.00001656, Cur Avg Loss: 0.00016455, Log Avg loss: 0.00010922, Global Avg Loss: 0.00596127, Time: 0.2199 Steps: 82800, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003416, Sample Num: 54656, Cur Loss: 0.00000236, Cur Avg Loss: 0.00016429, Log Avg loss: 0.00016006, Global Avg Loss: 0.00594730, Time: 0.2207 Steps: 83000, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003616, Sample Num: 57856, Cur Loss: 0.00001126, Cur Avg Loss: 0.00016304, Log Avg loss: 0.00014171, Global Avg Loss: 0.00593334, Time: 0.1929 Steps: 83200, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 003816, Sample Num: 61056, Cur Loss: 0.00000963, Cur Avg Loss: 0.00015963, Log Avg loss: 0.00009804, Global Avg Loss: 0.00591935, Time: 0.1502 Steps: 83400, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 004016, Sample Num: 64256, Cur Loss: 0.00000403, Cur Avg Loss: 0.00015943, Log Avg loss: 0.00015555, Global Avg Loss: 0.00590556, Time: 0.2667 Steps: 83600, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 004216, Sample Num: 67456, Cur Loss: 0.00000156, Cur Avg Loss: 0.00015722, Log 
Avg loss: 0.00011281, Global Avg Loss: 0.00589173, Time: 0.2181 Steps: 83800, Updated lr: 0.000067 Training, Epoch: 0017, Batch: 004416, Sample Num: 70656, Cur Loss: 0.00000135, Cur Avg Loss: 0.00015405, Log Avg loss: 0.00008721, Global Avg Loss: 0.00587791, Time: 0.2750 Steps: 84000, Updated lr: 0.000066 Training, Epoch: 0017, Batch: 004616, Sample Num: 73856, Cur Loss: 0.00000761, Cur Avg Loss: 0.00015714, Log Avg loss: 0.00022551, Global Avg Loss: 0.00586449, Time: 0.2192 Steps: 84200, Updated lr: 0.000066 Training, Epoch: 0017, Batch: 004816, Sample Num: 77056, Cur Loss: 0.00004250, Cur Avg Loss: 0.00015922, Log Avg loss: 0.00020714, Global Avg Loss: 0.00585108, Time: 0.2153 Steps: 84400, Updated lr: 0.000066 ***** Running evaluation checkpoint-84558 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-84558 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1305.036613, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001727, "eval_total_loss": 1.840633, "eval_acc": 0.99971, "eval_jaccard": 0.988008, "eval_prec": 0.989177, "eval_recall": 0.989677, "eval_f1": 0.989008, "eval_pr_auc": 0.995833, "eval_roc_auc": 0.999433, "eval_fmax": 0.994664, "eval_pmax": 0.99738, "eval_rmax": 0.991963, "eval_tmax": 0.14, "update_flag": true, "test_avg_loss": 0.001997, "test_total_loss": 2.129127, "test_acc": 0.999705, "test_jaccard": 0.987084, "test_prec": 0.987875, "test_recall": 0.988805, "test_f1": 0.987964, "test_pr_auc": 0.995121, "test_roc_auc": 0.99918, "test_fmax": 0.994136, "test_pmax": 0.99703, "test_rmax": 0.991258, "test_tmax": 0.09, "lr": 6.626645135244247e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005840657145465297, "train_cur_epoch_loss": 0.809952472934484, "train_cur_epoch_avg_loss": 0.0001628372482779421, "train_cur_epoch_time": 1305.0366129875183, "train_cur_epoch_avg_time": 0.2623716552045674, "epoch": 17, "step": 84558} ################################################## Training, Epoch: 0018, Batch: 000042, Sample Num: 672, Cur Loss: 0.00002235, Cur Avg Loss: 0.00003244, Log Avg loss: 0.00022255, Global Avg Loss: 0.00583777, Time: 0.2190 Steps: 84600, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 000242, Sample Num: 3872, Cur Loss: 0.00000077, Cur Avg Loss: 0.00011982, Log Avg loss: 0.00013817, Global Avg Loss: 0.00582433, Time: 0.2205 Steps: 84800, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 000442, Sample Num: 7072, Cur Loss: 0.00002726, Cur Avg Loss: 0.00015078, Log Avg loss: 0.00018824, Global Avg Loss: 0.00581107, Time: 0.2266 Steps: 85000, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 000642, Sample Num: 10272, Cur Loss: 0.00000375, Cur Avg Loss: 0.00017932, Log Avg loss: 0.00024241, Global Avg Loss: 0.00579800, Time: 0.4562 Steps: 85200, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 000842, Sample Num: 13472, Cur Loss: 0.00001449, Cur Avg Loss: 0.00017116, Log Avg loss: 0.00014497, Global Avg Loss: 0.00578476, Time: 0.0854 Steps: 85400, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001042, Sample Num: 16672, Cur Loss: 0.00001470, Cur Avg Loss: 0.00015964, Log Avg loss: 0.00011114, Global Avg Loss: 0.00577150, Time: 0.2814 Steps: 85600, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001242, Sample Num: 19872, Cur Loss: 0.00000397, Cur Avg Loss: 0.00015551, Log Avg loss: 0.00013396, Global Avg Loss: 
0.00575836, Time: 0.3754 Steps: 85800, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001442, Sample Num: 23072, Cur Loss: 0.00000282, Cur Avg Loss: 0.00014172, Log Avg loss: 0.00005611, Global Avg Loss: 0.00574510, Time: 0.2196 Steps: 86000, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001642, Sample Num: 26272, Cur Loss: 0.00000483, Cur Avg Loss: 0.00013627, Log Avg loss: 0.00009695, Global Avg Loss: 0.00573200, Time: 0.2462 Steps: 86200, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 001842, Sample Num: 29472, Cur Loss: 0.00001301, Cur Avg Loss: 0.00013129, Log Avg loss: 0.00009045, Global Avg Loss: 0.00571894, Time: 0.2149 Steps: 86400, Updated lr: 0.000066 Training, Epoch: 0018, Batch: 002042, Sample Num: 32672, Cur Loss: 0.00000549, Cur Avg Loss: 0.00013280, Log Avg loss: 0.00014670, Global Avg Loss: 0.00570607, Time: 0.2453 Steps: 86600, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002242, Sample Num: 35872, Cur Loss: 0.00000044, Cur Avg Loss: 0.00013720, Log Avg loss: 0.00018209, Global Avg Loss: 0.00569334, Time: 0.4482 Steps: 86800, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002442, Sample Num: 39072, Cur Loss: 0.00000387, Cur Avg Loss: 0.00013903, Log Avg loss: 0.00015959, Global Avg Loss: 0.00568062, Time: 0.2187 Steps: 87000, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002642, Sample Num: 42272, Cur Loss: 0.00010180, Cur Avg Loss: 0.00013679, Log Avg loss: 0.00010947, Global Avg Loss: 0.00566784, Time: 0.2391 Steps: 87200, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 002842, Sample Num: 45472, Cur Loss: 0.00000089, Cur Avg Loss: 0.00014685, Log Avg loss: 0.00027968, Global Avg Loss: 0.00565551, Time: 0.2193 Steps: 87400, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003042, Sample Num: 48672, Cur Loss: 0.00000235, Cur Avg Loss: 0.00015347, Log Avg loss: 0.00024755, Global Avg Loss: 0.00564316, Time: 0.2717 Steps: 87600, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003242, Sample Num: 51872, Cur Loss: 0.00000276, Cur Avg Loss: 0.00015092, Log Avg loss: 0.00011216, Global Avg Loss: 0.00563056, Time: 0.2095 Steps: 87800, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003442, Sample Num: 55072, Cur Loss: 0.00000111, Cur Avg Loss: 0.00015053, Log Avg loss: 0.00014420, Global Avg Loss: 0.00561810, Time: 0.2190 Steps: 88000, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003642, Sample Num: 58272, Cur Loss: 0.00001851, Cur Avg Loss: 0.00015160, Log Avg loss: 0.00016999, Global Avg Loss: 0.00560574, Time: 0.2178 Steps: 88200, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 003842, Sample Num: 61472, Cur Loss: 0.00000934, Cur Avg Loss: 0.00014543, Log Avg loss: 0.00003312, Global Avg Loss: 0.00559313, Time: 0.3218 Steps: 88400, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 004042, Sample Num: 64672, Cur Loss: 0.00000819, Cur Avg Loss: 0.00014691, Log Avg loss: 0.00017527, Global Avg Loss: 0.00558090, Time: 0.2230 Steps: 88600, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 004242, Sample Num: 67872, Cur Loss: 0.00001123, Cur Avg Loss: 0.00014338, Log Avg loss: 0.00007209, Global Avg Loss: 0.00556850, Time: 0.3298 Steps: 88800, Updated lr: 0.000065 Training, Epoch: 0018, Batch: 004442, Sample Num: 71072, Cur Loss: 0.00001031, Cur Avg Loss: 0.00014114, Log Avg loss: 0.00009357, Global Avg Loss: 0.00555619, Time: 0.2870 Steps: 89000, Updated lr: 0.000064 Training, Epoch: 0018, Batch: 004642, Sample Num: 74272, Cur Loss: 0.00002048, Cur Avg Loss: 0.00014587, Log Avg loss: 0.00025087, Global Avg Loss: 0.00554430, Time: 
0.3608 Steps: 89200, Updated lr: 0.000064 Training, Epoch: 0018, Batch: 004842, Sample Num: 77472, Cur Loss: 0.00000573, Cur Avg Loss: 0.00014824, Log Avg loss: 0.00020335, Global Avg Loss: 0.00553235, Time: 0.3313 Steps: 89400, Updated lr: 0.000064 ***** Running evaluation checkpoint-89532 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-89532 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1285.726656, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001801, "eval_total_loss": 1.920283, "eval_acc": 0.999712, "eval_jaccard": 0.988122, "eval_prec": 0.989293, "eval_recall": 0.989669, "eval_f1": 0.989071, "eval_pr_auc": 0.995744, "eval_roc_auc": 0.999416, "eval_fmax": 0.994499, "eval_pmax": 0.997524, "eval_rmax": 0.991492, "eval_tmax": 0.16, "update_flag": true, "test_avg_loss": 0.002042, "test_total_loss": 2.176985, "test_acc": 0.999715, "test_jaccard": 0.987223, "test_prec": 0.987992, "test_recall": 0.988874, "test_f1": 0.98807, "test_pr_auc": 0.994913, "test_roc_auc": 0.999172, "test_fmax": 0.994088, "test_pmax": 0.997078, "test_rmax": 0.991116, "test_tmax": 0.11, "lr": 6.425837706903513e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005524551509563735, "train_cur_epoch_loss": 0.7498588480057358, "train_cur_epoch_avg_loss": 0.0001507556992371805, "train_cur_epoch_time": 1285.7266557216644, "train_cur_epoch_avg_time": 0.2584894764217259, "epoch": 18, "step": 89532} ################################################## Training, Epoch: 0019, Batch: 000068, Sample Num: 1088, Cur Loss: 0.00015368, Cur Avg Loss: 0.00010606, Log Avg loss: 0.00019644, Global Avg Loss: 0.00552044, Time: 0.2162 Steps: 89600, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000268, Sample Num: 4288, Cur Loss: 0.00000332, Cur Avg Loss: 0.00011575, Log Avg loss: 0.00011904, Global Avg Loss: 0.00550841, Time: 0.2801 Steps: 89800, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000468, Sample Num: 7488, Cur Loss: 0.00008157, Cur Avg Loss: 0.00017292, Log Avg loss: 0.00024952, Global Avg Loss: 0.00549672, Time: 0.2877 Steps: 90000, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000668, Sample Num: 10688, Cur Loss: 0.00000432, Cur Avg Loss: 0.00015619, Log Avg loss: 0.00011705, Global Avg Loss: 0.00548479, Time: 0.4324 Steps: 90200, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 000868, Sample Num: 13888, Cur Loss: 0.00006328, Cur Avg Loss: 0.00016218, Log Avg loss: 0.00018217, Global Avg Loss: 0.00547306, Time: 0.3018 Steps: 90400, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001068, Sample Num: 17088, Cur Loss: 0.01031706, Cur Avg Loss: 0.00015598, Log Avg loss: 0.00012911, Global Avg Loss: 0.00546127, Time: 0.1217 Steps: 90600, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001268, Sample Num: 20288, Cur Loss: 0.00001252, Cur Avg Loss: 0.00014254, Log Avg loss: 0.00007073, Global Avg Loss: 0.00544939, Time: 0.2438 Steps: 90800, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001468, Sample Num: 23488, Cur Loss: 0.00003450, Cur Avg Loss: 0.00013273, Log Avg loss: 0.00007054, Global Avg Loss: 0.00543757, Time: 0.2146 Steps: 91000, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 001668, Sample Num: 26688, Cur Loss: 0.00000561, Cur Avg Loss: 0.00012939, Log Avg loss: 0.00010486, Global Avg Loss: 0.00542588, Time: 0.1743 Steps: 91200, Updated lr: 0.000064 
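##################################################
Note on the per-step training records above and below. Each "Training, ..." record carries four loss figures whose relationship can be checked directly against the logged numbers: "Cur Loss" is the loss of the current batch; "Cur Avg Loss" is the mean batch loss over the current epoch so far (it resets at each epoch boundary); "Log Avg loss" is the mean over the most recent 200-step logging window (the "Steps" counter advances by 200 between records); and "Global Avg Loss" is the running mean over all steps since the start of training, which is why it still carries the large losses of the earliest epochs and decays only slowly even while the epoch-level averages sit near 0.0001. "Sample Num" is Batch x 16, the per-GPU batch size reported in the evaluation blocks, and "Time" appears to be the wall-clock seconds of the most recent batch. The sketch below is a hypothetical reconstruction of this bookkeeping (LossMeter and its fields are invented names, not the project's code); it reproduces the update rule the logged values satisfy, e.g. at Steps 91600: (0.00541420 x 91400 + 0.00015966 x 200) / 91600 = 0.00540273.

class LossMeter:
    """Hypothetical sketch of the loss bookkeeping behind the log records."""

    def __init__(self, logging_steps=200):
        self.logging_steps = logging_steps
        self.global_sum = 0.0    # sum of batch losses since step 0
        self.global_steps = 0
        self.epoch_sum = 0.0     # sum of batch losses in the current epoch
        self.epoch_steps = 0
        self.window_sum = 0.0    # sum over the current 200-step window

    def new_epoch(self):
        # "Cur Avg Loss" resets each epoch; the logging window does not,
        # which is why the first record of an epoch can show a "Log Avg loss"
        # far from its "Cur Avg Loss".
        self.epoch_sum, self.epoch_steps = 0.0, 0

    def update(self, cur_loss):
        self.global_sum += cur_loss
        self.global_steps += 1
        self.epoch_sum += cur_loss
        self.epoch_steps += 1
        self.window_sum += cur_loss
        if self.global_steps % self.logging_steps == 0:
            log_avg = self.window_sum / self.logging_steps
            self.window_sum = 0.0
            print(f"Cur Loss: {cur_loss:.8f}, "
                  f"Cur Avg Loss: {self.epoch_sum / self.epoch_steps:.8f}, "
                  f"Log Avg loss: {log_avg:.8f}, "
                  f"Global Avg Loss: {self.global_sum / self.global_steps:.8f}")
##################################################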
Training, Epoch: 0019, Batch: 001868, Sample Num: 29888, Cur Loss: 0.00000115, Cur Avg Loss: 0.00012520, Log Avg loss: 0.00009029, Global Avg Loss: 0.00541420, Time: 0.3369 Steps: 91400, Updated lr: 0.000064 Training, Epoch: 0019, Batch: 002068, Sample Num: 33088, Cur Loss: 0.00021740, Cur Avg Loss: 0.00012853, Log Avg loss: 0.00015966, Global Avg Loss: 0.00540273, Time: 0.4593 Steps: 91600, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002268, Sample Num: 36288, Cur Loss: 0.00000351, Cur Avg Loss: 0.00012834, Log Avg loss: 0.00012630, Global Avg Loss: 0.00539123, Time: 0.2837 Steps: 91800, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002468, Sample Num: 39488, Cur Loss: 0.00000030, Cur Avg Loss: 0.00012853, Log Avg loss: 0.00013076, Global Avg Loss: 0.00537980, Time: 0.2173 Steps: 92000, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002668, Sample Num: 42688, Cur Loss: 0.00053783, Cur Avg Loss: 0.00012992, Log Avg loss: 0.00014708, Global Avg Loss: 0.00536845, Time: 0.2188 Steps: 92200, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 002868, Sample Num: 45888, Cur Loss: 0.00000101, Cur Avg Loss: 0.00014226, Log Avg loss: 0.00030683, Global Avg Loss: 0.00535749, Time: 0.4420 Steps: 92400, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003068, Sample Num: 49088, Cur Loss: 0.00001827, Cur Avg Loss: 0.00014531, Log Avg loss: 0.00018910, Global Avg Loss: 0.00534633, Time: 0.2085 Steps: 92600, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003268, Sample Num: 52288, Cur Loss: 0.00000131, Cur Avg Loss: 0.00014031, Log Avg loss: 0.00006351, Global Avg Loss: 0.00533494, Time: 0.2166 Steps: 92800, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003468, Sample Num: 55488, Cur Loss: 0.00117926, Cur Avg Loss: 0.00013989, Log Avg loss: 0.00013315, Global Avg Loss: 0.00532376, Time: 0.3124 Steps: 93000, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003668, Sample Num: 58688, Cur Loss: 0.00000113, Cur Avg Loss: 0.00014298, Log Avg loss: 0.00019641, Global Avg Loss: 0.00531275, Time: 0.2659 Steps: 93200, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 003868, Sample Num: 61888, Cur Loss: 0.00000350, Cur Avg Loss: 0.00013716, Log Avg loss: 0.00003045, Global Avg Loss: 0.00530144, Time: 0.2149 Steps: 93400, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 004068, Sample Num: 65088, Cur Loss: 0.00050490, Cur Avg Loss: 0.00013794, Log Avg loss: 0.00015300, Global Avg Loss: 0.00529044, Time: 0.2190 Steps: 93600, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 004268, Sample Num: 68288, Cur Loss: 0.00001200, Cur Avg Loss: 0.00013628, Log Avg loss: 0.00010268, Global Avg Loss: 0.00527938, Time: 0.2126 Steps: 93800, Updated lr: 0.000063 Training, Epoch: 0019, Batch: 004468, Sample Num: 71488, Cur Loss: 0.00081195, Cur Avg Loss: 0.00013445, Log Avg loss: 0.00009521, Global Avg Loss: 0.00526835, Time: 0.3251 Steps: 94000, Updated lr: 0.000062 Training, Epoch: 0019, Batch: 004668, Sample Num: 74688, Cur Loss: 0.00001279, Cur Avg Loss: 0.00013959, Log Avg loss: 0.00025450, Global Avg Loss: 0.00525770, Time: 0.2181 Steps: 94200, Updated lr: 0.000062 Training, Epoch: 0019, Batch: 004868, Sample Num: 77888, Cur Loss: 0.00001089, Cur Avg Loss: 0.00014064, Log Avg loss: 0.00016516, Global Avg Loss: 0.00524692, Time: 0.3240 Steps: 94400, Updated lr: 0.000062 ***** Running evaluation checkpoint-94506 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-94506 ***** 
Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1297.991109, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001747, "eval_total_loss": 1.862431, "eval_acc": 0.99971, "eval_jaccard": 0.988137, "eval_prec": 0.989182, "eval_recall": 0.99003, "eval_f1": 0.989163, "eval_pr_auc": 0.995827, "eval_roc_auc": 0.999449, "eval_fmax": 0.994657, "eval_pmax": 0.997274, "eval_rmax": 0.992054, "eval_tmax": 0.18, "update_flag": true, "test_avg_loss": 0.002073, "test_total_loss": 2.210214, "test_acc": 0.999715, "test_jaccard": 0.987417, "test_prec": 0.988176, "test_recall": 0.989246, "test_f1": 0.988323, "test_pr_auc": 0.994902, "test_roc_auc": 0.999179, "test_fmax": 0.994136, "test_pmax": 0.997071, "test_rmax": 0.991219, "test_tmax": 0.15, "lr": 6.225030278562778e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.005241445797609469, "train_cur_epoch_loss": 0.7239307946200457, "train_cur_epoch_avg_loss": 0.00014554298243265898, "train_cur_epoch_time": 1297.9911093711853, "train_cur_epoch_avg_time": 0.26095518885628977, "epoch": 19, "step": 94506} ################################################## Training, Epoch: 0020, Batch: 000094, Sample Num: 1504, Cur Loss: 0.00001988, Cur Avg Loss: 0.00012355, Log Avg loss: 0.00025455, Global Avg Loss: 0.00523636, Time: 0.2694 Steps: 94600, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000294, Sample Num: 4704, Cur Loss: 0.00000691, Cur Avg Loss: 0.00012072, Log Avg loss: 0.00011939, Global Avg Loss: 0.00522557, Time: 0.1313 Steps: 94800, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000494, Sample Num: 7904, Cur Loss: 0.00003589, Cur Avg Loss: 0.00018531, Log Avg loss: 0.00028025, Global Avg Loss: 0.00521515, Time: 0.3749 Steps: 95000, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000694, Sample Num: 11104, Cur Loss: 0.00002162, Cur Avg Loss: 0.00016819, Log Avg loss: 0.00012591, Global Avg Loss: 0.00520446, Time: 0.3434 Steps: 95200, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 000894, Sample Num: 14304, Cur Loss: 0.00024100, Cur Avg Loss: 0.00016869, Log Avg loss: 0.00017044, Global Avg Loss: 0.00519391, Time: 0.3170 Steps: 95400, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001094, Sample Num: 17504, Cur Loss: 0.00001310, Cur Avg Loss: 0.00016000, Log Avg loss: 0.00012114, Global Avg Loss: 0.00518330, Time: 0.2152 Steps: 95600, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001294, Sample Num: 20704, Cur Loss: 0.00002763, Cur Avg Loss: 0.00014575, Log Avg loss: 0.00006778, Global Avg Loss: 0.00517262, Time: 0.2811 Steps: 95800, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001494, Sample Num: 23904, Cur Loss: 0.00000033, Cur Avg Loss: 0.00013957, Log Avg loss: 0.00009962, Global Avg Loss: 0.00516205, Time: 0.2233 Steps: 96000, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001694, Sample Num: 27104, Cur Loss: 0.00000412, Cur Avg Loss: 0.00013496, Log Avg loss: 0.00010050, Global Avg Loss: 0.00515152, Time: 0.2197 Steps: 96200, Updated lr: 0.000062 Training, Epoch: 0020, Batch: 001894, Sample Num: 30304, Cur Loss: 0.00000442, Cur Avg Loss: 0.00013021, Log Avg loss: 0.00009003, Global Avg Loss: 0.00514102, Time: 0.2097 Steps: 96400, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002094, Sample Num: 33504, Cur Loss: 0.00000005, Cur Avg Loss: 0.00012963, Log Avg loss: 0.00012414, Global Avg Loss: 0.00513064, Time: 0.2139 Steps: 96600, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002294, Sample Num: 36704, Cur 
Loss: 0.00000082, Cur Avg Loss: 0.00013602, Log Avg loss: 0.00020293, Global Avg Loss: 0.00512046, Time: 0.2165 Steps: 96800, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002494, Sample Num: 39904, Cur Loss: 0.00001209, Cur Avg Loss: 0.00013059, Log Avg loss: 0.00006825, Global Avg Loss: 0.00511004, Time: 0.3115 Steps: 97000, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002694, Sample Num: 43104, Cur Loss: 0.00001237, Cur Avg Loss: 0.00012989, Log Avg loss: 0.00012123, Global Avg Loss: 0.00509977, Time: 0.2198 Steps: 97200, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 002894, Sample Num: 46304, Cur Loss: 0.00000120, Cur Avg Loss: 0.00013477, Log Avg loss: 0.00020052, Global Avg Loss: 0.00508971, Time: 0.2711 Steps: 97400, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003094, Sample Num: 49504, Cur Loss: 0.00000536, Cur Avg Loss: 0.00013690, Log Avg loss: 0.00016762, Global Avg Loss: 0.00507963, Time: 0.2201 Steps: 97600, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003294, Sample Num: 52704, Cur Loss: 0.00000139, Cur Avg Loss: 0.00013576, Log Avg loss: 0.00011822, Global Avg Loss: 0.00506948, Time: 0.4465 Steps: 97800, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003494, Sample Num: 55904, Cur Loss: 0.00000079, Cur Avg Loss: 0.00013475, Log Avg loss: 0.00011807, Global Avg Loss: 0.00505938, Time: 0.2133 Steps: 98000, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003694, Sample Num: 59104, Cur Loss: 0.00000045, Cur Avg Loss: 0.00013549, Log Avg loss: 0.00014834, Global Avg Loss: 0.00504937, Time: 0.2180 Steps: 98200, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 003894, Sample Num: 62304, Cur Loss: 0.00000231, Cur Avg Loss: 0.00012981, Log Avg loss: 0.00002487, Global Avg Loss: 0.00503916, Time: 0.2184 Steps: 98400, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 004094, Sample Num: 65504, Cur Loss: 0.00000018, Cur Avg Loss: 0.00013016, Log Avg loss: 0.00013699, Global Avg Loss: 0.00502922, Time: 0.2194 Steps: 98600, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 004294, Sample Num: 68704, Cur Loss: 0.00003792, Cur Avg Loss: 0.00012750, Log Avg loss: 0.00007304, Global Avg Loss: 0.00501919, Time: 0.2534 Steps: 98800, Updated lr: 0.000061 Training, Epoch: 0020, Batch: 004494, Sample Num: 71904, Cur Loss: 0.00000301, Cur Avg Loss: 0.00012924, Log Avg loss: 0.00016677, Global Avg Loss: 0.00500938, Time: 0.1716 Steps: 99000, Updated lr: 0.000060 Training, Epoch: 0020, Batch: 004694, Sample Num: 75104, Cur Loss: 0.00000450, Cur Avg Loss: 0.00013080, Log Avg loss: 0.00016575, Global Avg Loss: 0.00499962, Time: 0.2071 Steps: 99200, Updated lr: 0.000060 Training, Epoch: 0020, Batch: 004894, Sample Num: 78304, Cur Loss: 0.00000138, Cur Avg Loss: 0.00013156, Log Avg loss: 0.00014941, Global Avg Loss: 0.00498986, Time: 0.3715 Steps: 99400, Updated lr: 0.000060 ***** Running evaluation checkpoint-99480 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-99480 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1287.707749, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001757, "eval_total_loss": 1.873423, "eval_acc": 0.99971, "eval_jaccard": 0.987842, "eval_prec": 0.988973, "eval_recall": 0.989475, "eval_f1": 0.988825, "eval_pr_auc": 0.995777, "eval_roc_auc": 0.999442, "eval_fmax": 0.99486, "eval_pmax": 0.997461, 
"eval_rmax": 0.992271, "eval_tmax": 0.11, "update_flag": false, "test_avg_loss": 0.00209, "test_total_loss": 2.227549, "test_acc": 0.99971, "test_jaccard": 0.986991, "test_prec": 0.987796, "test_recall": 0.988658, "test_f1": 0.987858, "test_pr_auc": 0.994881, "test_roc_auc": 0.99918, "test_fmax": 0.994263, "test_pmax": 0.996437, "test_rmax": 0.992098, "test_tmax": 0.04, "lr": 6.024222850222043e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004986119769154523, "train_cur_epoch_loss": 0.6711180866110986, "train_cur_epoch_avg_loss": 0.0001349252285104742, "train_cur_epoch_time": 1287.7077486515045, "train_cur_epoch_avg_time": 0.2588877661140942, "epoch": 20, "step": 99480} ################################################## Training, Epoch: 0021, Batch: 000120, Sample Num: 1920, Cur Loss: 0.00000100, Cur Avg Loss: 0.00012213, Log Avg loss: 0.00020961, Global Avg Loss: 0.00498026, Time: 0.2139 Steps: 99600, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000320, Sample Num: 5120, Cur Loss: 0.00001376, Cur Avg Loss: 0.00010695, Log Avg loss: 0.00009784, Global Avg Loss: 0.00497048, Time: 0.2681 Steps: 99800, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00005615, Cur Avg Loss: 0.00016203, Log Avg loss: 0.00025017, Global Avg Loss: 0.00496103, Time: 0.2174 Steps: 100000, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000720, Sample Num: 11520, Cur Loss: 0.00000559, Cur Avg Loss: 0.00014891, Log Avg loss: 0.00011480, Global Avg Loss: 0.00495136, Time: 0.1099 Steps: 100200, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 000920, Sample Num: 14720, Cur Loss: 0.00000417, Cur Avg Loss: 0.00016229, Log Avg loss: 0.00021044, Global Avg Loss: 0.00494192, Time: 0.0854 Steps: 100400, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001120, Sample Num: 17920, Cur Loss: 0.00000043, Cur Avg Loss: 0.00014907, Log Avg loss: 0.00008828, Global Avg Loss: 0.00493227, Time: 0.2551 Steps: 100600, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001320, Sample Num: 21120, Cur Loss: 0.00010668, Cur Avg Loss: 0.00013600, Log Avg loss: 0.00006276, Global Avg Loss: 0.00492261, Time: 0.4218 Steps: 100800, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00000205, Cur Avg Loss: 0.00012338, Log Avg loss: 0.00004012, Global Avg Loss: 0.00491294, Time: 0.2711 Steps: 101000, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001720, Sample Num: 27520, Cur Loss: 0.00000294, Cur Avg Loss: 0.00012280, Log Avg loss: 0.00011837, Global Avg Loss: 0.00490346, Time: 0.2706 Steps: 101200, Updated lr: 0.000060 Training, Epoch: 0021, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00000046, Cur Avg Loss: 0.00011772, Log Avg loss: 0.00007407, Global Avg Loss: 0.00489394, Time: 0.4420 Steps: 101400, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002120, Sample Num: 33920, Cur Loss: 0.00027299, Cur Avg Loss: 0.00012017, Log Avg loss: 0.00014370, Global Avg Loss: 0.00488459, Time: 0.3984 Steps: 101600, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00000366, Cur Avg Loss: 0.00012690, Log Avg loss: 0.00019818, Global Avg Loss: 0.00487538, Time: 0.3209 Steps: 101800, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002520, Sample Num: 40320, Cur Loss: 0.00000267, Cur Avg Loss: 0.00011841, Log Avg loss: 0.00001994, Global Avg Loss: 0.00486586, Time: 0.3331 Steps: 102000, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00003519, Cur Avg Loss: 
0.00012051, Log Avg loss: 0.00014705, Global Avg Loss: 0.00485662, Time: 0.2340 Steps: 102200, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 002920, Sample Num: 46720, Cur Loss: 0.00013801, Cur Avg Loss: 0.00013346, Log Avg loss: 0.00030950, Global Avg Loss: 0.00484774, Time: 0.2192 Steps: 102400, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003120, Sample Num: 49920, Cur Loss: 0.00004999, Cur Avg Loss: 0.00013466, Log Avg loss: 0.00015216, Global Avg Loss: 0.00483859, Time: 0.2196 Steps: 102600, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003320, Sample Num: 53120, Cur Loss: 0.00000310, Cur Avg Loss: 0.00013387, Log Avg loss: 0.00012156, Global Avg Loss: 0.00482941, Time: 0.4122 Steps: 102800, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00000163, Cur Avg Loss: 0.00013281, Log Avg loss: 0.00011522, Global Avg Loss: 0.00482026, Time: 0.2390 Steps: 103000, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003720, Sample Num: 59520, Cur Loss: 0.00000010, Cur Avg Loss: 0.00013351, Log Avg loss: 0.00014589, Global Avg Loss: 0.00481120, Time: 0.2187 Steps: 103200, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 003920, Sample Num: 62720, Cur Loss: 0.00000342, Cur Avg Loss: 0.00012871, Log Avg loss: 0.00003947, Global Avg Loss: 0.00480197, Time: 0.2257 Steps: 103400, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00000091, Cur Avg Loss: 0.00012812, Log Avg loss: 0.00011648, Global Avg Loss: 0.00479293, Time: 0.2168 Steps: 103600, Updated lr: 0.000059 Training, Epoch: 0021, Batch: 004320, Sample Num: 69120, Cur Loss: 0.00000488, Cur Avg Loss: 0.00012727, Log Avg loss: 0.00010984, Global Avg Loss: 0.00478390, Time: 0.2444 Steps: 103800, Updated lr: 0.000058 Training, Epoch: 0021, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00000200, Cur Avg Loss: 0.00012782, Log Avg loss: 0.00013971, Global Avg Loss: 0.00477497, Time: 0.5576 Steps: 104000, Updated lr: 0.000058 Training, Epoch: 0021, Batch: 004720, Sample Num: 75520, Cur Loss: 0.00000846, Cur Avg Loss: 0.00012754, Log Avg loss: 0.00012108, Global Avg Loss: 0.00476604, Time: 0.2175 Steps: 104200, Updated lr: 0.000058 Training, Epoch: 0021, Batch: 004920, Sample Num: 78720, Cur Loss: 0.00000026, Cur Avg Loss: 0.00012762, Log Avg loss: 0.00012953, Global Avg Loss: 0.00475716, Time: 0.2195 Steps: 104400, Updated lr: 0.000058 ***** Running evaluation checkpoint-104454 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-104454 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1269.779508, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001802, "eval_total_loss": 1.92041, "eval_acc": 0.999717, "eval_jaccard": 0.988356, "eval_prec": 0.989528, "eval_recall": 0.989903, "eval_f1": 0.98931, "eval_pr_auc": 0.995903, "eval_roc_auc": 0.999441, "eval_fmax": 0.994636, "eval_pmax": 0.996233, "eval_rmax": 0.993043, "eval_tmax": 0.05, "update_flag": true, "test_avg_loss": 0.002095, "test_total_loss": 2.23352, "test_acc": 0.999712, "test_jaccard": 0.987026, "test_prec": 0.987853, "test_recall": 0.988657, "test_f1": 0.9879, "test_pr_auc": 0.994994, "test_roc_auc": 0.999176, "test_fmax": 0.99426, "test_pmax": 0.996091, "test_rmax": 0.992435, "test_tmax": 0.04, "lr": 5.8234154218813086e-05, "cur_epoch_step": 4974, 
"train_global_avg_loss": 0.004754861204578217, "train_cur_epoch_loss": 0.6450776275214407, "train_cur_epoch_avg_loss": 0.00012968991305215938, "train_cur_epoch_time": 1269.7795078754425, "train_cur_epoch_avg_time": 0.25528337512574234, "epoch": 21, "step": 104454} ################################################## Training, Epoch: 0022, Batch: 000146, Sample Num: 2336, Cur Loss: 0.00006520, Cur Avg Loss: 0.00009988, Log Avg loss: 0.00015887, Global Avg Loss: 0.00474836, Time: 0.2184 Steps: 104600, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000346, Sample Num: 5536, Cur Loss: 0.00000127, Cur Avg Loss: 0.00008800, Log Avg loss: 0.00007932, Global Avg Loss: 0.00473945, Time: 0.2222 Steps: 104800, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000546, Sample Num: 8736, Cur Loss: 0.00000319, Cur Avg Loss: 0.00012651, Log Avg loss: 0.00019314, Global Avg Loss: 0.00473079, Time: 0.3459 Steps: 105000, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000746, Sample Num: 11936, Cur Loss: 0.00000271, Cur Avg Loss: 0.00013592, Log Avg loss: 0.00016159, Global Avg Loss: 0.00472211, Time: 0.3942 Steps: 105200, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 000946, Sample Num: 15136, Cur Loss: 0.00000068, Cur Avg Loss: 0.00013031, Log Avg loss: 0.00010942, Global Avg Loss: 0.00471335, Time: 0.4158 Steps: 105400, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001146, Sample Num: 18336, Cur Loss: 0.00000164, Cur Avg Loss: 0.00012814, Log Avg loss: 0.00011788, Global Avg Loss: 0.00470465, Time: 0.2864 Steps: 105600, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001346, Sample Num: 21536, Cur Loss: 0.00000070, Cur Avg Loss: 0.00011451, Log Avg loss: 0.00003641, Global Avg Loss: 0.00469583, Time: 0.2190 Steps: 105800, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001546, Sample Num: 24736, Cur Loss: 0.00000020, Cur Avg Loss: 0.00010812, Log Avg loss: 0.00006510, Global Avg Loss: 0.00468709, Time: 0.2671 Steps: 106000, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001746, Sample Num: 27936, Cur Loss: 0.00000149, Cur Avg Loss: 0.00010778, Log Avg loss: 0.00010514, Global Avg Loss: 0.00467846, Time: 0.2207 Steps: 106200, Updated lr: 0.000058 Training, Epoch: 0022, Batch: 001946, Sample Num: 31136, Cur Loss: 0.00000005, Cur Avg Loss: 0.00010064, Log Avg loss: 0.00003828, Global Avg Loss: 0.00466974, Time: 0.2173 Steps: 106400, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002146, Sample Num: 34336, Cur Loss: 0.00000458, Cur Avg Loss: 0.00010743, Log Avg loss: 0.00017356, Global Avg Loss: 0.00466130, Time: 0.4444 Steps: 106600, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002346, Sample Num: 37536, Cur Loss: 0.00000344, Cur Avg Loss: 0.00011154, Log Avg loss: 0.00015563, Global Avg Loss: 0.00465286, Time: 0.5329 Steps: 106800, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002546, Sample Num: 40736, Cur Loss: 0.00000068, Cur Avg Loss: 0.00010508, Log Avg loss: 0.00002929, Global Avg Loss: 0.00464422, Time: 0.4432 Steps: 107000, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002746, Sample Num: 43936, Cur Loss: 0.00000179, Cur Avg Loss: 0.00011568, Log Avg loss: 0.00025062, Global Avg Loss: 0.00463603, Time: 0.3739 Steps: 107200, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 002946, Sample Num: 47136, Cur Loss: 0.00030002, Cur Avg Loss: 0.00011781, Log Avg loss: 0.00014702, Global Avg Loss: 0.00462767, Time: 0.2267 Steps: 107400, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 003146, Sample Num: 50336, Cur Loss: 0.00000124, Cur Avg Loss: 0.00012135, 
Log Avg loss: 0.00017361, Global Avg Loss: 0.00461939, Time: 0.2154 Steps: 107600, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 003346, Sample Num: 53536, Cur Loss: 0.00005969, Cur Avg Loss: 0.00012210, Log Avg loss: 0.00013374, Global Avg Loss: 0.00461106, Time: 0.2467 Steps: 107800, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 003546, Sample Num: 56736, Cur Loss: 0.00000827, Cur Avg Loss: 0.00012110, Log Avg loss: 0.00010453, Global Avg Loss: 0.00460272, Time: 0.2906 Steps: 108000, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 003746, Sample Num: 59936, Cur Loss: 0.00000210, Cur Avg Loss: 0.00012051, Log Avg loss: 0.00010997, Global Avg Loss: 0.00459442, Time: 0.2185 Steps: 108200, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 003946, Sample Num: 63136, Cur Loss: 0.00001034, Cur Avg Loss: 0.00011762, Log Avg loss: 0.00006342, Global Avg Loss: 0.00458606, Time: 0.3280 Steps: 108400, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 004146, Sample Num: 66336, Cur Loss: 0.00000185, Cur Avg Loss: 0.00011670, Log Avg loss: 0.00009852, Global Avg Loss: 0.00457779, Time: 0.2195 Steps: 108600, Updated lr: 0.000057 Training, Epoch: 0022, Batch: 004346, Sample Num: 69536, Cur Loss: 0.00000041, Cur Avg Loss: 0.00011573, Log Avg loss: 0.00009562, Global Avg Loss: 0.00456955, Time: 0.2690 Steps: 108800, Updated lr: 0.000056 Training, Epoch: 0022, Batch: 004546, Sample Num: 72736, Cur Loss: 0.00012805, Cur Avg Loss: 0.00011713, Log Avg loss: 0.00014757, Global Avg Loss: 0.00456144, Time: 0.2152 Steps: 109000, Updated lr: 0.000056 Training, Epoch: 0022, Batch: 004746, Sample Num: 75936, Cur Loss: 0.00000754, Cur Avg Loss: 0.00011844, Log Avg loss: 0.00014833, Global Avg Loss: 0.00455336, Time: 0.2926 Steps: 109200, Updated lr: 0.000056 Training, Epoch: 0022, Batch: 004946, Sample Num: 79136, Cur Loss: 0.00000415, Cur Avg Loss: 0.00012320, Log Avg loss: 0.00023606, Global Avg Loss: 0.00454546, Time: 0.2125 Steps: 109400, Updated lr: 0.000056 ***** Running evaluation checkpoint-109428 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-109428 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1300.339403, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001812, "eval_total_loss": 1.931779, "eval_acc": 0.999716, "eval_jaccard": 0.988217, "eval_prec": 0.989344, "eval_recall": 0.989948, "eval_f1": 0.989234, "eval_pr_auc": 0.996033, "eval_roc_auc": 0.999456, "eval_fmax": 0.994481, "eval_pmax": 0.996704, "eval_rmax": 0.992269, "eval_tmax": 0.1, "update_flag": false, "test_avg_loss": 0.002164, "test_total_loss": 2.30676, "test_acc": 0.999708, "test_jaccard": 0.986842, "test_prec": 0.987702, "test_recall": 0.988746, "test_f1": 0.987829, "test_pr_auc": 0.994956, "test_roc_auc": 0.999183, "test_fmax": 0.993794, "test_pmax": 0.996679, "test_rmax": 0.990925, "test_tmax": 0.11, "lr": 5.622607993540574e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0045443779774159915, "train_cur_epoch_loss": 0.6179210496638383, "train_cur_epoch_avg_loss": 0.0001242302070092156, "train_cur_epoch_time": 1300.3394029140472, "train_cur_epoch_avg_time": 0.2614273025561012, "epoch": 22, "step": 109428} ################################################## Training, Epoch: 0023, Batch: 000172, Sample Num: 2752, Cur Loss: 0.00000491, Cur Avg Loss: 0.00012150, Log Avg loss: 0.00014742, 
Global Avg Loss: 0.00453744, Time: 0.2467 Steps: 109600, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 000372, Sample Num: 5952, Cur Loss: 0.00001267, Cur Avg Loss: 0.00011613, Log Avg loss: 0.00011151, Global Avg Loss: 0.00452938, Time: 0.2188 Steps: 109800, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 000572, Sample Num: 9152, Cur Loss: 0.00054393, Cur Avg Loss: 0.00012557, Log Avg loss: 0.00014314, Global Avg Loss: 0.00452140, Time: 0.2178 Steps: 110000, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 000772, Sample Num: 12352, Cur Loss: 0.00000191, Cur Avg Loss: 0.00013945, Log Avg loss: 0.00017913, Global Avg Loss: 0.00451352, Time: 0.4778 Steps: 110200, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 000972, Sample Num: 15552, Cur Loss: 0.00000222, Cur Avg Loss: 0.00012649, Log Avg loss: 0.00007646, Global Avg Loss: 0.00450548, Time: 0.1082 Steps: 110400, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 001172, Sample Num: 18752, Cur Loss: 0.00000787, Cur Avg Loss: 0.00012527, Log Avg loss: 0.00011934, Global Avg Loss: 0.00449755, Time: 0.2229 Steps: 110600, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 001372, Sample Num: 21952, Cur Loss: 0.00000339, Cur Avg Loss: 0.00011342, Log Avg loss: 0.00004398, Global Avg Loss: 0.00448951, Time: 0.2789 Steps: 110800, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 001572, Sample Num: 25152, Cur Loss: 0.00000118, Cur Avg Loss: 0.00010865, Log Avg loss: 0.00007596, Global Avg Loss: 0.00448156, Time: 0.2374 Steps: 111000, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 001772, Sample Num: 28352, Cur Loss: 0.00000122, Cur Avg Loss: 0.00010562, Log Avg loss: 0.00008175, Global Avg Loss: 0.00447365, Time: 0.2209 Steps: 111200, Updated lr: 0.000056 Training, Epoch: 0023, Batch: 001972, Sample Num: 31552, Cur Loss: 0.00000261, Cur Avg Loss: 0.00009977, Log Avg loss: 0.00004797, Global Avg Loss: 0.00446570, Time: 0.2209 Steps: 111400, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 002172, Sample Num: 34752, Cur Loss: 0.00000401, Cur Avg Loss: 0.00010600, Log Avg loss: 0.00016742, Global Avg Loss: 0.00445800, Time: 0.4268 Steps: 111600, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 002372, Sample Num: 37952, Cur Loss: 0.00000913, Cur Avg Loss: 0.00011976, Log Avg loss: 0.00026916, Global Avg Loss: 0.00445050, Time: 0.1131 Steps: 111800, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 002572, Sample Num: 41152, Cur Loss: 0.00000209, Cur Avg Loss: 0.00011185, Log Avg loss: 0.00001811, Global Avg Loss: 0.00444259, Time: 0.2181 Steps: 112000, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 002772, Sample Num: 44352, Cur Loss: 0.00003759, Cur Avg Loss: 0.00012025, Log Avg loss: 0.00022818, Global Avg Loss: 0.00443508, Time: 0.2208 Steps: 112200, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 002972, Sample Num: 47552, Cur Loss: 0.00000059, Cur Avg Loss: 0.00012115, Log Avg loss: 0.00013372, Global Avg Loss: 0.00442742, Time: 0.2494 Steps: 112400, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 003172, Sample Num: 50752, Cur Loss: 0.00001730, Cur Avg Loss: 0.00012380, Log Avg loss: 0.00016316, Global Avg Loss: 0.00441985, Time: 0.2453 Steps: 112600, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 003372, Sample Num: 53952, Cur Loss: 0.00000036, Cur Avg Loss: 0.00012221, Log Avg loss: 0.00009690, Global Avg Loss: 0.00441218, Time: 0.4650 Steps: 112800, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 003572, Sample Num: 57152, Cur Loss: 0.00002818, Cur Avg Loss: 0.00012087, Log Avg loss: 0.00009833, 
Global Avg Loss: 0.00440455, Time: 0.2102 Steps: 113000, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 003772, Sample Num: 60352, Cur Loss: 0.00000269, Cur Avg Loss: 0.00011928, Log Avg loss: 0.00009100, Global Avg Loss: 0.00439693, Time: 0.2168 Steps: 113200, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 003972, Sample Num: 63552, Cur Loss: 0.00000017, Cur Avg Loss: 0.00011731, Log Avg loss: 0.00008005, Global Avg Loss: 0.00438931, Time: 0.2182 Steps: 113400, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 004172, Sample Num: 66752, Cur Loss: 0.00000407, Cur Avg Loss: 0.00011351, Log Avg loss: 0.00003813, Global Avg Loss: 0.00438165, Time: 0.2777 Steps: 113600, Updated lr: 0.000055 Training, Epoch: 0023, Batch: 004372, Sample Num: 69952, Cur Loss: 0.00001735, Cur Avg Loss: 0.00011276, Log Avg loss: 0.00009707, Global Avg Loss: 0.00437412, Time: 0.3088 Steps: 113800, Updated lr: 0.000054 Training, Epoch: 0023, Batch: 004572, Sample Num: 73152, Cur Loss: 0.00004384, Cur Avg Loss: 0.00011353, Log Avg loss: 0.00013039, Global Avg Loss: 0.00436668, Time: 0.3057 Steps: 114000, Updated lr: 0.000054 Training, Epoch: 0023, Batch: 004772, Sample Num: 76352, Cur Loss: 0.00000325, Cur Avg Loss: 0.00011660, Log Avg loss: 0.00018661, Global Avg Loss: 0.00435936, Time: 0.2218 Steps: 114200, Updated lr: 0.000054 Training, Epoch: 0023, Batch: 004972, Sample Num: 79552, Cur Loss: 0.00000657, Cur Avg Loss: 0.00012102, Log Avg loss: 0.00022667, Global Avg Loss: 0.00435213, Time: 0.2181 Steps: 114400, Updated lr: 0.000054 ***** Running evaluation checkpoint-114402 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-114402 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1352.442308, Avg time per batch (s): 0.270000 {"eval_avg_loss": 0.001824, "eval_total_loss": 1.944773, "eval_acc": 0.999718, "eval_jaccard": 0.988244, "eval_prec": 0.989398, "eval_recall": 0.989902, "eval_f1": 0.989237, "eval_pr_auc": 0.995796, "eval_roc_auc": 0.9994, "eval_fmax": 0.994731, "eval_pmax": 0.996524, "eval_rmax": 0.992944, "eval_tmax": 0.06, "update_flag": false, "test_avg_loss": 0.002129, "test_total_loss": 2.269056, "test_acc": 0.999719, "test_jaccard": 0.987341, "test_prec": 0.98822, "test_recall": 0.98902, "test_f1": 0.988243, "test_pr_auc": 0.994844, "test_roc_auc": 0.999142, "test_fmax": 0.994129, "test_pmax": 0.997195, "test_rmax": 0.991082, "test_tmax": 0.15, "lr": 5.421800565199838e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004352056118424228, "train_cur_epoch_loss": 0.6017307472917346, "train_cur_epoch_avg_loss": 0.0001209752206054955, "train_cur_epoch_time": 1352.4423084259033, "train_cur_epoch_avg_time": 0.27190235392559375, "epoch": 23, "step": 114402} ################################################## Training, Epoch: 0024, Batch: 000198, Sample Num: 3168, Cur Loss: 0.00000229, Cur Avg Loss: 0.00010487, Log Avg loss: 0.00010383, Global Avg Loss: 0.00434472, Time: 0.2728 Steps: 114600, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 000398, Sample Num: 6368, Cur Loss: 0.00002662, Cur Avg Loss: 0.00010866, Log Avg loss: 0.00011242, Global Avg Loss: 0.00433734, Time: 0.2183 Steps: 114800, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 000598, Sample Num: 9568, Cur Loss: 0.00000208, Cur Avg Loss: 0.00011603, Log Avg loss: 0.00013069, Global Avg Loss: 0.00433003, 
Time: 0.2869 Steps: 115000, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 000798, Sample Num: 12768, Cur Loss: 0.00000061, Cur Avg Loss: 0.00012074, Log Avg loss: 0.00013481, Global Avg Loss: 0.00432275, Time: 0.0764 Steps: 115200, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 000998, Sample Num: 15968, Cur Loss: 0.00001076, Cur Avg Loss: 0.00010925, Log Avg loss: 0.00006340, Global Avg Loss: 0.00431536, Time: 0.3792 Steps: 115400, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001198, Sample Num: 19168, Cur Loss: 0.00000159, Cur Avg Loss: 0.00011182, Log Avg loss: 0.00012470, Global Avg Loss: 0.00430811, Time: 0.2839 Steps: 115600, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001398, Sample Num: 22368, Cur Loss: 0.00000130, Cur Avg Loss: 0.00009951, Log Avg loss: 0.00002571, Global Avg Loss: 0.00430072, Time: 0.3263 Steps: 115800, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001598, Sample Num: 25568, Cur Loss: 0.00000143, Cur Avg Loss: 0.00009309, Log Avg loss: 0.00004825, Global Avg Loss: 0.00429339, Time: 0.2199 Steps: 116000, Updated lr: 0.000054 Training, Epoch: 0024, Batch: 001798, Sample Num: 28768, Cur Loss: 0.00001331, Cur Avg Loss: 0.00009279, Log Avg loss: 0.00009039, Global Avg Loss: 0.00428615, Time: 0.2197 Steps: 116200, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 001998, Sample Num: 31968, Cur Loss: 0.00000030, Cur Avg Loss: 0.00008725, Log Avg loss: 0.00003749, Global Avg Loss: 0.00427885, Time: 0.2959 Steps: 116400, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002198, Sample Num: 35168, Cur Loss: 0.00000021, Cur Avg Loss: 0.00010236, Log Avg loss: 0.00025323, Global Avg Loss: 0.00427195, Time: 0.2171 Steps: 116600, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002398, Sample Num: 38368, Cur Loss: 0.00000563, Cur Avg Loss: 0.00009990, Log Avg loss: 0.00007286, Global Avg Loss: 0.00426476, Time: 0.0881 Steps: 116800, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002598, Sample Num: 41568, Cur Loss: 0.00000824, Cur Avg Loss: 0.00009835, Log Avg loss: 0.00007976, Global Avg Loss: 0.00425760, Time: 0.3082 Steps: 117000, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002798, Sample Num: 44768, Cur Loss: 0.00043022, Cur Avg Loss: 0.00010721, Log Avg loss: 0.00022238, Global Avg Loss: 0.00425072, Time: 0.1612 Steps: 117200, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 002998, Sample Num: 47968, Cur Loss: 0.00000498, Cur Avg Loss: 0.00010914, Log Avg loss: 0.00013618, Global Avg Loss: 0.00424371, Time: 0.5270 Steps: 117400, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003198, Sample Num: 51168, Cur Loss: 0.00000384, Cur Avg Loss: 0.00010790, Log Avg loss: 0.00008930, Global Avg Loss: 0.00423664, Time: 0.1302 Steps: 117600, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003398, Sample Num: 54368, Cur Loss: 0.00000047, Cur Avg Loss: 0.00010771, Log Avg loss: 0.00010466, Global Avg Loss: 0.00422963, Time: 0.2173 Steps: 117800, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003598, Sample Num: 57568, Cur Loss: 0.00000092, Cur Avg Loss: 0.00010724, Log Avg loss: 0.00009925, Global Avg Loss: 0.00422263, Time: 0.4550 Steps: 118000, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003798, Sample Num: 60768, Cur Loss: 0.00000835, Cur Avg Loss: 0.00010599, Log Avg loss: 0.00008353, Global Avg Loss: 0.00421562, Time: 0.2177 Steps: 118200, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 003998, Sample Num: 63968, Cur Loss: 0.00000073, Cur Avg Loss: 0.00010407, Log Avg loss: 0.00006746, Global Avg Loss: 0.00420861, 
Time: 0.2183 Steps: 118400, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 004198, Sample Num: 67168, Cur Loss: 0.00000412, Cur Avg Loss: 0.00010229, Log Avg loss: 0.00006684, Global Avg Loss: 0.00420163, Time: 0.2137 Steps: 118600, Updated lr: 0.000053 Training, Epoch: 0024, Batch: 004398, Sample Num: 70368, Cur Loss: 0.00000099, Cur Avg Loss: 0.00010138, Log Avg loss: 0.00008224, Global Avg Loss: 0.00419470, Time: 0.2193 Steps: 118800, Updated lr: 0.000052 Training, Epoch: 0024, Batch: 004598, Sample Num: 73568, Cur Loss: 0.00000056, Cur Avg Loss: 0.00010333, Log Avg loss: 0.00014617, Global Avg Loss: 0.00418789, Time: 0.3936 Steps: 119000, Updated lr: 0.000052 Training, Epoch: 0024, Batch: 004798, Sample Num: 76768, Cur Loss: 0.00071145, Cur Avg Loss: 0.00010626, Log Avg loss: 0.00017372, Global Avg Loss: 0.00418116, Time: 0.5153 Steps: 119200, Updated lr: 0.000052 ***** Running evaluation checkpoint-119376 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-119376 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1265.125220, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001871, "eval_total_loss": 1.994096, "eval_acc": 0.999729, "eval_jaccard": 0.988538, "eval_prec": 0.989545, "eval_recall": 0.990185, "eval_f1": 0.989466, "eval_pr_auc": 0.995927, "eval_roc_auc": 0.999426, "eval_fmax": 0.994562, "eval_pmax": 0.997103, "eval_rmax": 0.992035, "eval_tmax": 0.1, "update_flag": true, "test_avg_loss": 0.002244, "test_total_loss": 2.392392, "test_acc": 0.999719, "test_jaccard": 0.987087, "test_prec": 0.987858, "test_recall": 0.988775, "test_f1": 0.987949, "test_pr_auc": 0.994928, "test_roc_auc": 0.999143, "test_fmax": 0.994057, "test_pmax": 0.997263, "test_rmax": 0.990872, "test_tmax": 0.13, "lr": 5.220993136859104e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004175243035820417, "train_cur_epoch_loss": 0.5398885841296057, "train_cur_epoch_avg_loss": 0.00010854213593277155, "train_cur_epoch_time": 1265.1252200603485, "train_cur_epoch_avg_time": 0.2543476518014372, "epoch": 24, "step": 119376} ################################################## Training, Epoch: 0025, Batch: 000024, Sample Num: 384, Cur Loss: 0.00000295, Cur Avg Loss: 0.00008037, Log Avg loss: 0.00015985, Global Avg Loss: 0.00417442, Time: 0.2179 Steps: 119400, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000224, Sample Num: 3584, Cur Loss: 0.00000323, Cur Avg Loss: 0.00009564, Log Avg loss: 0.00009747, Global Avg Loss: 0.00416760, Time: 0.2475 Steps: 119600, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000424, Sample Num: 6784, Cur Loss: 0.00000717, Cur Avg Loss: 0.00008850, Log Avg loss: 0.00008050, Global Avg Loss: 0.00416078, Time: 0.2336 Steps: 119800, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000624, Sample Num: 9984, Cur Loss: 0.00000469, Cur Avg Loss: 0.00010431, Log Avg loss: 0.00013783, Global Avg Loss: 0.00415407, Time: 0.3362 Steps: 120000, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 000824, Sample Num: 13184, Cur Loss: 0.00001210, Cur Avg Loss: 0.00011946, Log Avg loss: 0.00016673, Global Avg Loss: 0.00414744, Time: 0.3470 Steps: 120200, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001024, Sample Num: 16384, Cur Loss: 0.00000042, Cur Avg Loss: 0.00010630, Log Avg loss: 0.00005206, Global Avg Loss: 0.00414064, Time: 0.2511 Steps: 120400, 
Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001224, Sample Num: 19584, Cur Loss: 0.00000055, Cur Avg Loss: 0.00010851, Log Avg loss: 0.00011984, Global Avg Loss: 0.00413397, Time: 0.2162 Steps: 120600, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001424, Sample Num: 22784, Cur Loss: 0.00000114, Cur Avg Loss: 0.00009600, Log Avg loss: 0.00001943, Global Avg Loss: 0.00412716, Time: 0.2170 Steps: 120800, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001624, Sample Num: 25984, Cur Loss: 0.00000004, Cur Avg Loss: 0.00009864, Log Avg loss: 0.00011742, Global Avg Loss: 0.00412053, Time: 0.2173 Steps: 121000, Updated lr: 0.000052 Training, Epoch: 0025, Batch: 001824, Sample Num: 29184, Cur Loss: 0.00000039, Cur Avg Loss: 0.00009498, Log Avg loss: 0.00006527, Global Avg Loss: 0.00411384, Time: 0.3889 Steps: 121200, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002024, Sample Num: 32384, Cur Loss: 0.00000439, Cur Avg Loss: 0.00009329, Log Avg loss: 0.00007789, Global Avg Loss: 0.00410719, Time: 0.3023 Steps: 121400, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002224, Sample Num: 35584, Cur Loss: 0.00000158, Cur Avg Loss: 0.00010109, Log Avg loss: 0.00018007, Global Avg Loss: 0.00410073, Time: 0.3675 Steps: 121600, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002424, Sample Num: 38784, Cur Loss: 0.00000521, Cur Avg Loss: 0.00010179, Log Avg loss: 0.00010950, Global Avg Loss: 0.00409418, Time: 0.2177 Steps: 121800, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002624, Sample Num: 41984, Cur Loss: 0.00000202, Cur Avg Loss: 0.00009980, Log Avg loss: 0.00007569, Global Avg Loss: 0.00408759, Time: 0.2556 Steps: 122000, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 002824, Sample Num: 45184, Cur Loss: 0.00000067, Cur Avg Loss: 0.00010456, Log Avg loss: 0.00016705, Global Avg Loss: 0.00408117, Time: 0.2194 Steps: 122200, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003024, Sample Num: 48384, Cur Loss: 0.00002046, Cur Avg Loss: 0.00010967, Log Avg loss: 0.00018187, Global Avg Loss: 0.00407480, Time: 0.1444 Steps: 122400, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003224, Sample Num: 51584, Cur Loss: 0.00000023, Cur Avg Loss: 0.00010586, Log Avg loss: 0.00004819, Global Avg Loss: 0.00406823, Time: 0.2180 Steps: 122600, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003424, Sample Num: 54784, Cur Loss: 0.00000061, Cur Avg Loss: 0.00010546, Log Avg loss: 0.00009904, Global Avg Loss: 0.00406177, Time: 0.3391 Steps: 122800, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003624, Sample Num: 57984, Cur Loss: 0.00000205, Cur Avg Loss: 0.00010455, Log Avg loss: 0.00008886, Global Avg Loss: 0.00405531, Time: 0.2177 Steps: 123000, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 003824, Sample Num: 61184, Cur Loss: 0.00000031, Cur Avg Loss: 0.00010356, Log Avg loss: 0.00008573, Global Avg Loss: 0.00404886, Time: 0.2170 Steps: 123200, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 004024, Sample Num: 64384, Cur Loss: 0.00000011, Cur Avg Loss: 0.00010164, Log Avg loss: 0.00006496, Global Avg Loss: 0.00404241, Time: 0.2178 Steps: 123400, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 004224, Sample Num: 67584, Cur Loss: 0.00000032, Cur Avg Loss: 0.00010081, Log Avg loss: 0.00008398, Global Avg Loss: 0.00403600, Time: 0.2368 Steps: 123600, Updated lr: 0.000051 Training, Epoch: 0025, Batch: 004424, Sample Num: 70784, Cur Loss: 0.00000022, Cur Avg Loss: 0.00009921, Log Avg loss: 0.00006554, Global Avg Loss: 0.00402959, Time: 0.2240 Steps: 123800, 
Updated lr: 0.000050 Training, Epoch: 0025, Batch: 004624, Sample Num: 73984, Cur Loss: 0.00000142, Cur Avg Loss: 0.00010075, Log Avg loss: 0.00013484, Global Avg Loss: 0.00402330, Time: 0.2165 Steps: 124000, Updated lr: 0.000050 Training, Epoch: 0025, Batch: 004824, Sample Num: 77184, Cur Loss: 0.00009555, Cur Avg Loss: 0.00010340, Log Avg loss: 0.00016466, Global Avg Loss: 0.00401709, Time: 0.1683 Steps: 124200, Updated lr: 0.000050 ***** Running evaluation checkpoint-124350 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-124350 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1238.960356, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001835, "eval_total_loss": 1.956068, "eval_acc": 0.999725, "eval_jaccard": 0.988483, "eval_prec": 0.989618, "eval_recall": 0.989998, "eval_f1": 0.989413, "eval_pr_auc": 0.995804, "eval_roc_auc": 0.999389, "eval_fmax": 0.994625, "eval_pmax": 0.997043, "eval_rmax": 0.992219, "eval_tmax": 0.08, "update_flag": false, "test_avg_loss": 0.002207, "test_total_loss": 2.352868, "test_acc": 0.999721, "test_jaccard": 0.987489, "test_prec": 0.988416, "test_recall": 0.989205, "test_f1": 0.988432, "test_pr_auc": 0.994849, "test_roc_auc": 0.999112, "test_fmax": 0.994108, "test_pmax": 0.996417, "test_rmax": 0.99181, "test_tmax": 0.04, "lr": 5.020185708518369e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.004012488418712855, "train_cur_epoch_loss": 0.529122222845015, "train_cur_epoch_avg_loss": 0.00010637760813128569, "train_cur_epoch_time": 1238.9603564739227, "train_cur_epoch_avg_time": 0.24908732538679587, "epoch": 25, "step": 124350} ################################################## Training, Epoch: 0026, Batch: 000050, Sample Num: 800, Cur Loss: 0.00000039, Cur Avg Loss: 0.00001641, Log Avg loss: 0.00015563, Global Avg Loss: 0.00401088, Time: 0.2141 Steps: 124400, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000250, Sample Num: 4000, Cur Loss: 0.00000518, Cur Avg Loss: 0.00007571, Log Avg loss: 0.00009053, Global Avg Loss: 0.00400459, Time: 0.2154 Steps: 124600, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000450, Sample Num: 7200, Cur Loss: 0.00000170, Cur Avg Loss: 0.00009172, Log Avg loss: 0.00011173, Global Avg Loss: 0.00399835, Time: 0.2283 Steps: 124800, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000650, Sample Num: 10400, Cur Loss: 0.00001229, Cur Avg Loss: 0.00009868, Log Avg loss: 0.00011434, Global Avg Loss: 0.00399214, Time: 0.0835 Steps: 125000, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 000850, Sample Num: 13600, Cur Loss: 0.00000192, Cur Avg Loss: 0.00010740, Log Avg loss: 0.00013575, Global Avg Loss: 0.00398598, Time: 0.3085 Steps: 125200, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001050, Sample Num: 16800, Cur Loss: 0.00000002, Cur Avg Loss: 0.00009354, Log Avg loss: 0.00003463, Global Avg Loss: 0.00397967, Time: 0.2169 Steps: 125400, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001250, Sample Num: 20000, Cur Loss: 0.00000182, Cur Avg Loss: 0.00009784, Log Avg loss: 0.00012039, Global Avg Loss: 0.00397353, Time: 0.2188 Steps: 125600, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001450, Sample Num: 23200, Cur Loss: 0.00000525, Cur Avg Loss: 0.00009222, Log Avg loss: 0.00005713, Global Avg Loss: 0.00396730, Time: 0.2186 Steps: 125800, Updated lr: 0.000050 
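##################################################
Note on the "Updated lr" column. The high-precision "lr" values in the evaluation blocks fit a simple linear schedule exactly: after a warmup phase, the rate decays linearly from its base value to zero at the end of the run (50 epochs x 4,974 steps/epoch = 248,700 steps). The sketch below is an assumed reconstruction, not the project's scheduler code; lr_at is a hypothetical helper, and the 1,000-step warmup length is inferred from the fit to the logged values.

def lr_at(step,
          base_lr=1e-4,
          warmup_steps=1000,
          total_steps=50 * 4974):  # 50 epochs x 4974 steps/epoch = 248700
    """Assumed linear warmup + linear decay schedule fitting the logged lr."""
    if step < warmup_steps:
        return base_lr * step / warmup_steps
    return base_lr * (total_steps - step) / (total_steps - warmup_steps)

For example, lr_at(104454) returns 5.8234154e-05 and lr_at(119376) returns 5.2209931e-05, matching the "lr" fields of the checkpoint-104454 and checkpoint-119376 evaluation blocks above to all printed digits.
##################################################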
Training, Epoch: 0026, Batch: 001650, Sample Num: 26400, Cur Loss: 0.00000313, Cur Avg Loss: 0.00008918, Log Avg loss: 0.00006713, Global Avg Loss: 0.00396111, Time: 0.2235 Steps: 126000, Updated lr: 0.000050 Training, Epoch: 0026, Batch: 001850, Sample Num: 29600, Cur Loss: 0.00031064, Cur Avg Loss: 0.00008384, Log Avg loss: 0.00003982, Global Avg Loss: 0.00395490, Time: 0.3554 Steps: 126200, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002050, Sample Num: 32800, Cur Loss: 0.00000001, Cur Avg Loss: 0.00008360, Log Avg loss: 0.00008130, Global Avg Loss: 0.00394877, Time: 0.0880 Steps: 126400, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002250, Sample Num: 36000, Cur Loss: 0.00000387, Cur Avg Loss: 0.00008964, Log Avg loss: 0.00015155, Global Avg Loss: 0.00394277, Time: 0.4282 Steps: 126600, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002450, Sample Num: 39200, Cur Loss: 0.00000009, Cur Avg Loss: 0.00008891, Log Avg loss: 0.00008075, Global Avg Loss: 0.00393668, Time: 0.2167 Steps: 126800, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002650, Sample Num: 42400, Cur Loss: 0.00096776, Cur Avg Loss: 0.00009020, Log Avg loss: 0.00010593, Global Avg Loss: 0.00393065, Time: 0.4065 Steps: 127000, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 002850, Sample Num: 45600, Cur Loss: 0.00000019, Cur Avg Loss: 0.00009518, Log Avg loss: 0.00016117, Global Avg Loss: 0.00392472, Time: 0.1086 Steps: 127200, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003050, Sample Num: 48800, Cur Loss: 0.00000016, Cur Avg Loss: 0.00010112, Log Avg loss: 0.00018588, Global Avg Loss: 0.00391885, Time: 0.1290 Steps: 127400, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003250, Sample Num: 52000, Cur Loss: 0.00000927, Cur Avg Loss: 0.00009816, Log Avg loss: 0.00005303, Global Avg Loss: 0.00391279, Time: 0.3391 Steps: 127600, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003450, Sample Num: 55200, Cur Loss: 0.00000089, Cur Avg Loss: 0.00009775, Log Avg loss: 0.00009103, Global Avg Loss: 0.00390681, Time: 0.2172 Steps: 127800, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003650, Sample Num: 58400, Cur Loss: 0.00000326, Cur Avg Loss: 0.00010169, Log Avg loss: 0.00016966, Global Avg Loss: 0.00390097, Time: 0.2188 Steps: 128000, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 003850, Sample Num: 61600, Cur Loss: 0.00000813, Cur Avg Loss: 0.00009779, Log Avg loss: 0.00002656, Global Avg Loss: 0.00389493, Time: 0.2145 Steps: 128200, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 004050, Sample Num: 64800, Cur Loss: 0.00001458, Cur Avg Loss: 0.00009550, Log Avg loss: 0.00005155, Global Avg Loss: 0.00388894, Time: 0.2158 Steps: 128400, Updated lr: 0.000049 Training, Epoch: 0026, Batch: 004250, Sample Num: 68000, Cur Loss: 0.00000005, Cur Avg Loss: 0.00009521, Log Avg loss: 0.00008914, Global Avg Loss: 0.00388303, Time: 0.1751 Steps: 128600, Updated lr: 0.000048 Training, Epoch: 0026, Batch: 004450, Sample Num: 71200, Cur Loss: 0.00435027, Cur Avg Loss: 0.00009376, Log Avg loss: 0.00006312, Global Avg Loss: 0.00387710, Time: 0.2168 Steps: 128800, Updated lr: 0.000048 Training, Epoch: 0026, Batch: 004650, Sample Num: 74400, Cur Loss: 0.00000532, Cur Avg Loss: 0.00009496, Log Avg loss: 0.00012160, Global Avg Loss: 0.00387128, Time: 0.2629 Steps: 129000, Updated lr: 0.000048 Training, Epoch: 0026, Batch: 004850, Sample Num: 77600, Cur Loss: 0.00000506, Cur Avg Loss: 0.00009963, Log Avg loss: 0.00020808, Global Avg Loss: 0.00386560, Time: 0.2154 Steps: 129200, Updated lr: 0.000048 
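##################################################
Note on the evaluation blocks (one follows immediately below). Each block runs the dev set (17,053 examples at batch size 16, i.e. 1,066 batches) and the test set, and eval_avg_loss is eval_total_loss divided by the batch count (e.g. 2.06135 / 1066 = 0.001934 in the block below). The update_flag field is consistent with tracking the best dev F1 so far: it is true below (eval_f1 0.989686 beats the 0.989466 from checkpoint-119376) but was false at checkpoint-124350, whose 0.989413 did not improve on it. The fmax/pmax/rmax/tmax quadruple is a threshold-swept F1: a decision threshold is scanned over the sigmoid outputs, and the precision, recall, and threshold at the F1-maximizing point are reported. The sketch below is an assumed implementation of that sweep (f_max is a hypothetical name, and the micro-averaged aggregation is a guess; the metric may instead average precision/recall per sample), using a 0.01 grid consistent with the two-decimal tmax values in the logs.

import numpy as np

def f_max(probs, targets, step=0.01):
    """Assumed fmax sweep. probs, targets: (num_samples, num_labels) arrays,
    targets in {0, 1}. Returns (fmax, pmax, rmax, tmax)."""
    best = (0.0, 0.0, 0.0, 0.0)
    for t in np.arange(step, 1.0, step):
        preds = (probs >= t).astype(int)
        tp = (preds * targets).sum()
        p = tp / max(preds.sum(), 1)    # precision at threshold t
        r = tp / max(targets.sum(), 1)  # recall at threshold t
        f1 = 2 * p * r / (p + r) if p + r > 0 else 0.0
        if f1 > best[0]:
            best = (f1, p, r, round(float(t), 2))
    return best
##################################################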
***** Running evaluation checkpoint-129324 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-129324 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1242.958604, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001934, "eval_total_loss": 2.06135, "eval_acc": 0.999729, "eval_jaccard": 0.988736, "eval_prec": 0.989859, "eval_recall": 0.990304, "eval_f1": 0.989686, "eval_pr_auc": 0.996025, "eval_roc_auc": 0.99946, "eval_fmax": 0.99466, "eval_pmax": 0.997621, "eval_rmax": 0.991716, "eval_tmax": 0.15, "update_flag": true, "test_avg_loss": 0.002231, "test_total_loss": 2.378025, "test_acc": 0.99971, "test_jaccard": 0.986969, "test_prec": 0.987799, "test_recall": 0.988717, "test_f1": 0.987871, "test_pr_auc": 0.994915, "test_roc_auc": 0.999166, "test_fmax": 0.994258, "test_pmax": 0.996591, "test_rmax": 0.991937, "test_tmax": 0.05, "lr": 4.8193782801776346e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.003862094952423086, "train_cur_epoch_loss": 0.5086327602185257, "train_cur_epoch_avg_loss": 0.00010225829517863403, "train_cur_epoch_time": 1242.958604335785, "train_cur_epoch_avg_time": 0.24989115487249394, "epoch": 26, "step": 129324} ################################################## Training, Epoch: 0027, Batch: 000076, Sample Num: 1216, Cur Loss: 0.00000282, Cur Avg Loss: 0.00014674, Log Avg loss: 0.00018301, Global Avg Loss: 0.00385991, Time: 0.2164 Steps: 129400, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 000276, Sample Num: 4416, Cur Loss: 0.00000407, Cur Avg Loss: 0.00010567, Log Avg loss: 0.00009007, Global Avg Loss: 0.00385410, Time: 0.2188 Steps: 129600, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 000476, Sample Num: 7616, Cur Loss: 0.00000867, Cur Avg Loss: 0.00011361, Log Avg loss: 0.00012456, Global Avg Loss: 0.00384835, Time: 0.2154 Steps: 129800, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 000676, Sample Num: 10816, Cur Loss: 0.00000112, Cur Avg Loss: 0.00010418, Log Avg loss: 0.00008173, Global Avg Loss: 0.00384255, Time: 0.1885 Steps: 130000, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 000876, Sample Num: 14016, Cur Loss: 0.00000089, Cur Avg Loss: 0.00011262, Log Avg loss: 0.00014117, Global Avg Loss: 0.00383687, Time: 0.2169 Steps: 130200, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 001076, Sample Num: 17216, Cur Loss: 0.00000280, Cur Avg Loss: 0.00009907, Log Avg loss: 0.00003968, Global Avg Loss: 0.00383104, Time: 0.2130 Steps: 130400, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 001276, Sample Num: 20416, Cur Loss: 0.00000035, Cur Avg Loss: 0.00009634, Log Avg loss: 0.00008165, Global Avg Loss: 0.00382530, Time: 0.1297 Steps: 130600, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 001476, Sample Num: 23616, Cur Loss: 0.00000855, Cur Avg Loss: 0.00008951, Log Avg loss: 0.00004594, Global Avg Loss: 0.00381952, Time: 0.2130 Steps: 130800, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 001676, Sample Num: 26816, Cur Loss: 0.00000010, Cur Avg Loss: 0.00008263, Log Avg loss: 0.00003188, Global Avg Loss: 0.00381374, Time: 0.4569 Steps: 131000, Updated lr: 0.000048 Training, Epoch: 0027, Batch: 001876, Sample Num: 30016, Cur Loss: 0.00000169, Cur Avg Loss: 0.00007832, Log Avg loss: 0.00004218, Global Avg Loss: 0.00380799, Time: 0.4269 Steps: 131200, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 
002076, Sample Num: 33216, Cur Loss: 0.00000559, Cur Avg Loss: 0.00008260, Log Avg loss: 0.00012277, Global Avg Loss: 0.00380238, Time: 0.2174 Steps: 131400, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 002276, Sample Num: 36416, Cur Loss: 0.00001530, Cur Avg Loss: 0.00008227, Log Avg loss: 0.00007887, Global Avg Loss: 0.00379672, Time: 0.2164 Steps: 131600, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 002476, Sample Num: 39616, Cur Loss: 0.00000274, Cur Avg Loss: 0.00008269, Log Avg loss: 0.00008739, Global Avg Loss: 0.00379109, Time: 0.3356 Steps: 131800, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 002676, Sample Num: 42816, Cur Loss: 0.01225937, Cur Avg Loss: 0.00008674, Log Avg loss: 0.00013696, Global Avg Loss: 0.00378556, Time: 0.4071 Steps: 132000, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 002876, Sample Num: 46016, Cur Loss: 0.00001551, Cur Avg Loss: 0.00008980, Log Avg loss: 0.00013071, Global Avg Loss: 0.00378003, Time: 0.2176 Steps: 132200, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 003076, Sample Num: 49216, Cur Loss: 0.00000571, Cur Avg Loss: 0.00009352, Log Avg loss: 0.00014704, Global Avg Loss: 0.00377454, Time: 0.3909 Steps: 132400, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 003276, Sample Num: 52416, Cur Loss: 0.00000412, Cur Avg Loss: 0.00009136, Log Avg loss: 0.00005816, Global Avg Loss: 0.00376894, Time: 0.2166 Steps: 132600, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 003476, Sample Num: 55616, Cur Loss: 0.00001773, Cur Avg Loss: 0.00009145, Log Avg loss: 0.00009290, Global Avg Loss: 0.00376340, Time: 0.3054 Steps: 132800, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 003676, Sample Num: 58816, Cur Loss: 0.00002431, Cur Avg Loss: 0.00009268, Log Avg loss: 0.00011408, Global Avg Loss: 0.00375791, Time: 0.6252 Steps: 133000, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 003876, Sample Num: 62016, Cur Loss: 0.00000171, Cur Avg Loss: 0.00008842, Log Avg loss: 0.00001017, Global Avg Loss: 0.00375228, Time: 0.2192 Steps: 133200, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 004076, Sample Num: 65216, Cur Loss: 0.00001742, Cur Avg Loss: 0.00008888, Log Avg loss: 0.00009761, Global Avg Loss: 0.00374681, Time: 0.2153 Steps: 133400, Updated lr: 0.000047 Training, Epoch: 0027, Batch: 004276, Sample Num: 68416, Cur Loss: 0.00000198, Cur Avg Loss: 0.00008902, Log Avg loss: 0.00009197, Global Avg Loss: 0.00374133, Time: 0.2254 Steps: 133600, Updated lr: 0.000046 Training, Epoch: 0027, Batch: 004476, Sample Num: 71616, Cur Loss: 0.00000242, Cur Avg Loss: 0.00009073, Log Avg loss: 0.00012724, Global Avg Loss: 0.00373593, Time: 0.3212 Steps: 133800, Updated lr: 0.000046 Training, Epoch: 0027, Batch: 004676, Sample Num: 74816, Cur Loss: 0.00000141, Cur Avg Loss: 0.00008942, Log Avg loss: 0.00006016, Global Avg Loss: 0.00373045, Time: 0.2188 Steps: 134000, Updated lr: 0.000046 Training, Epoch: 0027, Batch: 004876, Sample Num: 78016, Cur Loss: 0.00000224, Cur Avg Loss: 0.00009413, Log Avg loss: 0.00020422, Global Avg Loss: 0.00372519, Time: 0.2164 Steps: 134200, Updated lr: 0.000046 ***** Running evaluation checkpoint-134298 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-134298 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1196.745802, Avg time per batch (s): 0.240000 {"eval_avg_loss": 
0.001883, "eval_total_loss": 2.006861, "eval_acc": 0.999725, "eval_jaccard": 0.988494, "eval_prec": 0.98952, "eval_recall": 0.990166, "eval_f1": 0.989433, "eval_pr_auc": 0.995882, "eval_roc_auc": 0.999448, "eval_fmax": 0.994691, "eval_pmax": 0.997843, "eval_rmax": 0.991559, "eval_tmax": 0.21, "update_flag": false, "test_avg_loss": 0.002239, "test_total_loss": 2.386532, "test_acc": 0.999717, "test_jaccard": 0.987408, "test_prec": 0.988205, "test_recall": 0.989264, "test_f1": 0.98833, "test_pr_auc": 0.994919, "test_roc_auc": 0.999159, "test_fmax": 0.994261, "test_pmax": 0.996379, "test_rmax": 0.992152, "test_tmax": 0.04, "lr": 4.6185708518368994e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0037226084744815405, "train_cur_epoch_loss": 0.47730527875928, "train_cur_epoch_avg_loss": 9.596004800146361e-05, "train_cur_epoch_time": 1196.7458016872406, "train_cur_epoch_avg_time": 0.2406002818028228, "epoch": 27, "step": 134298} ################################################## Training, Epoch: 0028, Batch: 000102, Sample Num: 1632, Cur Loss: 0.00001257, Cur Avg Loss: 0.00006316, Log Avg loss: 0.00012386, Global Avg Loss: 0.00371983, Time: 0.2159 Steps: 134400, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000302, Sample Num: 4832, Cur Loss: 0.00000153, Cur Avg Loss: 0.00005693, Log Avg loss: 0.00005376, Global Avg Loss: 0.00371438, Time: 0.1985 Steps: 134600, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000502, Sample Num: 8032, Cur Loss: 0.00000085, Cur Avg Loss: 0.00010181, Log Avg loss: 0.00016957, Global Avg Loss: 0.00370912, Time: 0.0856 Steps: 134800, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000702, Sample Num: 11232, Cur Loss: 0.00001182, Cur Avg Loss: 0.00009598, Log Avg loss: 0.00008135, Global Avg Loss: 0.00370375, Time: 0.2192 Steps: 135000, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 000902, Sample Num: 14432, Cur Loss: 0.00003633, Cur Avg Loss: 0.00010899, Log Avg loss: 0.00015467, Global Avg Loss: 0.00369850, Time: 0.2180 Steps: 135200, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001102, Sample Num: 17632, Cur Loss: 0.00000022, Cur Avg Loss: 0.00010496, Log Avg loss: 0.00008676, Global Avg Loss: 0.00369316, Time: 0.1983 Steps: 135400, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001302, Sample Num: 20832, Cur Loss: 0.00000052, Cur Avg Loss: 0.00009837, Log Avg loss: 0.00006210, Global Avg Loss: 0.00368781, Time: 0.3943 Steps: 135600, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001502, Sample Num: 24032, Cur Loss: 0.00001169, Cur Avg Loss: 0.00008966, Log Avg loss: 0.00003294, Global Avg Loss: 0.00368243, Time: 0.2009 Steps: 135800, Updated lr: 0.000046 Training, Epoch: 0028, Batch: 001702, Sample Num: 27232, Cur Loss: 0.00000308, Cur Avg Loss: 0.00009070, Log Avg loss: 0.00009850, Global Avg Loss: 0.00367716, Time: 0.2187 Steps: 136000, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 001902, Sample Num: 30432, Cur Loss: 0.00000104, Cur Avg Loss: 0.00008333, Log Avg loss: 0.00002061, Global Avg Loss: 0.00367179, Time: 0.2176 Steps: 136200, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002102, Sample Num: 33632, Cur Loss: 0.00000133, Cur Avg Loss: 0.00008744, Log Avg loss: 0.00012658, Global Avg Loss: 0.00366659, Time: 0.2223 Steps: 136400, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002302, Sample Num: 36832, Cur Loss: 0.00000458, Cur Avg Loss: 0.00009263, Log Avg loss: 0.00014715, Global Avg Loss: 0.00366144, Time: 0.3082 Steps: 136600, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002502, Sample Num: 
40032, Cur Loss: 0.00000096, Cur Avg Loss: 0.00008835, Log Avg loss: 0.00003906, Global Avg Loss: 0.00365614, Time: 0.1529 Steps: 136800, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002702, Sample Num: 43232, Cur Loss: 0.00000034, Cur Avg Loss: 0.00008912, Log Avg loss: 0.00009876, Global Avg Loss: 0.00365095, Time: 0.3465 Steps: 137000, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 002902, Sample Num: 46432, Cur Loss: 0.00002577, Cur Avg Loss: 0.00009139, Log Avg loss: 0.00012209, Global Avg Loss: 0.00364580, Time: 0.2862 Steps: 137200, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 003102, Sample Num: 49632, Cur Loss: 0.00000081, Cur Avg Loss: 0.00009630, Log Avg loss: 0.00016745, Global Avg Loss: 0.00364074, Time: 0.4151 Steps: 137400, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 003302, Sample Num: 52832, Cur Loss: 0.00000426, Cur Avg Loss: 0.00009368, Log Avg loss: 0.00005308, Global Avg Loss: 0.00363552, Time: 0.2108 Steps: 137600, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 003502, Sample Num: 56032, Cur Loss: 0.00000643, Cur Avg Loss: 0.00009431, Log Avg loss: 0.00010474, Global Avg Loss: 0.00363040, Time: 0.2042 Steps: 137800, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 003702, Sample Num: 59232, Cur Loss: 0.00000085, Cur Avg Loss: 0.00009335, Log Avg loss: 0.00007658, Global Avg Loss: 0.00362525, Time: 0.2111 Steps: 138000, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 003902, Sample Num: 62432, Cur Loss: 0.00000128, Cur Avg Loss: 0.00008957, Log Avg loss: 0.00001951, Global Avg Loss: 0.00362003, Time: 0.2102 Steps: 138200, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 004102, Sample Num: 65632, Cur Loss: 0.00001185, Cur Avg Loss: 0.00008695, Log Avg loss: 0.00003581, Global Avg Loss: 0.00361485, Time: 0.2184 Steps: 138400, Updated lr: 0.000045 Training, Epoch: 0028, Batch: 004302, Sample Num: 68832, Cur Loss: 0.00000048, Cur Avg Loss: 0.00008687, Log Avg loss: 0.00008539, Global Avg Loss: 0.00360976, Time: 0.3195 Steps: 138600, Updated lr: 0.000044 Training, Epoch: 0028, Batch: 004502, Sample Num: 72032, Cur Loss: 0.00000168, Cur Avg Loss: 0.00008745, Log Avg loss: 0.00009978, Global Avg Loss: 0.00360470, Time: 0.2202 Steps: 138800, Updated lr: 0.000044 Training, Epoch: 0028, Batch: 004702, Sample Num: 75232, Cur Loss: 0.00002276, Cur Avg Loss: 0.00008701, Log Avg loss: 0.00007728, Global Avg Loss: 0.00359963, Time: 0.3226 Steps: 139000, Updated lr: 0.000044 Training, Epoch: 0028, Batch: 004902, Sample Num: 78432, Cur Loss: 0.00000276, Cur Avg Loss: 0.00008997, Log Avg loss: 0.00015937, Global Avg Loss: 0.00359468, Time: 0.2166 Steps: 139200, Updated lr: 0.000044 ***** Running evaluation checkpoint-139272 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-139272 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1278.721195, Avg time per batch (s): 0.260000 {"eval_avg_loss": 0.001883, "eval_total_loss": 2.006776, "eval_acc": 0.999722, "eval_jaccard": 0.988305, "eval_prec": 0.989409, "eval_recall": 0.989912, "eval_f1": 0.989255, "eval_pr_auc": 0.996059, "eval_roc_auc": 0.999449, "eval_fmax": 0.994599, "eval_pmax": 0.996823, "eval_rmax": 0.992384, "eval_tmax": 0.07, "update_flag": false, "test_avg_loss": 0.002283, "test_total_loss": 2.433388, "test_acc": 0.999713, "test_jaccard": 0.987176, "test_prec": 0.987986, 
"test_recall": 0.988971, "test_f1": 0.988094, "test_pr_auc": 0.994976, "test_roc_auc": 0.999147, "test_fmax": 0.994176, "test_pmax": 0.995992, "test_rmax": 0.992367, "test_tmax": 0.03, "lr": 4.417763423496165e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0035929816218832605, "train_cur_epoch_loss": 0.46286353700345395, "train_cur_epoch_avg_loss": 9.305660172968515e-05, "train_cur_epoch_time": 1278.7211949825287, "train_cur_epoch_avg_time": 0.2570810605111638, "epoch": 28, "step": 139272} ################################################## Training, Epoch: 0029, Batch: 000128, Sample Num: 2048, Cur Loss: 0.00000124, Cur Avg Loss: 0.00008211, Log Avg loss: 0.00016178, Global Avg Loss: 0.00358976, Time: 0.2826 Steps: 139400, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000328, Sample Num: 5248, Cur Loss: 0.00000056, Cur Avg Loss: 0.00006852, Log Avg loss: 0.00005983, Global Avg Loss: 0.00358470, Time: 0.2174 Steps: 139600, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000528, Sample Num: 8448, Cur Loss: 0.00001336, Cur Avg Loss: 0.00008593, Log Avg loss: 0.00011447, Global Avg Loss: 0.00357974, Time: 0.2162 Steps: 139800, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000728, Sample Num: 11648, Cur Loss: 0.00063869, Cur Avg Loss: 0.00007748, Log Avg loss: 0.00005517, Global Avg Loss: 0.00357470, Time: 0.2203 Steps: 140000, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 000928, Sample Num: 14848, Cur Loss: 0.00000086, Cur Avg Loss: 0.00007836, Log Avg loss: 0.00008158, Global Avg Loss: 0.00356972, Time: 0.2476 Steps: 140200, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001128, Sample Num: 18048, Cur Loss: 0.00000024, Cur Avg Loss: 0.00007417, Log Avg loss: 0.00005470, Global Avg Loss: 0.00356471, Time: 0.4341 Steps: 140400, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001328, Sample Num: 21248, Cur Loss: 0.00000002, Cur Avg Loss: 0.00007202, Log Avg loss: 0.00005991, Global Avg Loss: 0.00355973, Time: 0.3378 Steps: 140600, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001528, Sample Num: 24448, Cur Loss: 0.00000127, Cur Avg Loss: 0.00007159, Log Avg loss: 0.00006873, Global Avg Loss: 0.00355477, Time: 0.2180 Steps: 140800, Updated lr: 0.000044 Training, Epoch: 0029, Batch: 001728, Sample Num: 27648, Cur Loss: 0.00000222, Cur Avg Loss: 0.00007183, Log Avg loss: 0.00007366, Global Avg Loss: 0.00354983, Time: 0.2490 Steps: 141000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 001928, Sample Num: 30848, Cur Loss: 0.00003970, Cur Avg Loss: 0.00006774, Log Avg loss: 0.00003244, Global Avg Loss: 0.00354485, Time: 0.2215 Steps: 141200, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002128, Sample Num: 34048, Cur Loss: 0.00000011, Cur Avg Loss: 0.00007474, Log Avg loss: 0.00014216, Global Avg Loss: 0.00354003, Time: 0.2182 Steps: 141400, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002328, Sample Num: 37248, Cur Loss: 0.00000015, Cur Avg Loss: 0.00008182, Log Avg loss: 0.00015722, Global Avg Loss: 0.00353526, Time: 0.3387 Steps: 141600, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002528, Sample Num: 40448, Cur Loss: 0.00000040, Cur Avg Loss: 0.00007714, Log Avg loss: 0.00002262, Global Avg Loss: 0.00353030, Time: 0.2832 Steps: 141800, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002728, Sample Num: 43648, Cur Loss: 0.00000188, Cur Avg Loss: 0.00007897, Log Avg loss: 0.00010206, Global Avg Loss: 0.00352547, Time: 0.3525 Steps: 142000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 002928, Sample Num: 46848, Cur Loss: 
0.00000576, Cur Avg Loss: 0.00008128, Log Avg loss: 0.00011276, Global Avg Loss: 0.00352067, Time: 0.2284 Steps: 142200, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 003128, Sample Num: 50048, Cur Loss: 0.00000507, Cur Avg Loss: 0.00008790, Log Avg loss: 0.00018485, Global Avg Loss: 0.00351599, Time: 0.2689 Steps: 142400, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 003328, Sample Num: 53248, Cur Loss: 0.00000004, Cur Avg Loss: 0.00008676, Log Avg loss: 0.00006891, Global Avg Loss: 0.00351115, Time: 0.2502 Steps: 142600, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 003528, Sample Num: 56448, Cur Loss: 0.00000033, Cur Avg Loss: 0.00008563, Log Avg loss: 0.00006694, Global Avg Loss: 0.00350633, Time: 0.0901 Steps: 142800, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 003728, Sample Num: 59648, Cur Loss: 0.00000645, Cur Avg Loss: 0.00008862, Log Avg loss: 0.00014122, Global Avg Loss: 0.00350162, Time: 0.2200 Steps: 143000, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 003928, Sample Num: 62848, Cur Loss: 0.00000371, Cur Avg Loss: 0.00008491, Log Avg loss: 0.00001589, Global Avg Loss: 0.00349675, Time: 0.2175 Steps: 143200, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 004128, Sample Num: 66048, Cur Loss: 0.00000052, Cur Avg Loss: 0.00008237, Log Avg loss: 0.00003240, Global Avg Loss: 0.00349192, Time: 0.2372 Steps: 143400, Updated lr: 0.000043 Training, Epoch: 0029, Batch: 004328, Sample Num: 69248, Cur Loss: 0.00000594, Cur Avg Loss: 0.00008288, Log Avg loss: 0.00009348, Global Avg Loss: 0.00348719, Time: 0.2132 Steps: 143600, Updated lr: 0.000042 Training, Epoch: 0029, Batch: 004528, Sample Num: 72448, Cur Loss: 0.00000280, Cur Avg Loss: 0.00008533, Log Avg loss: 0.00013832, Global Avg Loss: 0.00348253, Time: 0.3258 Steps: 143800, Updated lr: 0.000042 Training, Epoch: 0029, Batch: 004728, Sample Num: 75648, Cur Loss: 0.00002304, Cur Avg Loss: 0.00008689, Log Avg loss: 0.00012228, Global Avg Loss: 0.00347787, Time: 0.2108 Steps: 144000, Updated lr: 0.000042 Training, Epoch: 0029, Batch: 004928, Sample Num: 78848, Cur Loss: 0.00000191, Cur Avg Loss: 0.00008771, Log Avg loss: 0.00010713, Global Avg Loss: 0.00347319, Time: 0.2984 Steps: 144200, Updated lr: 0.000042 ***** Running evaluation checkpoint-144246 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-144246 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1259.986722, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001846, "eval_total_loss": 1.96793, "eval_acc": 0.999731, "eval_jaccard": 0.988679, "eval_prec": 0.989921, "eval_recall": 0.990068, "eval_f1": 0.989617, "eval_pr_auc": 0.996084, "eval_roc_auc": 0.999449, "eval_fmax": 0.994778, "eval_pmax": 0.997471, "eval_rmax": 0.9921, "eval_tmax": 0.12, "update_flag": false, "test_avg_loss": 0.002252, "test_total_loss": 2.400617, "test_acc": 0.999722, "test_jaccard": 0.987536, "test_prec": 0.988435, "test_recall": 0.989176, "test_f1": 0.988432, "test_pr_auc": 0.995027, "test_roc_auc": 0.999143, "test_fmax": 0.994251, "test_pmax": 0.995955, "test_rmax": 0.992553, "test_tmax": 0.02, "lr": 4.21695599515543e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0034722144371632036, "train_cur_epoch_loss": 0.45130726011756916, "train_cur_epoch_avg_loss": 9.073326500152174e-05, "train_cur_epoch_time": 1259.986721754074, 
"train_cur_epoch_avg_time": 0.25331458016768676, "epoch": 29, "step": 144246} ################################################## Training, Epoch: 0030, Batch: 000154, Sample Num: 2464, Cur Loss: 0.00000155, Cur Avg Loss: 0.00009391, Log Avg loss: 0.00016755, Global Avg Loss: 0.00346861, Time: 0.2658 Steps: 144400, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 000354, Sample Num: 5664, Cur Loss: 0.00000977, Cur Avg Loss: 0.00007179, Log Avg loss: 0.00005476, Global Avg Loss: 0.00346389, Time: 0.2119 Steps: 144600, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 000554, Sample Num: 8864, Cur Loss: 0.00000201, Cur Avg Loss: 0.00008785, Log Avg loss: 0.00011627, Global Avg Loss: 0.00345927, Time: 0.2192 Steps: 144800, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 000754, Sample Num: 12064, Cur Loss: 0.00000024, Cur Avg Loss: 0.00009042, Log Avg loss: 0.00009753, Global Avg Loss: 0.00345463, Time: 0.2154 Steps: 145000, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 000954, Sample Num: 15264, Cur Loss: 0.00000099, Cur Avg Loss: 0.00008435, Log Avg loss: 0.00006146, Global Avg Loss: 0.00344996, Time: 0.2177 Steps: 145200, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 001154, Sample Num: 18464, Cur Loss: 0.00000400, Cur Avg Loss: 0.00008715, Log Avg loss: 0.00010053, Global Avg Loss: 0.00344535, Time: 0.2611 Steps: 145400, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 001354, Sample Num: 21664, Cur Loss: 0.00000086, Cur Avg Loss: 0.00007883, Log Avg loss: 0.00003079, Global Avg Loss: 0.00344066, Time: 0.2174 Steps: 145600, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 001554, Sample Num: 24864, Cur Loss: 0.00320930, Cur Avg Loss: 0.00007528, Log Avg loss: 0.00005124, Global Avg Loss: 0.00343601, Time: 0.2184 Steps: 145800, Updated lr: 0.000042 Training, Epoch: 0030, Batch: 001754, Sample Num: 28064, Cur Loss: 0.00000201, Cur Avg Loss: 0.00007341, Log Avg loss: 0.00005893, Global Avg Loss: 0.00343138, Time: 0.2181 Steps: 146000, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 001954, Sample Num: 31264, Cur Loss: 0.00000964, Cur Avg Loss: 0.00006873, Log Avg loss: 0.00002770, Global Avg Loss: 0.00342673, Time: 0.2174 Steps: 146200, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 002154, Sample Num: 34464, Cur Loss: 0.00009514, Cur Avg Loss: 0.00007215, Log Avg loss: 0.00010549, Global Avg Loss: 0.00342219, Time: 0.2174 Steps: 146400, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 002354, Sample Num: 37664, Cur Loss: 0.00000017, Cur Avg Loss: 0.00007453, Log Avg loss: 0.00010021, Global Avg Loss: 0.00341766, Time: 0.3918 Steps: 146600, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 002554, Sample Num: 40864, Cur Loss: 0.00000106, Cur Avg Loss: 0.00007033, Log Avg loss: 0.00002083, Global Avg Loss: 0.00341303, Time: 0.2792 Steps: 146800, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 002754, Sample Num: 44064, Cur Loss: 0.00000068, Cur Avg Loss: 0.00007498, Log Avg loss: 0.00013441, Global Avg Loss: 0.00340857, Time: 0.2574 Steps: 147000, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 002954, Sample Num: 47264, Cur Loss: 0.00441882, Cur Avg Loss: 0.00007909, Log Avg loss: 0.00013566, Global Avg Loss: 0.00340412, Time: 0.4306 Steps: 147200, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 003154, Sample Num: 50464, Cur Loss: 0.00000138, Cur Avg Loss: 0.00007763, Log Avg loss: 0.00005613, Global Avg Loss: 0.00339958, Time: 0.3918 Steps: 147400, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 003354, Sample Num: 53664, Cur Loss: 0.00002632, Cur 
Avg Loss: 0.00007767, Log Avg loss: 0.00007833, Global Avg Loss: 0.00339508, Time: 0.2157 Steps: 147600, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 003554, Sample Num: 56864, Cur Loss: 0.00000059, Cur Avg Loss: 0.00007661, Log Avg loss: 0.00005874, Global Avg Loss: 0.00339056, Time: 0.2172 Steps: 147800, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 003754, Sample Num: 60064, Cur Loss: 0.00000089, Cur Avg Loss: 0.00007934, Log Avg loss: 0.00012792, Global Avg Loss: 0.00338615, Time: 0.0849 Steps: 148000, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 003954, Sample Num: 63264, Cur Loss: 0.00000410, Cur Avg Loss: 0.00007612, Log Avg loss: 0.00001558, Global Avg Loss: 0.00338161, Time: 0.2148 Steps: 148200, Updated lr: 0.000041 Training, Epoch: 0030, Batch: 004154, Sample Num: 66464, Cur Loss: 0.00000117, Cur Avg Loss: 0.00007295, Log Avg loss: 0.00001032, Global Avg Loss: 0.00337706, Time: 0.1309 Steps: 148400, Updated lr: 0.000040 Training, Epoch: 0030, Batch: 004354, Sample Num: 69664, Cur Loss: 0.00000720, Cur Avg Loss: 0.00007344, Log Avg loss: 0.00008365, Global Avg Loss: 0.00337263, Time: 0.2182 Steps: 148600, Updated lr: 0.000040 Training, Epoch: 0030, Batch: 004554, Sample Num: 72864, Cur Loss: 0.00000075, Cur Avg Loss: 0.00007506, Log Avg loss: 0.00011024, Global Avg Loss: 0.00336824, Time: 0.2171 Steps: 148800, Updated lr: 0.000040 Training, Epoch: 0030, Batch: 004754, Sample Num: 76064, Cur Loss: 0.00001816, Cur Avg Loss: 0.00007770, Log Avg loss: 0.00013791, Global Avg Loss: 0.00336391, Time: 0.2179 Steps: 149000, Updated lr: 0.000040 Training, Epoch: 0030, Batch: 004954, Sample Num: 79264, Cur Loss: 0.00000277, Cur Avg Loss: 0.00008198, Log Avg loss: 0.00018371, Global Avg Loss: 0.00335965, Time: 0.3144 Steps: 149200, Updated lr: 0.000040 ***** Running evaluation checkpoint-149220 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-149220 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1253.768343, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001886, "eval_total_loss": 2.010759, "eval_acc": 0.99973, "eval_jaccard": 0.988626, "eval_prec": 0.98973, "eval_recall": 0.990146, "eval_f1": 0.989553, "eval_pr_auc": 0.996121, "eval_roc_auc": 0.999465, "eval_fmax": 0.994936, "eval_pmax": 0.997146, "eval_rmax": 0.992735, "eval_tmax": 0.05, "update_flag": false, "test_avg_loss": 0.00231, "test_total_loss": 2.462534, "test_acc": 0.999721, "test_jaccard": 0.987194, "test_prec": 0.988014, "test_recall": 0.988902, "test_f1": 0.988082, "test_pr_auc": 0.994921, "test_roc_auc": 0.99914, "test_fmax": 0.99415, "test_pmax": 0.99701, "test_rmax": 0.991307, "test_tmax": 0.08, "lr": 4.016148566814695e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0033592631495409997, "train_cur_epoch_loss": 0.41620347146501907, "train_cur_epoch_avg_loss": 8.36758084971892e-05, "train_cur_epoch_time": 1253.768343448639, "train_cur_epoch_avg_time": 0.2520644035883874, "epoch": 30, "step": 149220} ################################################## Training, Epoch: 0031, Batch: 000180, Sample Num: 2880, Cur Loss: 0.00000184, Cur Avg Loss: 0.00008251, Log Avg loss: 0.00012461, Global Avg Loss: 0.00335532, Time: 0.2164 Steps: 149400, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 000380, Sample Num: 6080, Cur Loss: 0.00000231, Cur Avg Loss: 0.00007648, Log Avg 
loss: 0.00007105, Global Avg Loss: 0.00335092, Time: 0.2185 Steps: 149600, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 000580, Sample Num: 9280, Cur Loss: 0.00000003, Cur Avg Loss: 0.00007578, Log Avg loss: 0.00007446, Global Avg Loss: 0.00334655, Time: 0.2172 Steps: 149800, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 000780, Sample Num: 12480, Cur Loss: 0.00000054, Cur Avg Loss: 0.00007901, Log Avg loss: 0.00008836, Global Avg Loss: 0.00334221, Time: 0.2129 Steps: 150000, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 000980, Sample Num: 15680, Cur Loss: 0.00000090, Cur Avg Loss: 0.00007218, Log Avg loss: 0.00004556, Global Avg Loss: 0.00333782, Time: 0.0853 Steps: 150200, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 001180, Sample Num: 18880, Cur Loss: 0.00000514, Cur Avg Loss: 0.00007621, Log Avg loss: 0.00009594, Global Avg Loss: 0.00333351, Time: 0.2137 Steps: 150400, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 001380, Sample Num: 22080, Cur Loss: 0.00000339, Cur Avg Loss: 0.00007134, Log Avg loss: 0.00004262, Global Avg Loss: 0.00332913, Time: 0.2185 Steps: 150600, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 001580, Sample Num: 25280, Cur Loss: 0.00000151, Cur Avg Loss: 0.00006914, Log Avg loss: 0.00005397, Global Avg Loss: 0.00332479, Time: 0.2185 Steps: 150800, Updated lr: 0.000040 Training, Epoch: 0031, Batch: 001780, Sample Num: 28480, Cur Loss: 0.00000057, Cur Avg Loss: 0.00006795, Log Avg loss: 0.00005854, Global Avg Loss: 0.00332046, Time: 0.2108 Steps: 151000, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 001980, Sample Num: 31680, Cur Loss: 0.00000006, Cur Avg Loss: 0.00006231, Log Avg loss: 0.00001216, Global Avg Loss: 0.00331609, Time: 0.2176 Steps: 151200, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 002180, Sample Num: 34880, Cur Loss: 0.00000076, Cur Avg Loss: 0.00006875, Log Avg loss: 0.00013243, Global Avg Loss: 0.00331188, Time: 0.3649 Steps: 151400, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 002380, Sample Num: 38080, Cur Loss: 0.00000231, Cur Avg Loss: 0.00007122, Log Avg loss: 0.00009821, Global Avg Loss: 0.00330764, Time: 0.3578 Steps: 151600, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 002580, Sample Num: 41280, Cur Loss: 0.00000050, Cur Avg Loss: 0.00006690, Log Avg loss: 0.00001542, Global Avg Loss: 0.00330331, Time: 0.2158 Steps: 151800, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 002780, Sample Num: 44480, Cur Loss: 0.00000079, Cur Avg Loss: 0.00006800, Log Avg loss: 0.00008216, Global Avg Loss: 0.00329907, Time: 0.2856 Steps: 152000, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 002980, Sample Num: 47680, Cur Loss: 0.00000087, Cur Avg Loss: 0.00007170, Log Avg loss: 0.00012315, Global Avg Loss: 0.00329489, Time: 0.2196 Steps: 152200, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 003180, Sample Num: 50880, Cur Loss: 0.00000007, Cur Avg Loss: 0.00007270, Log Avg loss: 0.00008759, Global Avg Loss: 0.00329069, Time: 0.0830 Steps: 152400, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 003380, Sample Num: 54080, Cur Loss: 0.00000603, Cur Avg Loss: 0.00007307, Log Avg loss: 0.00007907, Global Avg Loss: 0.00328648, Time: 0.2162 Steps: 152600, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 003580, Sample Num: 57280, Cur Loss: 0.00000085, Cur Avg Loss: 0.00007288, Log Avg loss: 0.00006956, Global Avg Loss: 0.00328227, Time: 0.2171 Steps: 152800, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 003780, Sample Num: 60480, Cur Loss: 0.00000024, Cur Avg Loss: 0.00007252, Log Avg 
loss: 0.00006604, Global Avg Loss: 0.00327806, Time: 0.2195 Steps: 153000, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 003980, Sample Num: 63680, Cur Loss: 0.00000033, Cur Avg Loss: 0.00007017, Log Avg loss: 0.00002582, Global Avg Loss: 0.00327382, Time: 0.1795 Steps: 153200, Updated lr: 0.000039 Training, Epoch: 0031, Batch: 004180, Sample Num: 66880, Cur Loss: 0.00000004, Cur Avg Loss: 0.00006808, Log Avg loss: 0.00002639, Global Avg Loss: 0.00326958, Time: 0.2195 Steps: 153400, Updated lr: 0.000038 Training, Epoch: 0031, Batch: 004380, Sample Num: 70080, Cur Loss: 0.00000420, Cur Avg Loss: 0.00006770, Log Avg loss: 0.00005993, Global Avg Loss: 0.00326540, Time: 0.2160 Steps: 153600, Updated lr: 0.000038 Training, Epoch: 0031, Batch: 004580, Sample Num: 73280, Cur Loss: 0.00000404, Cur Avg Loss: 0.00007082, Log Avg loss: 0.00013904, Global Avg Loss: 0.00326134, Time: 0.1134 Steps: 153800, Updated lr: 0.000038 Training, Epoch: 0031, Batch: 004780, Sample Num: 76480, Cur Loss: 0.00000187, Cur Avg Loss: 0.00007270, Log Avg loss: 0.00011574, Global Avg Loss: 0.00325725, Time: 0.2185 Steps: 154000, Updated lr: 0.000038 ***** Running evaluation checkpoint-154194 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-154194 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1226.311120, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001916, "eval_total_loss": 2.042507, "eval_acc": 0.999732, "eval_jaccard": 0.988513, "eval_prec": 0.989523, "eval_recall": 0.990065, "eval_f1": 0.989418, "eval_pr_auc": 0.996118, "eval_roc_auc": 0.999492, "eval_fmax": 0.994739, "eval_pmax": 0.997285, "eval_rmax": 0.992207, "eval_tmax": 0.07, "update_flag": false, "test_avg_loss": 0.002346, "test_total_loss": 2.50045, "test_acc": 0.999719, "test_jaccard": 0.987097, "test_prec": 0.987897, "test_recall": 0.988805, "test_f1": 0.987975, "test_pr_auc": 0.994968, "test_roc_auc": 0.999141, "test_fmax": 0.994327, "test_pmax": 0.997154, "test_rmax": 0.991517, "test_tmax": 0.08, "lr": 3.8153411384739607e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.003253384064978784, "train_cur_epoch_loss": 0.3830553408311631, "train_cur_epoch_avg_loss": 7.701152811241719e-05, "train_cur_epoch_time": 1226.3111202716827, "train_cur_epoch_avg_time": 0.24654425417605202, "epoch": 31, "step": 154194} ################################################## Training, Epoch: 0032, Batch: 000006, Sample Num: 96, Cur Loss: 0.00003686, Cur Avg Loss: 0.00001025, Log Avg loss: 0.00017810, Global Avg Loss: 0.00325326, Time: 0.3344 Steps: 154200, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 000206, Sample Num: 3296, Cur Loss: 0.00000281, Cur Avg Loss: 0.00007402, Log Avg loss: 0.00007593, Global Avg Loss: 0.00324914, Time: 0.2179 Steps: 154400, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 000406, Sample Num: 6496, Cur Loss: 0.00000042, Cur Avg Loss: 0.00008254, Log Avg loss: 0.00009131, Global Avg Loss: 0.00324506, Time: 0.2169 Steps: 154600, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 000606, Sample Num: 9696, Cur Loss: 0.00000053, Cur Avg Loss: 0.00008122, Log Avg loss: 0.00007855, Global Avg Loss: 0.00324097, Time: 0.2174 Steps: 154800, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 000806, Sample Num: 12896, Cur Loss: 0.00000558, Cur Avg Loss: 0.00008810, Log Avg loss: 0.00010893, Global Avg 
Loss: 0.00323692, Time: 0.2130 Steps: 155000, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 001006, Sample Num: 16096, Cur Loss: 0.00000197, Cur Avg Loss: 0.00008080, Log Avg loss: 0.00005138, Global Avg Loss: 0.00323282, Time: 0.2824 Steps: 155200, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 001206, Sample Num: 19296, Cur Loss: 0.00000067, Cur Avg Loss: 0.00008481, Log Avg loss: 0.00010500, Global Avg Loss: 0.00322879, Time: 0.2937 Steps: 155400, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 001406, Sample Num: 22496, Cur Loss: 0.00000036, Cur Avg Loss: 0.00007510, Log Avg loss: 0.00001652, Global Avg Loss: 0.00322467, Time: 0.2172 Steps: 155600, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 001606, Sample Num: 25696, Cur Loss: 0.00000210, Cur Avg Loss: 0.00007187, Log Avg loss: 0.00004915, Global Avg Loss: 0.00322059, Time: 0.2111 Steps: 155800, Updated lr: 0.000038 Training, Epoch: 0032, Batch: 001806, Sample Num: 28896, Cur Loss: 0.00000002, Cur Avg Loss: 0.00007141, Log Avg loss: 0.00006771, Global Avg Loss: 0.00321655, Time: 0.2085 Steps: 156000, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 002006, Sample Num: 32096, Cur Loss: 0.00000029, Cur Avg Loss: 0.00006876, Log Avg loss: 0.00004483, Global Avg Loss: 0.00321249, Time: 0.1492 Steps: 156200, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 002206, Sample Num: 35296, Cur Loss: 0.00000001, Cur Avg Loss: 0.00007210, Log Avg loss: 0.00010566, Global Avg Loss: 0.00320851, Time: 0.2192 Steps: 156400, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 002406, Sample Num: 38496, Cur Loss: 0.00000041, Cur Avg Loss: 0.00007317, Log Avg loss: 0.00008491, Global Avg Loss: 0.00320452, Time: 0.2195 Steps: 156600, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 002606, Sample Num: 41696, Cur Loss: 0.00000306, Cur Avg Loss: 0.00006902, Log Avg loss: 0.00001909, Global Avg Loss: 0.00320046, Time: 0.2200 Steps: 156800, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 002806, Sample Num: 44896, Cur Loss: 0.00000244, Cur Avg Loss: 0.00007133, Log Avg loss: 0.00010142, Global Avg Loss: 0.00319651, Time: 0.2165 Steps: 157000, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 003006, Sample Num: 48096, Cur Loss: 0.00000116, Cur Avg Loss: 0.00007412, Log Avg loss: 0.00011334, Global Avg Loss: 0.00319259, Time: 0.2179 Steps: 157200, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 003206, Sample Num: 51296, Cur Loss: 0.00000153, Cur Avg Loss: 0.00007374, Log Avg loss: 0.00006797, Global Avg Loss: 0.00318862, Time: 0.2173 Steps: 157400, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 003406, Sample Num: 54496, Cur Loss: 0.00000444, Cur Avg Loss: 0.00007306, Log Avg loss: 0.00006216, Global Avg Loss: 0.00318465, Time: 0.2175 Steps: 157600, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 003606, Sample Num: 57696, Cur Loss: 0.00000180, Cur Avg Loss: 0.00007253, Log Avg loss: 0.00006344, Global Avg Loss: 0.00318070, Time: 0.4218 Steps: 157800, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 003806, Sample Num: 60896, Cur Loss: 0.00001413, Cur Avg Loss: 0.00007213, Log Avg loss: 0.00006499, Global Avg Loss: 0.00317675, Time: 0.2189 Steps: 158000, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 004006, Sample Num: 64096, Cur Loss: 0.00000062, Cur Avg Loss: 0.00006968, Log Avg loss: 0.00002307, Global Avg Loss: 0.00317277, Time: 0.2192 Steps: 158200, Updated lr: 0.000037 Training, Epoch: 0032, Batch: 004206, Sample Num: 67296, Cur Loss: 0.00022869, Cur Avg Loss: 0.00006898, Log Avg loss: 0.00005486, Global Avg 
Loss: 0.00316883, Time: 0.2100 Steps: 158400, Updated lr: 0.000036 Training, Epoch: 0032, Batch: 004406, Sample Num: 70496, Cur Loss: 0.00000044, Cur Avg Loss: 0.00006777, Log Avg loss: 0.00004247, Global Avg Loss: 0.00316489, Time: 0.2167 Steps: 158600, Updated lr: 0.000036 Training, Epoch: 0032, Batch: 004606, Sample Num: 73696, Cur Loss: 0.00000169, Cur Avg Loss: 0.00006960, Log Avg loss: 0.00010995, Global Avg Loss: 0.00316104, Time: 0.2229 Steps: 158800, Updated lr: 0.000036 Training, Epoch: 0032, Batch: 004806, Sample Num: 76896, Cur Loss: 0.00000323, Cur Avg Loss: 0.00007283, Log Avg loss: 0.00014720, Global Avg Loss: 0.00315725, Time: 0.2135 Steps: 159000, Updated lr: 0.000036 ***** Running evaluation checkpoint-159168 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-159168 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1209.636692, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001931, "eval_total_loss": 2.058585, "eval_acc": 0.999723, "eval_jaccard": 0.988194, "eval_prec": 0.989303, "eval_recall": 0.989849, "eval_f1": 0.989169, "eval_pr_auc": 0.996052, "eval_roc_auc": 0.999458, "eval_fmax": 0.994833, "eval_pmax": 0.997293, "eval_rmax": 0.992386, "eval_tmax": 0.08, "update_flag": false, "test_avg_loss": 0.002386, "test_total_loss": 2.543603, "test_acc": 0.99972, "test_jaccard": 0.987321, "test_prec": 0.988122, "test_recall": 0.989098, "test_f1": 0.988227, "test_pr_auc": 0.99485, "test_roc_auc": 0.999133, "test_fmax": 0.994082, "test_pmax": 0.996523, "test_rmax": 0.991653, "test_tmax": 0.04, "lr": 3.614533710133226e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0031540509621590875, "train_cur_epoch_loss": 0.37168102959806537, "train_cur_epoch_avg_loss": 7.472477474830426e-05, "train_cur_epoch_time": 1209.636691570282, "train_cur_epoch_avg_time": 0.2431919363832493, "epoch": 32, "step": 159168} ################################################## Training, Epoch: 0033, Batch: 000032, Sample Num: 512, Cur Loss: 0.00031589, Cur Avg Loss: 0.00006849, Log Avg loss: 0.00011921, Global Avg Loss: 0.00315343, Time: 0.2519 Steps: 159200, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 000232, Sample Num: 3712, Cur Loss: 0.00000715, Cur Avg Loss: 0.00007040, Log Avg loss: 0.00007071, Global Avg Loss: 0.00314956, Time: 0.2191 Steps: 159400, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 000432, Sample Num: 6912, Cur Loss: 0.00000799, Cur Avg Loss: 0.00007629, Log Avg loss: 0.00008311, Global Avg Loss: 0.00314572, Time: 0.1287 Steps: 159600, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 000632, Sample Num: 10112, Cur Loss: 0.00000148, Cur Avg Loss: 0.00006316, Log Avg loss: 0.00003480, Global Avg Loss: 0.00314183, Time: 0.2152 Steps: 159800, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 000832, Sample Num: 13312, Cur Loss: 0.00000059, Cur Avg Loss: 0.00007316, Log Avg loss: 0.00010478, Global Avg Loss: 0.00313803, Time: 0.2219 Steps: 160000, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 001032, Sample Num: 16512, Cur Loss: 0.00000031, Cur Avg Loss: 0.00006673, Log Avg loss: 0.00003997, Global Avg Loss: 0.00313416, Time: 0.3350 Steps: 160200, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 001232, Sample Num: 19712, Cur Loss: 0.00008112, Cur Avg Loss: 0.00007497, Log Avg loss: 0.00011750, Global Avg Loss: 0.00313040, Time: 0.3222 
Steps: 160400, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 001432, Sample Num: 22912, Cur Loss: 0.00000697, Cur Avg Loss: 0.00006816, Log Avg loss: 0.00002624, Global Avg Loss: 0.00312654, Time: 0.4416 Steps: 160600, Updated lr: 0.000036 Training, Epoch: 0033, Batch: 001632, Sample Num: 26112, Cur Loss: 0.00534663, Cur Avg Loss: 0.00006741, Log Avg loss: 0.00006203, Global Avg Loss: 0.00312272, Time: 0.1004 Steps: 160800, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 001832, Sample Num: 29312, Cur Loss: 0.00000249, Cur Avg Loss: 0.00006224, Log Avg loss: 0.00002007, Global Avg Loss: 0.00311887, Time: 0.2216 Steps: 161000, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 002032, Sample Num: 32512, Cur Loss: 0.00000015, Cur Avg Loss: 0.00006190, Log Avg loss: 0.00005877, Global Avg Loss: 0.00311507, Time: 0.2196 Steps: 161200, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 002232, Sample Num: 35712, Cur Loss: 0.00001774, Cur Avg Loss: 0.00006910, Log Avg loss: 0.00014224, Global Avg Loss: 0.00311139, Time: 0.2287 Steps: 161400, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 002432, Sample Num: 38912, Cur Loss: 0.00001565, Cur Avg Loss: 0.00007126, Log Avg loss: 0.00009539, Global Avg Loss: 0.00310766, Time: 0.2410 Steps: 161600, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 002632, Sample Num: 42112, Cur Loss: 0.00000018, Cur Avg Loss: 0.00006883, Log Avg loss: 0.00003923, Global Avg Loss: 0.00310386, Time: 0.3341 Steps: 161800, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 002832, Sample Num: 45312, Cur Loss: 0.00000090, Cur Avg Loss: 0.00007013, Log Avg loss: 0.00008721, Global Avg Loss: 0.00310014, Time: 0.3299 Steps: 162000, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 003032, Sample Num: 48512, Cur Loss: 0.00000222, Cur Avg Loss: 0.00007497, Log Avg loss: 0.00014348, Global Avg Loss: 0.00309649, Time: 0.3087 Steps: 162200, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 003232, Sample Num: 51712, Cur Loss: 0.00000352, Cur Avg Loss: 0.00007205, Log Avg loss: 0.00002785, Global Avg Loss: 0.00309271, Time: 0.2557 Steps: 162400, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 003432, Sample Num: 54912, Cur Loss: 0.00001403, Cur Avg Loss: 0.00007255, Log Avg loss: 0.00008057, Global Avg Loss: 0.00308901, Time: 0.2187 Steps: 162600, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 003632, Sample Num: 58112, Cur Loss: 0.00000018, Cur Avg Loss: 0.00007146, Log Avg loss: 0.00005287, Global Avg Loss: 0.00308528, Time: 0.2166 Steps: 162800, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 003832, Sample Num: 61312, Cur Loss: 0.00000018, Cur Avg Loss: 0.00006805, Log Avg loss: 0.00000600, Global Avg Loss: 0.00308150, Time: 0.2193 Steps: 163000, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 004032, Sample Num: 64512, Cur Loss: 0.00000016, Cur Avg Loss: 0.00006635, Log Avg loss: 0.00003379, Global Avg Loss: 0.00307777, Time: 0.2820 Steps: 163200, Updated lr: 0.000035 Training, Epoch: 0033, Batch: 004232, Sample Num: 67712, Cur Loss: 0.00000175, Cur Avg Loss: 0.00006494, Log Avg loss: 0.00003647, Global Avg Loss: 0.00307404, Time: 0.2901 Steps: 163400, Updated lr: 0.000034 Training, Epoch: 0033, Batch: 004432, Sample Num: 70912, Cur Loss: 0.00000026, Cur Avg Loss: 0.00006408, Log Avg loss: 0.00004589, Global Avg Loss: 0.00307034, Time: 0.4550 Steps: 163600, Updated lr: 0.000034 Training, Epoch: 0033, Batch: 004632, Sample Num: 74112, Cur Loss: 0.00000002, Cur Avg Loss: 0.00006604, Log Avg loss: 0.00010948, Global Avg Loss: 0.00306673, Time: 0.2189 
Steps: 163800, Updated lr: 0.000034 Training, Epoch: 0033, Batch: 004832, Sample Num: 77312, Cur Loss: 0.00000155, Cur Avg Loss: 0.00006860, Log Avg loss: 0.00012801, Global Avg Loss: 0.00306314, Time: 0.2524 Steps: 164000, Updated lr: 0.000034 ***** Running evaluation checkpoint-164142 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-164142 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1233.119554, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001926, "eval_total_loss": 2.053279, "eval_acc": 0.99973, "eval_jaccard": 0.98872, "eval_prec": 0.989764, "eval_recall": 0.990329, "eval_f1": 0.989652, "eval_pr_auc": 0.996035, "eval_roc_auc": 0.999456, "eval_fmax": 0.994763, "eval_pmax": 0.997634, "eval_rmax": 0.991908, "eval_tmax": 0.13, "update_flag": false, "test_avg_loss": 0.002369, "test_total_loss": 2.524989, "test_acc": 0.999719, "test_jaccard": 0.987214, "test_prec": 0.988024, "test_recall": 0.988971, "test_f1": 0.988114, "test_pr_auc": 0.995054, "test_roc_auc": 0.999138, "test_fmax": 0.994261, "test_pmax": 0.996798, "test_rmax": 0.991737, "test_tmax": 0.06, "lr": 3.413726281792491e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0030606214410150725, "train_cur_epoch_loss": 0.35254102615734695, "train_cur_epoch_avg_loss": 7.087676440638258e-05, "train_cur_epoch_time": 1233.1195538043976, "train_cur_epoch_avg_time": 0.2479130586659424, "epoch": 33, "step": 164142} ################################################## Training, Epoch: 0034, Batch: 000058, Sample Num: 928, Cur Loss: 0.00000736, Cur Avg Loss: 0.00001441, Log Avg loss: 0.00010947, Global Avg Loss: 0.00305955, Time: 0.3832 Steps: 164200, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 000258, Sample Num: 4128, Cur Loss: 0.00000015, Cur Avg Loss: 0.00004772, Log Avg loss: 0.00005737, Global Avg Loss: 0.00305589, Time: 0.2157 Steps: 164400, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 000458, Sample Num: 7328, Cur Loss: 0.00000044, Cur Avg Loss: 0.00005978, Log Avg loss: 0.00007533, Global Avg Loss: 0.00305227, Time: 0.2236 Steps: 164600, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 000658, Sample Num: 10528, Cur Loss: 0.00000096, Cur Avg Loss: 0.00005898, Log Avg loss: 0.00005716, Global Avg Loss: 0.00304864, Time: 0.2180 Steps: 164800, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 000858, Sample Num: 13728, Cur Loss: 0.00000349, Cur Avg Loss: 0.00007007, Log Avg loss: 0.00010655, Global Avg Loss: 0.00304507, Time: 0.1997 Steps: 165000, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 001058, Sample Num: 16928, Cur Loss: 0.00000927, Cur Avg Loss: 0.00006092, Log Avg loss: 0.00002166, Global Avg Loss: 0.00304141, Time: 0.2568 Steps: 165200, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 001258, Sample Num: 20128, Cur Loss: 0.00000023, Cur Avg Loss: 0.00006797, Log Avg loss: 0.00010524, Global Avg Loss: 0.00303786, Time: 0.3633 Steps: 165400, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 001458, Sample Num: 23328, Cur Loss: 0.00000670, Cur Avg Loss: 0.00006185, Log Avg loss: 0.00002340, Global Avg Loss: 0.00303422, Time: 0.2311 Steps: 165600, Updated lr: 0.000034 Training, Epoch: 0034, Batch: 001658, Sample Num: 26528, Cur Loss: 0.00000017, Cur Avg Loss: 0.00006019, Log Avg loss: 0.00004809, Global Avg Loss: 0.00303062, Time: 0.2195 Steps: 165800, Updated lr: 
0.000033 Training, Epoch: 0034, Batch: 001858, Sample Num: 29728, Cur Loss: 0.00000126, Cur Avg Loss: 0.00005684, Log Avg loss: 0.00002907, Global Avg Loss: 0.00302700, Time: 0.2472 Steps: 166000, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 002058, Sample Num: 32928, Cur Loss: 0.00001623, Cur Avg Loss: 0.00006013, Log Avg loss: 0.00009070, Global Avg Loss: 0.00302347, Time: 0.3085 Steps: 166200, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 002258, Sample Num: 36128, Cur Loss: 0.00000263, Cur Avg Loss: 0.00006244, Log Avg loss: 0.00008614, Global Avg Loss: 0.00301994, Time: 0.4135 Steps: 166400, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 002458, Sample Num: 39328, Cur Loss: 0.00000031, Cur Avg Loss: 0.00006299, Log Avg loss: 0.00006927, Global Avg Loss: 0.00301639, Time: 0.3924 Steps: 166600, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 002658, Sample Num: 42528, Cur Loss: 0.00000031, Cur Avg Loss: 0.00006037, Log Avg loss: 0.00002811, Global Avg Loss: 0.00301281, Time: 0.2174 Steps: 166800, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 002858, Sample Num: 45728, Cur Loss: 0.00000010, Cur Avg Loss: 0.00006050, Log Avg loss: 0.00006233, Global Avg Loss: 0.00300928, Time: 0.2174 Steps: 167000, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 003058, Sample Num: 48928, Cur Loss: 0.00000004, Cur Avg Loss: 0.00006544, Log Avg loss: 0.00013604, Global Avg Loss: 0.00300584, Time: 0.2214 Steps: 167200, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 003258, Sample Num: 52128, Cur Loss: 0.00000149, Cur Avg Loss: 0.00006623, Log Avg loss: 0.00007828, Global Avg Loss: 0.00300234, Time: 0.2180 Steps: 167400, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 003458, Sample Num: 55328, Cur Loss: 0.00000547, Cur Avg Loss: 0.00006740, Log Avg loss: 0.00008646, Global Avg Loss: 0.00299886, Time: 0.3383 Steps: 167600, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 003658, Sample Num: 58528, Cur Loss: 0.00000009, Cur Avg Loss: 0.00006777, Log Avg loss: 0.00007418, Global Avg Loss: 0.00299538, Time: 0.3000 Steps: 167800, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 003858, Sample Num: 61728, Cur Loss: 0.00000193, Cur Avg Loss: 0.00006452, Log Avg loss: 0.00000493, Global Avg Loss: 0.00299182, Time: 0.5678 Steps: 168000, Updated lr: 0.000033 Training, Epoch: 0034, Batch: 004058, Sample Num: 64928, Cur Loss: 0.00000010, Cur Avg Loss: 0.00006233, Log Avg loss: 0.00002016, Global Avg Loss: 0.00298828, Time: 0.2649 Steps: 168200, Updated lr: 0.000032 Training, Epoch: 0034, Batch: 004258, Sample Num: 68128, Cur Loss: 0.00000039, Cur Avg Loss: 0.00006206, Log Avg loss: 0.00005651, Global Avg Loss: 0.00298480, Time: 0.2345 Steps: 168400, Updated lr: 0.000032 Training, Epoch: 0034, Batch: 004458, Sample Num: 71328, Cur Loss: 0.00219303, Cur Avg Loss: 0.00006254, Log Avg loss: 0.00007290, Global Avg Loss: 0.00298135, Time: 0.2315 Steps: 168600, Updated lr: 0.000032 Training, Epoch: 0034, Batch: 004658, Sample Num: 74528, Cur Loss: 0.00000948, Cur Avg Loss: 0.00006167, Log Avg loss: 0.00004229, Global Avg Loss: 0.00297787, Time: 0.2172 Steps: 168800, Updated lr: 0.000032 Training, Epoch: 0034, Batch: 004858, Sample Num: 77728, Cur Loss: 0.00000078, Cur Avg Loss: 0.00006428, Log Avg loss: 0.00012508, Global Avg Loss: 0.00297449, Time: 0.2168 Steps: 169000, Updated lr: 0.000032 ***** Running evaluation checkpoint-169116 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running 
testing checkpoint-169116 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1244.399864, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001923, "eval_total_loss": 2.049605, "eval_acc": 0.99974, "eval_jaccard": 0.988894, "eval_prec": 0.989852, "eval_recall": 0.990416, "eval_f1": 0.989762, "eval_pr_auc": 0.996049, "eval_roc_auc": 0.999446, "eval_fmax": 0.994799, "eval_pmax": 0.997698, "eval_rmax": 0.991917, "eval_tmax": 0.16, "update_flag": true, "test_avg_loss": 0.002369, "test_total_loss": 2.525155, "test_acc": 0.999718, "test_jaccard": 0.987214, "test_prec": 0.987995, "test_recall": 0.989, "test_f1": 0.988118, "test_pr_auc": 0.994898, "test_roc_auc": 0.999128, "test_fmax": 0.994232, "test_pmax": 0.996987, "test_rmax": 0.991492, "test_tmax": 0.08, "lr": 3.2129188534517564e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.002972587061726018, "train_cur_epoch_loss": 0.3355089597607932, "train_cur_epoch_avg_loss": 6.745254518713173e-05, "train_cur_epoch_time": 1244.399864435196, "train_cur_epoch_avg_time": 0.2501809136379566, "epoch": 34, "step": 169116} ################################################## Training, Epoch: 0035, Batch: 000084, Sample Num: 1344, Cur Loss: 0.00000047, Cur Avg Loss: 0.00007934, Log Avg loss: 0.00014941, Global Avg Loss: 0.00297115, Time: 0.2227 Steps: 169200, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 000284, Sample Num: 4544, Cur Loss: 0.00000072, Cur Avg Loss: 0.00005908, Log Avg loss: 0.00005057, Global Avg Loss: 0.00296770, Time: 0.2190 Steps: 169400, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 000484, Sample Num: 7744, Cur Loss: 0.00000002, Cur Avg Loss: 0.00006879, Log Avg loss: 0.00008257, Global Avg Loss: 0.00296430, Time: 0.2130 Steps: 169600, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 000684, Sample Num: 10944, Cur Loss: 0.00000031, Cur Avg Loss: 0.00006229, Log Avg loss: 0.00004658, Global Avg Loss: 0.00296086, Time: 0.2219 Steps: 169800, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 000884, Sample Num: 14144, Cur Loss: 0.00000640, Cur Avg Loss: 0.00006989, Log Avg loss: 0.00009586, Global Avg Loss: 0.00295749, Time: 0.1972 Steps: 170000, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 001084, Sample Num: 17344, Cur Loss: 0.00000212, Cur Avg Loss: 0.00007104, Log Avg loss: 0.00007614, Global Avg Loss: 0.00295411, Time: 0.2984 Steps: 170200, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 001284, Sample Num: 20544, Cur Loss: 0.00000681, Cur Avg Loss: 0.00006860, Log Avg loss: 0.00005534, Global Avg Loss: 0.00295070, Time: 0.2194 Steps: 170400, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 001484, Sample Num: 23744, Cur Loss: 0.00000062, Cur Avg Loss: 0.00006413, Log Avg loss: 0.00003546, Global Avg Loss: 0.00294729, Time: 0.2192 Steps: 170600, Updated lr: 0.000032 Training, Epoch: 0035, Batch: 001684, Sample Num: 26944, Cur Loss: 0.00000020, Cur Avg Loss: 0.00006342, Log Avg loss: 0.00005816, Global Avg Loss: 0.00294390, Time: 0.2175 Steps: 170800, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 001884, Sample Num: 30144, Cur Loss: 0.00000022, Cur Avg Loss: 0.00005922, Log Avg loss: 0.00002383, Global Avg Loss: 0.00294049, Time: 0.0904 Steps: 171000, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 002084, Sample Num: 33344, Cur Loss: 0.00000607, Cur Avg Loss: 0.00006330, Log Avg loss: 0.00010179, Global Avg Loss: 0.00293717, Time: 0.0862 Steps: 171200, Updated lr: 0.000031 Training, Epoch: 
0035, Batch: 002284, Sample Num: 36544, Cur Loss: 0.00000083, Cur Avg Loss: 0.00006489, Log Avg loss: 0.00008139, Global Avg Loss: 0.00293384, Time: 0.3466 Steps: 171400, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 002484, Sample Num: 39744, Cur Loss: 0.00000044, Cur Avg Loss: 0.00006118, Log Avg loss: 0.00001889, Global Avg Loss: 0.00293044, Time: 0.2198 Steps: 171600, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 002684, Sample Num: 42944, Cur Loss: 0.00000195, Cur Avg Loss: 0.00005825, Log Avg loss: 0.00002179, Global Avg Loss: 0.00292706, Time: 0.2167 Steps: 171800, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 002884, Sample Num: 46144, Cur Loss: 0.00000568, Cur Avg Loss: 0.00006082, Log Avg loss: 0.00009529, Global Avg Loss: 0.00292376, Time: 0.2031 Steps: 172000, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 003084, Sample Num: 49344, Cur Loss: 0.00001962, Cur Avg Loss: 0.00006419, Log Avg loss: 0.00011279, Global Avg Loss: 0.00292050, Time: 0.3926 Steps: 172200, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 003284, Sample Num: 52544, Cur Loss: 0.00000022, Cur Avg Loss: 0.00006209, Log Avg loss: 0.00002978, Global Avg Loss: 0.00291715, Time: 0.1537 Steps: 172400, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 003484, Sample Num: 55744, Cur Loss: 0.00000050, Cur Avg Loss: 0.00006336, Log Avg loss: 0.00008419, Global Avg Loss: 0.00291386, Time: 0.2177 Steps: 172600, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 003684, Sample Num: 58944, Cur Loss: 0.00000040, Cur Avg Loss: 0.00006296, Log Avg loss: 0.00005601, Global Avg Loss: 0.00291056, Time: 0.3608 Steps: 172800, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 003884, Sample Num: 62144, Cur Loss: 0.00000023, Cur Avg Loss: 0.00005997, Log Avg loss: 0.00000479, Global Avg Loss: 0.00290720, Time: 0.2172 Steps: 173000, Updated lr: 0.000031 Training, Epoch: 0035, Batch: 004084, Sample Num: 65344, Cur Loss: 0.00000431, Cur Avg Loss: 0.00005816, Log Avg loss: 0.00002303, Global Avg Loss: 0.00290387, Time: 0.2186 Steps: 173200, Updated lr: 0.000030 Training, Epoch: 0035, Batch: 004284, Sample Num: 68544, Cur Loss: 0.00000010, Cur Avg Loss: 0.00005866, Log Avg loss: 0.00006884, Global Avg Loss: 0.00290060, Time: 0.2197 Steps: 173400, Updated lr: 0.000030 Training, Epoch: 0035, Batch: 004484, Sample Num: 71744, Cur Loss: 0.00000133, Cur Avg Loss: 0.00006003, Log Avg loss: 0.00008946, Global Avg Loss: 0.00289736, Time: 0.2199 Steps: 173600, Updated lr: 0.000030 Training, Epoch: 0035, Batch: 004684, Sample Num: 74944, Cur Loss: 0.00000108, Cur Avg Loss: 0.00006018, Log Avg loss: 0.00006357, Global Avg Loss: 0.00289410, Time: 0.2971 Steps: 173800, Updated lr: 0.000030 Training, Epoch: 0035, Batch: 004884, Sample Num: 78144, Cur Loss: 0.00026898, Cur Avg Loss: 0.00006383, Log Avg loss: 0.00014916, Global Avg Loss: 0.00289094, Time: 0.2554 Steps: 174000, Updated lr: 0.000030 ***** Running evaluation checkpoint-174090 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-174090 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1228.293073, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001926, "eval_total_loss": 2.053488, "eval_acc": 0.999733, "eval_jaccard": 0.988604, "eval_prec": 0.989664, "eval_recall": 0.990132, "eval_f1": 0.989523, "eval_pr_auc": 0.996071, "eval_roc_auc": 
0.999458, "eval_fmax": 0.994938, "eval_pmax": 0.997492, "eval_rmax": 0.992397, "eval_tmax": 0.08, "update_flag": false, "test_avg_loss": 0.002379, "test_total_loss": 2.536121, "test_acc": 0.999724, "test_jaccard": 0.98739, "test_prec": 0.988141, "test_recall": 0.989147, "test_f1": 0.98827, "test_pr_auc": 0.994893, "test_roc_auc": 0.999147, "test_fmax": 0.994342, "test_pmax": 0.996728, "test_rmax": 0.991966, "test_tmax": 0.04, "lr": 3.0121114251110216e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0028895781002399013, "train_cur_epoch_loss": 0.33461793990692934, "train_cur_epoch_avg_loss": 6.727340971188769e-05, "train_cur_epoch_time": 1228.2930727005005, "train_cur_epoch_avg_time": 0.2469427166667673, "epoch": 35, "step": 174090} ################################################## Training, Epoch: 0036, Batch: 000110, Sample Num: 1760, Cur Loss: 0.00000004, Cur Avg Loss: 0.00006612, Log Avg loss: 0.00015084, Global Avg Loss: 0.00288780, Time: 0.2167 Steps: 174200, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000310, Sample Num: 4960, Cur Loss: 0.00000445, Cur Avg Loss: 0.00004642, Log Avg loss: 0.00003558, Global Avg Loss: 0.00288452, Time: 0.2191 Steps: 174400, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000510, Sample Num: 8160, Cur Loss: 0.00000561, Cur Avg Loss: 0.00005837, Log Avg loss: 0.00007690, Global Avg Loss: 0.00288131, Time: 0.2193 Steps: 174600, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000710, Sample Num: 11360, Cur Loss: 0.00000045, Cur Avg Loss: 0.00005914, Log Avg loss: 0.00006111, Global Avg Loss: 0.00287808, Time: 0.2370 Steps: 174800, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 000910, Sample Num: 14560, Cur Loss: 0.00001162, Cur Avg Loss: 0.00007000, Log Avg loss: 0.00010855, Global Avg Loss: 0.00287492, Time: 0.2173 Steps: 175000, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001110, Sample Num: 17760, Cur Loss: 0.00000060, Cur Avg Loss: 0.00006980, Log Avg loss: 0.00006890, Global Avg Loss: 0.00287171, Time: 0.2199 Steps: 175200, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001310, Sample Num: 20960, Cur Loss: 0.00000011, Cur Avg Loss: 0.00006655, Log Avg loss: 0.00004850, Global Avg Loss: 0.00286849, Time: 0.2152 Steps: 175400, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001510, Sample Num: 24160, Cur Loss: 0.00000011, Cur Avg Loss: 0.00005948, Log Avg loss: 0.00001317, Global Avg Loss: 0.00286524, Time: 0.2155 Steps: 175600, Updated lr: 0.000030 Training, Epoch: 0036, Batch: 001710, Sample Num: 27360, Cur Loss: 0.00000041, Cur Avg Loss: 0.00005868, Log Avg loss: 0.00005260, Global Avg Loss: 0.00286204, Time: 0.3295 Steps: 175800, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 001910, Sample Num: 30560, Cur Loss: 0.00000277, Cur Avg Loss: 0.00005474, Log Avg loss: 0.00002107, Global Avg Loss: 0.00285881, Time: 0.2952 Steps: 176000, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002110, Sample Num: 33760, Cur Loss: 0.00000838, Cur Avg Loss: 0.00005857, Log Avg loss: 0.00009516, Global Avg Loss: 0.00285568, Time: 0.2233 Steps: 176200, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002310, Sample Num: 36960, Cur Loss: 0.00000008, Cur Avg Loss: 0.00006537, Log Avg loss: 0.00013710, Global Avg Loss: 0.00285259, Time: 0.2161 Steps: 176400, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002510, Sample Num: 40160, Cur Loss: 0.00000012, Cur Avg Loss: 0.00006078, Log Avg loss: 0.00000779, Global Avg Loss: 0.00284937, Time: 0.2165 Steps: 176600, Updated lr: 0.000029 Training, Epoch: 0036, Batch: 002710, 
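Each training line in this log carries three averages: "Cur Avg Loss" (mean over the current epoch so far), "Log Avg loss" (mean over the 200 batches since the previous line, matching logging_steps = 200), and "Global Avg Loss" (mean over every batch since step 0). The epoch-36 values below are consistent with this reading, e.g. (0.00004642 * 310 - 0.00006612 * 110) / 200 = 0.00003558, the Log Avg at batch 310. A minimal bookkeeping sketch that reproduces these columns; the class and names are illustrative, not taken from the training code:

class LossMeter:
    """Running averages matching the three loss columns in the log
    (illustrative names, not the project's actual implementation)."""
    def __init__(self):
        self.global_sum = 0.0
        self.global_count = 0
        self.epoch_sum = 0.0
        self.epoch_count = 0
        self.window = []  # losses since the last log line; not reset at epoch start

    def new_epoch(self):
        # "Cur Avg Loss" restarts every epoch; the logging window does not.
        self.epoch_sum, self.epoch_count = 0.0, 0

    def update(self, loss):
        self.global_sum += loss
        self.global_count += 1
        self.epoch_sum += loss
        self.epoch_count += 1
        self.window.append(loss)

    def log_line(self):
        cur_avg = self.epoch_sum / self.epoch_count        # "Cur Avg Loss"
        log_avg = sum(self.window) / len(self.window)      # "Log Avg loss"
        global_avg = self.global_sum / self.global_count   # "Global Avg Loss"
        self.window = []
        return cur_avg, log_avg, global_avg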
Training, Epoch: 0036, Batch: 000110, Sample Num: 1760, Cur Loss: 0.00000004, Cur Avg Loss: 0.00006612, Log Avg loss: 0.00015084, Global Avg Loss: 0.00288780, Time: 0.2167 Steps: 174200, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000310, Sample Num: 4960, Cur Loss: 0.00000445, Cur Avg Loss: 0.00004642, Log Avg loss: 0.00003558, Global Avg Loss: 0.00288452, Time: 0.2191 Steps: 174400, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000510, Sample Num: 8160, Cur Loss: 0.00000561, Cur Avg Loss: 0.00005837, Log Avg loss: 0.00007690, Global Avg Loss: 0.00288131, Time: 0.2193 Steps: 174600, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000710, Sample Num: 11360, Cur Loss: 0.00000045, Cur Avg Loss: 0.00005914, Log Avg loss: 0.00006111, Global Avg Loss: 0.00287808, Time: 0.2370 Steps: 174800, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 000910, Sample Num: 14560, Cur Loss: 0.00001162, Cur Avg Loss: 0.00007000, Log Avg loss: 0.00010855, Global Avg Loss: 0.00287492, Time: 0.2173 Steps: 175000, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001110, Sample Num: 17760, Cur Loss: 0.00000060, Cur Avg Loss: 0.00006980, Log Avg loss: 0.00006890, Global Avg Loss: 0.00287171, Time: 0.2199 Steps: 175200, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001310, Sample Num: 20960, Cur Loss: 0.00000011, Cur Avg Loss: 0.00006655, Log Avg loss: 0.00004850, Global Avg Loss: 0.00286849, Time: 0.2152 Steps: 175400, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001510, Sample Num: 24160, Cur Loss: 0.00000011, Cur Avg Loss: 0.00005948, Log Avg loss: 0.00001317, Global Avg Loss: 0.00286524, Time: 0.2155 Steps: 175600, Updated lr: 0.000030
Training, Epoch: 0036, Batch: 001710, Sample Num: 27360, Cur Loss: 0.00000041, Cur Avg Loss: 0.00005868, Log Avg loss: 0.00005260, Global Avg Loss: 0.00286204, Time: 0.3295 Steps: 175800, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 001910, Sample Num: 30560, Cur Loss: 0.00000277, Cur Avg Loss: 0.00005474, Log Avg loss: 0.00002107, Global Avg Loss: 0.00285881, Time: 0.2952 Steps: 176000, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002110, Sample Num: 33760, Cur Loss: 0.00000838, Cur Avg Loss: 0.00005857, Log Avg loss: 0.00009516, Global Avg Loss: 0.00285568, Time: 0.2233 Steps: 176200, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002310, Sample Num: 36960, Cur Loss: 0.00000008, Cur Avg Loss: 0.00006537, Log Avg loss: 0.00013710, Global Avg Loss: 0.00285259, Time: 0.2161 Steps: 176400, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002510, Sample Num: 40160, Cur Loss: 0.00000012, Cur Avg Loss: 0.00006078, Log Avg loss: 0.00000779, Global Avg Loss: 0.00284937, Time: 0.2165 Steps: 176600, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002710, Sample Num: 43360, Cur Loss: 0.00000157, Cur Avg Loss: 0.00005726, Log Avg loss: 0.00001306, Global Avg Loss: 0.00284616, Time: 0.2192 Steps: 176800, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 002910, Sample Num: 46560, Cur Loss: 0.00021884, Cur Avg Loss: 0.00006107, Log Avg loss: 0.00011275, Global Avg Loss: 0.00284308, Time: 0.2157 Steps: 177000, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003110, Sample Num: 49760, Cur Loss: 0.00000595, Cur Avg Loss: 0.00006597, Log Avg loss: 0.00013716, Global Avg Loss: 0.00284002, Time: 0.3645 Steps: 177200, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003310, Sample Num: 52960, Cur Loss: 0.00000023, Cur Avg Loss: 0.00006447, Log Avg loss: 0.00004113, Global Avg Loss: 0.00283687, Time: 0.2199 Steps: 177400, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003510, Sample Num: 56160, Cur Loss: 0.00004999, Cur Avg Loss: 0.00006412, Log Avg loss: 0.00005834, Global Avg Loss: 0.00283374, Time: 0.6409 Steps: 177600, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003710, Sample Num: 59360, Cur Loss: 0.00000107, Cur Avg Loss: 0.00006218, Log Avg loss: 0.00002826, Global Avg Loss: 0.00283058, Time: 0.1440 Steps: 177800, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 003910, Sample Num: 62560, Cur Loss: 0.00000203, Cur Avg Loss: 0.00006012, Log Avg loss: 0.00002176, Global Avg Loss: 0.00282743, Time: 0.0890 Steps: 178000, Updated lr: 0.000029
Training, Epoch: 0036, Batch: 004110, Sample Num: 65760, Cur Loss: 0.00000076, Cur Avg Loss: 0.00005825, Log Avg loss: 0.00002171, Global Avg Loss: 0.00282428, Time: 0.2158 Steps: 178200, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004310, Sample Num: 68960, Cur Loss: 0.00000291, Cur Avg Loss: 0.00005835, Log Avg loss: 0.00006040, Global Avg Loss: 0.00282118, Time: 0.2167 Steps: 178400, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004510, Sample Num: 72160, Cur Loss: 0.00000012, Cur Avg Loss: 0.00005961, Log Avg loss: 0.00008677, Global Avg Loss: 0.00281812, Time: 0.2189 Steps: 178600, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004710, Sample Num: 75360, Cur Loss: 0.00000847, Cur Avg Loss: 0.00006156, Log Avg loss: 0.00010565, Global Avg Loss: 0.00281508, Time: 0.4282 Steps: 178800, Updated lr: 0.000028
Training, Epoch: 0036, Batch: 004910, Sample Num: 78560, Cur Loss: 0.00000108, Cur Avg Loss: 0.00006344, Log Avg loss: 0.00010756, Global Avg Loss: 0.00281206, Time: 0.3745 Steps: 179000, Updated lr: 0.000028
***** Running evaluation checkpoint-179064 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-179064 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1228.769714, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.001898, "eval_total_loss": 2.022989, "eval_acc": 0.999735, "eval_jaccard": 0.988693, "eval_prec": 0.989725, "eval_recall": 0.990278, "eval_f1": 0.989623, "eval_pr_auc": 0.996232, "eval_roc_auc": 0.999482, "eval_fmax": 0.994967, "eval_pmax": 0.997374, "eval_rmax": 0.992571, "eval_tmax": 0.1, "update_flag": false, "test_avg_loss": 0.002365, "test_total_loss": 2.520804, "test_acc": 0.99972, "test_jaccard": 0.987194, "test_prec": 0.988014, "test_recall": 0.98902, "test_f1": 0.988121, "test_pr_auc": 0.994819, "test_roc_auc": 0.999147, "test_fmax": 0.994279, "test_pmax": 0.996218, "test_rmax": 0.992347, "test_tmax": 0.03, "lr": 2.811303996770287e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0028111346725483953, "train_cur_epoch_loss": 0.32636753444119737, "train_cur_epoch_avg_loss": 6.561470334563679e-05, "train_cur_epoch_time": 1228.7697143554688, "train_cur_epoch_avg_time": 0.24703854329623418, "epoch": 36, "step": 179064}
##################################################
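The eval_fmax/eval_pmax/eval_rmax/eval_tmax fields in these summaries report the best F1 found by sweeping a decision threshold over the predicted probabilities, together with the precision, recall, and threshold at that optimum; the two-decimal tmax values (0.1, 0.03, 0.08, ...) suggest a 0.01-spaced grid. A sketch under those assumptions, micro-averaged over all labels; not the project's actual implementation:

import numpy as np

def f_max(y_true, y_prob, thresholds=np.arange(0.01, 1.0, 0.01)):
    # y_true: 0/1 array of shape (n_samples, n_labels),
    # y_prob: sigmoid outputs of the same shape.
    best = (0.0, 0.0, 0.0, 0.0)  # (fmax, pmax, rmax, tmax)
    for t in thresholds:
        pred = (y_prob >= t).astype(int)
        tp = float((pred * y_true).sum())
        if pred.sum() == 0 or tp == 0.0:
            continue
        p = tp / pred.sum()          # micro precision at threshold t
        r = tp / y_true.sum()        # micro recall at threshold t
        f = 2 * p * r / (p + r)
        if f > best[0]:
            best = (f, p, r, float(t))
    return best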
Training, Epoch: 0037, Batch: 000136, Sample Num: 2176, Cur Loss: 0.00000356, Cur Avg Loss: 0.00006004, Log Avg loss: 0.00011530, Global Avg Loss: 0.00280905, Time: 0.2100 Steps: 179200, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000336, Sample Num: 5376, Cur Loss: 0.00000184, Cur Avg Loss: 0.00004199, Log Avg loss: 0.00002971, Global Avg Loss: 0.00280595, Time: 0.2306 Steps: 179400, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000536, Sample Num: 8576, Cur Loss: 0.00000136, Cur Avg Loss: 0.00005884, Log Avg loss: 0.00008715, Global Avg Loss: 0.00280292, Time: 0.3654 Steps: 179600, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000736, Sample Num: 11776, Cur Loss: 0.00000036, Cur Avg Loss: 0.00005600, Log Avg loss: 0.00004837, Global Avg Loss: 0.00279986, Time: 0.2534 Steps: 179800, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 000936, Sample Num: 14976, Cur Loss: 0.00000098, Cur Avg Loss: 0.00005657, Log Avg loss: 0.00005869, Global Avg Loss: 0.00279681, Time: 0.2169 Steps: 180000, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 001136, Sample Num: 18176, Cur Loss: 0.00000071, Cur Avg Loss: 0.00005933, Log Avg loss: 0.00007226, Global Avg Loss: 0.00279379, Time: 0.2193 Steps: 180200, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 001336, Sample Num: 21376, Cur Loss: 0.00000053, Cur Avg Loss: 0.00005879, Log Avg loss: 0.00005573, Global Avg Loss: 0.00279075, Time: 0.5504 Steps: 180400, Updated lr: 0.000028
Training, Epoch: 0037, Batch: 001536, Sample Num: 24576, Cur Loss: 0.00000004, Cur Avg Loss: 0.00005420, Log Avg loss: 0.00002350, Global Avg Loss: 0.00278769, Time: 0.2473 Steps: 180600, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 001736, Sample Num: 27776, Cur Loss: 0.00000230, Cur Avg Loss: 0.00005470, Log Avg loss: 0.00005851, Global Avg Loss: 0.00278467, Time: 0.3022 Steps: 180800, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 001936, Sample Num: 30976, Cur Loss: 0.00000206, Cur Avg Loss: 0.00004950, Log Avg loss: 0.00000438, Global Avg Loss: 0.00278160, Time: 0.3358 Steps: 181000, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002136, Sample Num: 34176, Cur Loss: 0.00000125, Cur Avg Loss: 0.00005605, Log Avg loss: 0.00011946, Global Avg Loss: 0.00277866, Time: 0.2171 Steps: 181200, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002336, Sample Num: 37376, Cur Loss: 0.00000006, Cur Avg Loss: 0.00006120, Log Avg loss: 0.00011622, Global Avg Loss: 0.00277572, Time: 0.2305 Steps: 181400, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002536, Sample Num: 40576, Cur Loss: 0.00000045, Cur Avg Loss: 0.00005719, Log Avg loss: 0.00001040, Global Avg Loss: 0.00277268, Time: 0.2494 Steps: 181600, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002736, Sample Num: 43776, Cur Loss: 0.00000028, Cur Avg Loss: 0.00005497, Log Avg loss: 0.00002673, Global Avg Loss: 0.00276966, Time: 0.2168 Steps: 181800, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 002936, Sample Num: 46976, Cur Loss: 0.00000100, Cur Avg Loss: 0.00005882, Log Avg loss: 0.00011157, Global Avg Loss: 0.00276673, Time: 0.3851 Steps: 182000, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003136, Sample Num: 50176, Cur Loss: 0.00001153, Cur Avg Loss: 0.00006140, Log Avg loss: 0.00009924, Global Avg Loss: 0.00276381, Time: 0.2420 Steps: 182200, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003336, Sample Num: 53376, Cur Loss: 0.00000261, Cur Avg Loss: 0.00006134, Log Avg loss: 0.00006040, Global Avg Loss: 0.00276084, Time: 0.2179 Steps: 182400, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003536, Sample Num: 56576, Cur Loss: 0.00000160, Cur Avg Loss: 0.00006031, Log Avg loss: 0.00004304, Global Avg Loss: 0.00275787, Time: 0.2169 Steps: 182600, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003736, Sample Num: 59776, Cur Loss: 0.00000093, Cur Avg Loss: 0.00005789, Log Avg loss: 0.00001518, Global Avg Loss: 0.00275486, Time: 0.2173 Steps: 182800, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 003936, Sample Num: 62976, Cur Loss: 0.00000018, Cur Avg Loss: 0.00005598, Log Avg loss: 0.00002038, Global Avg Loss: 0.00275188, Time: 0.1291 Steps: 183000, Updated lr: 0.000027
Training, Epoch: 0037, Batch: 004136, Sample Num: 66176, Cur Loss: 0.00000039, Cur Avg Loss: 0.00005377, Log Avg loss: 0.00001015, Global Avg Loss: 0.00274888, Time: 0.2198 Steps: 183200, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004336, Sample Num: 69376, Cur Loss: 0.00000426, Cur Avg Loss: 0.00005305, Log Avg loss: 0.00003827, Global Avg Loss: 0.00274593, Time: 0.3857 Steps: 183400, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004536, Sample Num: 72576, Cur Loss: 0.00000211, Cur Avg Loss: 0.00005447, Log Avg loss: 0.00008530, Global Avg Loss: 0.00274303, Time: 0.2148 Steps: 183600, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004736, Sample Num: 75776, Cur Loss: 0.00000339, Cur Avg Loss: 0.00005543, Log Avg loss: 0.00007702, Global Avg Loss: 0.00274013, Time: 0.2410 Steps: 183800, Updated lr: 0.000026
Training, Epoch: 0037, Batch: 004936, Sample Num: 78976, Cur Loss: 0.00000215, Cur Avg Loss: 0.00005619, Log Avg loss: 0.00007416, Global Avg Loss: 0.00273723, Time: 0.2190 Steps: 184000, Updated lr: 0.000026
***** Running evaluation checkpoint-184038 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-184038 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1264.766596, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.001952, "eval_total_loss": 2.080402, "eval_acc": 0.999731, "eval_jaccard": 0.988492, "eval_prec": 0.989591, "eval_recall": 0.990035, "eval_f1": 0.989438, "eval_pr_auc": 0.996031, "eval_roc_auc": 0.999441, "eval_fmax": 0.994833, "eval_pmax": 0.997976, "eval_rmax": 0.99171, "eval_tmax": 0.18, "update_flag": false, "test_avg_loss": 0.00242, "test_total_loss": 2.579522, "test_acc": 0.999723, "test_jaccard": 0.98739, "test_prec": 0.98822, "test_recall": 0.989068, "test_f1": 0.98827, "test_pr_auc": 0.994921, "test_roc_auc": 0.999126, "test_fmax": 0.994288, "test_pmax": 0.997173, "test_rmax": 0.991419, "test_tmax": 0.08, "lr": 2.610496568429552e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0027367342145545896, "train_cur_epoch_loss": 0.29007237299172206, "train_cur_epoch_avg_loss": 5.831772677758787e-05, "train_cur_epoch_time": 1264.7665956020355, "train_cur_epoch_avg_time": 0.2542755519907591, "epoch": 37, "step": 184038}
##################################################
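The companion columns (eval_acc, eval_jaccard, eval_prec, eval_recall, eval_f1) are reported at a single fixed operating point rather than from the threshold sweep. The log does not show the binarization threshold or the averaging scheme; a plausible sketch assuming sigmoid outputs cut at 0.5, label-wise accuracy, and sample-averaged set metrics:

import numpy as np

def multilabel_metrics(y_true, y_prob, threshold=0.5):
    # y_true: int 0/1 array of shape (n_samples, n_labels).
    # The 0.5 cut and the sample-averaging are assumptions, not read
    # from the training code.
    pred = (y_prob >= threshold).astype(int)
    acc = (pred == y_true).mean()                        # ~ "eval_acc"
    tp = (pred & y_true).sum(axis=1).astype(float)
    pred_n = pred.sum(axis=1)
    true_n = y_true.sum(axis=1)
    union = (pred | y_true).sum(axis=1)
    jaccard = np.where(union > 0, tp / np.maximum(union, 1), 1.0).mean()
    prec = np.where(pred_n > 0, tp / np.maximum(pred_n, 1), 0.0).mean()
    rec = np.where(true_n > 0, tp / np.maximum(true_n, 1), 0.0).mean()
    denom = pred_n + true_n
    f1 = np.where(denom > 0, 2 * tp / np.maximum(denom, 1), 1.0).mean()
    return acc, jaccard, prec, rec, f1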
Training, Epoch: 0038, Batch: 000162, Sample Num: 2592, Cur Loss: 0.00000733, Cur Avg Loss: 0.00003465, Log Avg loss: 0.00009177, Global Avg Loss: 0.00273436, Time: 0.2151 Steps: 184200, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000362, Sample Num: 5792, Cur Loss: 0.00000088, Cur Avg Loss: 0.00002779, Log Avg loss: 0.00002223, Global Avg Loss: 0.00273142, Time: 0.2596 Steps: 184400, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000562, Sample Num: 8992, Cur Loss: 0.00000002, Cur Avg Loss: 0.00005060, Log Avg loss: 0.00009189, Global Avg Loss: 0.00272856, Time: 0.2215 Steps: 184600, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000762, Sample Num: 12192, Cur Loss: 0.00000064, Cur Avg Loss: 0.00005669, Log Avg loss: 0.00007381, Global Avg Loss: 0.00272568, Time: 0.2064 Steps: 184800, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 000962, Sample Num: 15392, Cur Loss: 0.00000109, Cur Avg Loss: 0.00005638, Log Avg loss: 0.00005518, Global Avg Loss: 0.00272280, Time: 0.4117 Steps: 185000, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 001162, Sample Num: 18592, Cur Loss: 0.00000238, Cur Avg Loss: 0.00006273, Log Avg loss: 0.00009330, Global Avg Loss: 0.00271996, Time: 0.2181 Steps: 185200, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 001362, Sample Num: 21792, Cur Loss: 0.00000128, Cur Avg Loss: 0.00006018, Log Avg loss: 0.00004538, Global Avg Loss: 0.00271707, Time: 0.2196 Steps: 185400, Updated lr: 0.000026
Training, Epoch: 0038, Batch: 001562, Sample Num: 24992, Cur Loss: 0.00000088, Cur Avg Loss: 0.00005906, Log Avg loss: 0.00005137, Global Avg Loss: 0.00271420, Time: 0.2258 Steps: 185600, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 001762, Sample Num: 28192, Cur Loss: 0.00000022, Cur Avg Loss: 0.00005802, Log Avg loss: 0.00004998, Global Avg Loss: 0.00271133, Time: 0.2164 Steps: 185800, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 001962, Sample Num: 31392, Cur Loss: 0.00000002, Cur Avg Loss: 0.00005319, Log Avg loss: 0.00001055, Global Avg Loss: 0.00270843, Time: 0.2177 Steps: 186000, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002162, Sample Num: 34592, Cur Loss: 0.00002428, Cur Avg Loss: 0.00006139, Log Avg loss: 0.00014191, Global Avg Loss: 0.00270567, Time: 0.2430 Steps: 186200, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002362, Sample Num: 37792, Cur Loss: 0.00000179, Cur Avg Loss: 0.00006448, Log Avg loss: 0.00009785, Global Avg Loss: 0.00270287, Time: 0.3705 Steps: 186400, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002562, Sample Num: 40992, Cur Loss: 0.00000006, Cur Avg Loss: 0.00006016, Log Avg loss: 0.00000912, Global Avg Loss: 0.00269999, Time: 0.2175 Steps: 186600, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002762, Sample Num: 44192, Cur Loss: 0.00000069, Cur Avg Loss: 0.00006161, Log Avg loss: 0.00008013, Global Avg Loss: 0.00269718, Time: 0.2050 Steps: 186800, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 002962, Sample Num: 47392, Cur Loss: 0.00000008, Cur Avg Loss: 0.00006349, Log Avg loss: 0.00008958, Global Avg Loss: 0.00269439, Time: 0.2178 Steps: 187000, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003162, Sample Num: 50592, Cur Loss: 0.00000949, Cur Avg Loss: 0.00006289, Log Avg loss: 0.00005389, Global Avg Loss: 0.00269157, Time: 0.0947 Steps: 187200, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003362, Sample Num: 53792, Cur Loss: 0.00001436, Cur Avg Loss: 0.00006333, Log Avg loss: 0.00007035, Global Avg Loss: 0.00268877, Time: 0.3679 Steps: 187400, Updated lr: 0.000025
Training, Epoch: 0038, Batch: 003562, Sample Num: 56992, Cur Loss: 0.00000921, Cur Avg
Loss: 0.00006082, Log Avg loss: 0.00001856, Global Avg Loss: 0.00268593, Time: 0.4098 Steps: 187600, Updated lr: 0.000025 Training, Epoch: 0038, Batch: 003762, Sample Num: 60192, Cur Loss: 0.00000081, Cur Avg Loss: 0.00005846, Log Avg loss: 0.00001656, Global Avg Loss: 0.00268308, Time: 0.1935 Steps: 187800, Updated lr: 0.000025 Training, Epoch: 0038, Batch: 003962, Sample Num: 63392, Cur Loss: 0.00000285, Cur Avg Loss: 0.00005684, Log Avg loss: 0.00002621, Global Avg Loss: 0.00268026, Time: 0.3801 Steps: 188000, Updated lr: 0.000025 Training, Epoch: 0038, Batch: 004162, Sample Num: 66592, Cur Loss: 0.00000002, Cur Avg Loss: 0.00005467, Log Avg loss: 0.00001173, Global Avg Loss: 0.00267742, Time: 0.2144 Steps: 188200, Updated lr: 0.000024 Training, Epoch: 0038, Batch: 004362, Sample Num: 69792, Cur Loss: 0.00000147, Cur Avg Loss: 0.00005420, Log Avg loss: 0.00004450, Global Avg Loss: 0.00267463, Time: 0.2189 Steps: 188400, Updated lr: 0.000024 Training, Epoch: 0038, Batch: 004562, Sample Num: 72992, Cur Loss: 0.00000009, Cur Avg Loss: 0.00005566, Log Avg loss: 0.00008746, Global Avg Loss: 0.00267188, Time: 0.2080 Steps: 188600, Updated lr: 0.000024 Training, Epoch: 0038, Batch: 004762, Sample Num: 76192, Cur Loss: 0.00000121, Cur Avg Loss: 0.00005689, Log Avg loss: 0.00008489, Global Avg Loss: 0.00266914, Time: 0.2997 Steps: 188800, Updated lr: 0.000024 Training, Epoch: 0038, Batch: 004962, Sample Num: 79392, Cur Loss: 0.00000347, Cur Avg Loss: 0.00005969, Log Avg loss: 0.00012642, Global Avg Loss: 0.00266645, Time: 0.2153 Steps: 189000, Updated lr: 0.000024 ***** Running evaluation checkpoint-189012 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-189012 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1206.276327, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001959, "eval_total_loss": 2.088305, "eval_acc": 0.999736, "eval_jaccard": 0.988697, "eval_prec": 0.989738, "eval_recall": 0.99028, "eval_f1": 0.989629, "eval_pr_auc": 0.996064, "eval_roc_auc": 0.999448, "eval_fmax": 0.994765, "eval_pmax": 0.997769, "eval_rmax": 0.991778, "eval_tmax": 0.17, "update_flag": false, "test_avg_loss": 0.002433, "test_total_loss": 2.594, "test_acc": 0.999719, "test_jaccard": 0.98714, "test_prec": 0.987941, "test_recall": 0.989044, "test_f1": 0.988079, "test_pr_auc": 0.99479, "test_roc_auc": 0.999128, "test_fmax": 0.994159, "test_pmax": 0.996697, "test_rmax": 0.991634, "test_tmax": 0.05, "lr": 2.4096891400888173e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0026662909457511067, "train_cur_epoch_loss": 0.2978928601086668, "train_cur_epoch_avg_loss": 5.989000002184696e-05, "train_cur_epoch_time": 1206.2763266563416, "train_cur_epoch_avg_time": 0.24251635035310445, "epoch": 38, "step": 189012} ################################################## Training, Epoch: 0039, Batch: 000188, Sample Num: 3008, Cur Loss: 0.00000002, Cur Avg Loss: 0.00004116, Log Avg loss: 0.00004724, Global Avg Loss: 0.00266368, Time: 0.3625 Steps: 189200, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 000388, Sample Num: 6208, Cur Loss: 0.00000018, Cur Avg Loss: 0.00004669, Log Avg loss: 0.00005189, Global Avg Loss: 0.00266092, Time: 0.2180 Steps: 189400, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 000588, Sample Num: 9408, Cur Loss: 0.00000066, Cur Avg Loss: 0.00004266, Log Avg loss: 
0.00003484, Global Avg Loss: 0.00265815, Time: 0.2194 Steps: 189600, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 000788, Sample Num: 12608, Cur Loss: 0.00000549, Cur Avg Loss: 0.00005168, Log Avg loss: 0.00007821, Global Avg Loss: 0.00265544, Time: 0.3935 Steps: 189800, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 000988, Sample Num: 15808, Cur Loss: 0.00000083, Cur Avg Loss: 0.00004963, Log Avg loss: 0.00004154, Global Avg Loss: 0.00265268, Time: 0.3122 Steps: 190000, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 001188, Sample Num: 19008, Cur Loss: 0.00000157, Cur Avg Loss: 0.00005436, Log Avg loss: 0.00007774, Global Avg Loss: 0.00264998, Time: 0.2190 Steps: 190200, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 001388, Sample Num: 22208, Cur Loss: 0.00000109, Cur Avg Loss: 0.00005210, Log Avg loss: 0.00003867, Global Avg Loss: 0.00264723, Time: 0.2254 Steps: 190400, Updated lr: 0.000024 Training, Epoch: 0039, Batch: 001588, Sample Num: 25408, Cur Loss: 0.00000050, Cur Avg Loss: 0.00005186, Log Avg loss: 0.00005016, Global Avg Loss: 0.00264451, Time: 0.3401 Steps: 190600, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 001788, Sample Num: 28608, Cur Loss: 0.00000038, Cur Avg Loss: 0.00005022, Log Avg loss: 0.00003725, Global Avg Loss: 0.00264178, Time: 0.2128 Steps: 190800, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 001988, Sample Num: 31808, Cur Loss: 0.00000694, Cur Avg Loss: 0.00004609, Log Avg loss: 0.00000913, Global Avg Loss: 0.00263902, Time: 0.2547 Steps: 191000, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 002188, Sample Num: 35008, Cur Loss: 0.00000086, Cur Avg Loss: 0.00005050, Log Avg loss: 0.00009439, Global Avg Loss: 0.00263636, Time: 0.3707 Steps: 191200, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 002388, Sample Num: 38208, Cur Loss: 0.00000045, Cur Avg Loss: 0.00005033, Log Avg loss: 0.00004839, Global Avg Loss: 0.00263365, Time: 0.2085 Steps: 191400, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 002588, Sample Num: 41408, Cur Loss: 0.00000001, Cur Avg Loss: 0.00004689, Log Avg loss: 0.00000583, Global Avg Loss: 0.00263091, Time: 0.1420 Steps: 191600, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 002788, Sample Num: 44608, Cur Loss: 0.00000186, Cur Avg Loss: 0.00004683, Log Avg loss: 0.00004607, Global Avg Loss: 0.00262821, Time: 0.4063 Steps: 191800, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 002988, Sample Num: 47808, Cur Loss: 0.00001839, Cur Avg Loss: 0.00004885, Log Avg loss: 0.00007703, Global Avg Loss: 0.00262556, Time: 0.2208 Steps: 192000, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 003188, Sample Num: 51008, Cur Loss: 0.00000012, Cur Avg Loss: 0.00004910, Log Avg loss: 0.00005288, Global Avg Loss: 0.00262288, Time: 0.2255 Steps: 192200, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 003388, Sample Num: 54208, Cur Loss: 0.00000015, Cur Avg Loss: 0.00005099, Log Avg loss: 0.00008112, Global Avg Loss: 0.00262024, Time: 0.2585 Steps: 192400, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 003588, Sample Num: 57408, Cur Loss: 0.00000089, Cur Avg Loss: 0.00004891, Log Avg loss: 0.00001368, Global Avg Loss: 0.00261753, Time: 0.2171 Steps: 192600, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 003788, Sample Num: 60608, Cur Loss: 0.00000028, Cur Avg Loss: 0.00004724, Log Avg loss: 0.00001726, Global Avg Loss: 0.00261483, Time: 0.2160 Steps: 192800, Updated lr: 0.000023 Training, Epoch: 0039, Batch: 003988, Sample Num: 63808, Cur Loss: 0.00000056, Cur Avg Loss: 0.00004570, Log Avg loss: 
0.00001638, Global Avg Loss: 0.00261214, Time: 0.2163 Steps: 193000, Updated lr: 0.000022 Training, Epoch: 0039, Batch: 004188, Sample Num: 67008, Cur Loss: 0.00000018, Cur Avg Loss: 0.00004444, Log Avg loss: 0.00001944, Global Avg Loss: 0.00260946, Time: 0.2160 Steps: 193200, Updated lr: 0.000022 Training, Epoch: 0039, Batch: 004388, Sample Num: 70208, Cur Loss: 0.00000018, Cur Avg Loss: 0.00004378, Log Avg loss: 0.00002989, Global Avg Loss: 0.00260679, Time: 0.2173 Steps: 193400, Updated lr: 0.000022 Training, Epoch: 0039, Batch: 004588, Sample Num: 73408, Cur Loss: 0.00000031, Cur Avg Loss: 0.00004522, Log Avg loss: 0.00007674, Global Avg Loss: 0.00260418, Time: 0.3221 Steps: 193600, Updated lr: 0.000022 Training, Epoch: 0039, Batch: 004788, Sample Num: 76608, Cur Loss: 0.00000005, Cur Avg Loss: 0.00004682, Log Avg loss: 0.00008363, Global Avg Loss: 0.00260157, Time: 0.2169 Steps: 193800, Updated lr: 0.000022 ***** Running evaluation checkpoint-193986 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-193986 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1214.396462, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001949, "eval_total_loss": 2.07742, "eval_acc": 0.999735, "eval_jaccard": 0.988693, "eval_prec": 0.989764, "eval_recall": 0.990238, "eval_f1": 0.989627, "eval_pr_auc": 0.996123, "eval_roc_auc": 0.999462, "eval_fmax": 0.994778, "eval_pmax": 0.997187, "eval_rmax": 0.99238, "eval_tmax": 0.06, "update_flag": false, "test_avg_loss": 0.002436, "test_total_loss": 2.597292, "test_acc": 0.999723, "test_jaccard": 0.987409, "test_prec": 0.9882, "test_recall": 0.989166, "test_f1": 0.988305, "test_pr_auc": 0.99488, "test_roc_auc": 0.99913, "test_fmax": 0.994244, "test_pmax": 0.99679, "test_rmax": 0.991712, "test_tmax": 0.06, "lr": 2.2088817117480825e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0025992102470543794, "train_cur_epoch_loss": 0.24941474678318362, "train_cur_epoch_avg_loss": 5.0143696578846725e-05, "train_cur_epoch_time": 1214.3964624404907, "train_cur_epoch_avg_time": 0.24414886659438897, "epoch": 39, "step": 193986} ################################################## Training, Epoch: 0040, Batch: 000014, Sample Num: 224, Cur Loss: 0.00000105, Cur Avg Loss: 0.00000357, Log Avg loss: 0.00012646, Global Avg Loss: 0.00259902, Time: 0.2146 Steps: 194000, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 000214, Sample Num: 3424, Cur Loss: 0.00000001, Cur Avg Loss: 0.00004219, Log Avg loss: 0.00004489, Global Avg Loss: 0.00259639, Time: 0.2715 Steps: 194200, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 000414, Sample Num: 6624, Cur Loss: 0.00000001, Cur Avg Loss: 0.00005584, Log Avg loss: 0.00007045, Global Avg Loss: 0.00259379, Time: 0.2158 Steps: 194400, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 000614, Sample Num: 9824, Cur Loss: 0.00000094, Cur Avg Loss: 0.00004482, Log Avg loss: 0.00002202, Global Avg Loss: 0.00259115, Time: 0.2117 Steps: 194600, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 000814, Sample Num: 13024, Cur Loss: 0.00001409, Cur Avg Loss: 0.00004655, Log Avg loss: 0.00005184, Global Avg Loss: 0.00258854, Time: 0.2209 Steps: 194800, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 001014, Sample Num: 16224, Cur Loss: 0.00000122, Cur Avg Loss: 0.00004825, Log Avg loss: 0.00005518, Global Avg Loss: 
0.00258595, Time: 0.2464 Steps: 195000, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 001214, Sample Num: 19424, Cur Loss: 0.00002510, Cur Avg Loss: 0.00005546, Log Avg loss: 0.00009204, Global Avg Loss: 0.00258339, Time: 0.3389 Steps: 195200, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 001414, Sample Num: 22624, Cur Loss: 0.00000224, Cur Avg Loss: 0.00005113, Log Avg loss: 0.00002482, Global Avg Loss: 0.00258077, Time: 0.3907 Steps: 195400, Updated lr: 0.000022 Training, Epoch: 0040, Batch: 001614, Sample Num: 25824, Cur Loss: 0.00000129, Cur Avg Loss: 0.00004860, Log Avg loss: 0.00003074, Global Avg Loss: 0.00257816, Time: 0.2073 Steps: 195600, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 001814, Sample Num: 29024, Cur Loss: 0.00004327, Cur Avg Loss: 0.00004980, Log Avg loss: 0.00005946, Global Avg Loss: 0.00257559, Time: 0.2266 Steps: 195800, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 002014, Sample Num: 32224, Cur Loss: 0.00000058, Cur Avg Loss: 0.00004807, Log Avg loss: 0.00003240, Global Avg Loss: 0.00257300, Time: 0.2175 Steps: 196000, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 002214, Sample Num: 35424, Cur Loss: 0.00000033, Cur Avg Loss: 0.00005060, Log Avg loss: 0.00007609, Global Avg Loss: 0.00257045, Time: 0.2221 Steps: 196200, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 002414, Sample Num: 38624, Cur Loss: 0.00000293, Cur Avg Loss: 0.00005207, Log Avg loss: 0.00006828, Global Avg Loss: 0.00256790, Time: 0.2168 Steps: 196400, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 002614, Sample Num: 41824, Cur Loss: 0.00000002, Cur Avg Loss: 0.00004917, Log Avg loss: 0.00001418, Global Avg Loss: 0.00256530, Time: 0.4477 Steps: 196600, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 002814, Sample Num: 45024, Cur Loss: 0.00000016, Cur Avg Loss: 0.00005063, Log Avg loss: 0.00006978, Global Avg Loss: 0.00256277, Time: 0.2160 Steps: 196800, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 003014, Sample Num: 48224, Cur Loss: 0.00000095, Cur Avg Loss: 0.00005200, Log Avg loss: 0.00007119, Global Avg Loss: 0.00256024, Time: 0.2759 Steps: 197000, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 003214, Sample Num: 51424, Cur Loss: 0.00002241, Cur Avg Loss: 0.00005120, Log Avg loss: 0.00003925, Global Avg Loss: 0.00255768, Time: 0.2189 Steps: 197200, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 003414, Sample Num: 54624, Cur Loss: 0.00000023, Cur Avg Loss: 0.00005270, Log Avg loss: 0.00007674, Global Avg Loss: 0.00255517, Time: 0.2115 Steps: 197400, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 003614, Sample Num: 57824, Cur Loss: 0.00000121, Cur Avg Loss: 0.00005146, Log Avg loss: 0.00003032, Global Avg Loss: 0.00255261, Time: 0.2179 Steps: 197600, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 003814, Sample Num: 61024, Cur Loss: 0.00000104, Cur Avg Loss: 0.00004912, Log Avg loss: 0.00000687, Global Avg Loss: 0.00255004, Time: 0.2688 Steps: 197800, Updated lr: 0.000021 Training, Epoch: 0040, Batch: 004014, Sample Num: 64224, Cur Loss: 0.00000005, Cur Avg Loss: 0.00004900, Log Avg loss: 0.00004657, Global Avg Loss: 0.00254751, Time: 0.1548 Steps: 198000, Updated lr: 0.000020 Training, Epoch: 0040, Batch: 004214, Sample Num: 67424, Cur Loss: 0.00000182, Cur Avg Loss: 0.00004874, Log Avg loss: 0.00004365, Global Avg Loss: 0.00254498, Time: 0.2069 Steps: 198200, Updated lr: 0.000020 Training, Epoch: 0040, Batch: 004414, Sample Num: 70624, Cur Loss: 0.00000010, Cur Avg Loss: 0.00004784, Log Avg loss: 0.00002887, Global Avg Loss: 
0.00254245, Time: 0.1776 Steps: 198400, Updated lr: 0.000020
Training, Epoch: 0040, Batch: 004614, Sample Num: 73824, Cur Loss: 0.00000150, Cur Avg Loss: 0.00005043, Log Avg loss: 0.00010745, Global Avg Loss: 0.00254000, Time: 0.3412 Steps: 198600, Updated lr: 0.000020
Training, Epoch: 0040, Batch: 004814, Sample Num: 77024, Cur Loss: 0.00000301, Cur Avg Loss: 0.00005442, Log Avg loss: 0.00014646, Global Avg Loss: 0.00253759, Time: 0.2121 Steps: 198800, Updated lr: 0.000020
***** Running evaluation checkpoint-198960 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-198960 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1256.021803, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.001911, "eval_total_loss": 2.037426, "eval_acc": 0.999734, "eval_jaccard": 0.989006, "eval_prec": 0.990048, "eval_recall": 0.990561, "eval_f1": 0.989931, "eval_pr_auc": 0.995919, "eval_roc_auc": 0.999459, "eval_fmax": 0.994949, "eval_pmax": 0.997343, "eval_rmax": 0.992566, "eval_tmax": 0.09, "update_flag": true, "test_avg_loss": 0.002417, "test_total_loss": 2.576387, "test_acc": 0.999721, "test_jaccard": 0.987411, "test_prec": 0.988213, "test_recall": 0.989161, "test_f1": 0.988302, "test_pr_auc": 0.994757, "test_roc_auc": 0.999132, "test_fmax": 0.994362, "test_pmax": 0.996552, "test_rmax": 0.992181, "test_tmax": 0.04, "lr": 2.0080742834073476e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0025356254291025193, "train_cur_epoch_loss": 0.27763638914522193, "train_cur_epoch_avg_loss": 5.581752897973903e-05, "train_cur_epoch_time": 1256.0218031406403, "train_cur_epoch_avg_time": 0.2525174513752795, "epoch": 40, "step": 198960}
##################################################
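Although the config header lists lr_update_strategy and lr_decay_rate, the learning rates recorded in these summaries follow a plain linear decay after warmup: with 4974 steps per epoch over 50 epochs (248,700 total steps, the first 1,000 of them warmup), the logged values are reproduced to full precision, e.g. 2.0080742834073476e-05 at step 198960 above. A sketch of that schedule, inferred from the log rather than taken from the training code:

def linear_lr(step, base_lr=1e-4, warmup_steps=1000, total_steps=50 * 4974):
    # Linear warmup to base_lr, then linear decay to zero at total_steps.
    # Inferred from the logged values, not read from the code.
    if step < warmup_steps:
        return base_lr * step / warmup_steps
    return base_lr * (total_steps - step) / (total_steps - warmup_steps)

# Epoch-end values from the log are reproduced exactly:
assert abs(linear_lr(198960) - 2.0080742834073476e-05) < 1e-15  # epoch 40
assert abs(linear_lr(213882) - 1.4056519983851435e-05) < 1e-15  # epoch 43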
Training, Epoch: 0041, Batch: 000040, Sample Num: 640, Cur Loss: 0.00000194, Cur Avg Loss: 0.00001355, Log Avg loss: 0.00008110, Global Avg Loss: 0.00253512, Time: 0.2175 Steps: 199000, Updated lr: 0.000020
Training, Epoch: 0041, Batch: 000240, Sample Num: 3840, Cur Loss: 0.00000110, Cur Avg Loss: 0.00003124, Log Avg loss: 0.00003478, Global Avg Loss: 0.00253261, Time: 0.2196 Steps: 199200, Updated lr: 0.000020
Training, Epoch: 0041, Batch: 000440, Sample Num: 7040, Cur Loss: 0.00004990, Cur Avg Loss: 0.00005075, Log Avg loss: 0.00007418, Global Avg Loss: 0.00253014, Time: 0.2921 Steps: 199400, Updated lr: 0.000020
Training, Epoch: 0041, Batch: 000640, Sample Num: 10240, Cur Loss: 0.00000288, Cur Avg Loss: 0.00004343, Log Avg loss: 0.00002733, Global Avg Loss: 0.00252763, Time: 0.2424 Steps: 199600, Updated lr: 0.000020
Training, Epoch: 0041, Batch: 000840, Sample Num: 13440, Cur Loss: 0.00000013, Cur Avg Loss: 0.00005092, Log Avg loss: 0.00007487, Global Avg Loss: 0.00252518, Time: 0.0853 Steps: 199800, Updated lr: 0.000020
Training, Epoch: 0041, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00000014, Cur Avg Loss: 0.00004675, Log Avg loss: 0.00002925, Global Avg Loss: 0.00252268, Time: 0.2157 Steps: 200000, Updated lr: 0.000020
Training, Epoch: 0041, Batch: 001240, Sample Num: 19840, Cur Loss: 0.00000076, Cur Avg Loss: 0.00005548, Log Avg loss: 0.00010090, Global Avg Loss: 0.00252026, Time: 0.2188 Steps: 200200, Updated lr: 0.000020
Training, Epoch: 0041, Batch: 001440, Sample Num: 23040, Cur Loss: 0.00009427, Cur Avg Loss: 0.00004900, Log Avg loss: 0.00000878, Global Avg Loss: 0.00251776, Time: 0.4256 Steps: 200400, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 001640, Sample Num: 26240, Cur Loss: 0.00000020, Cur Avg Loss: 0.00004864, Log Avg loss: 0.00004608, Global Avg Loss: 0.00251529, Time: 0.0855 Steps: 200600, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 001840, Sample Num: 29440, Cur Loss: 0.00000075, Cur Avg Loss: 0.00004590, Log Avg loss: 0.00002338, Global Avg Loss: 0.00251281, Time: 0.2169 Steps: 200800, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00000015, Cur Avg Loss: 0.00004791, Log Avg loss: 0.00006641, Global Avg Loss: 0.00251038, Time: 0.1950 Steps: 201000, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 002240, Sample Num: 35840, Cur Loss: 0.00000010, Cur Avg Loss: 0.00005046, Log Avg loss: 0.00007654, Global Avg Loss: 0.00250796, Time: 0.2223 Steps: 201200, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 002440, Sample Num: 39040, Cur Loss: 0.00000354, Cur Avg Loss: 0.00005016, Log Avg loss: 0.00004681, Global Avg Loss: 0.00250551, Time: 0.2833 Steps: 201400, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 002640, Sample Num: 42240, Cur Loss: 0.00001387, Cur Avg Loss: 0.00004754, Log Avg loss: 0.00001547, Global Avg Loss: 0.00250304, Time: 0.1448 Steps: 201600, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00000008, Cur Avg Loss: 0.00004742, Log Avg loss: 0.00004584, Global Avg Loss: 0.00250061, Time: 0.3214 Steps: 201800, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00000001, Cur Avg Loss: 0.00005028, Log Avg loss: 0.00009088, Global Avg Loss: 0.00249822, Time: 0.2535 Steps: 202000, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 003240, Sample Num: 51840, Cur Loss: 0.00000002, Cur Avg Loss: 0.00004902, Log Avg loss: 0.00002997, Global Avg Loss: 0.00249578, Time: 0.2173 Steps: 202200, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 003440, Sample Num: 55040, Cur Loss: 0.00000044, Cur Avg Loss: 0.00004967, Log Avg loss: 0.00006011, Global Avg Loss: 0.00249337, Time: 0.2166 Steps: 202400, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 003640, Sample Num: 58240, Cur Loss: 0.00000009, Cur Avg Loss: 0.00004764, Log Avg loss: 0.00001270, Global Avg Loss: 0.00249093, Time: 0.2172 Steps: 202600, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 003840, Sample Num: 61440, Cur Loss: 0.00001235, Cur Avg Loss: 0.00004596, Log Avg loss: 0.00001540, Global Avg Loss: 0.00248848, Time: 0.2398 Steps: 202800, Updated lr: 0.000019
Training, Epoch: 0041, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00000664, Cur Avg Loss: 0.00004551, Log Avg loss: 0.00003697, Global Avg Loss: 0.00248607, Time: 0.2160 Steps: 203000, Updated lr: 0.000018
Training, Epoch: 0041, Batch: 004240, Sample Num: 67840, Cur Loss: 0.00000012, Cur Avg Loss: 0.00004493, Log Avg loss: 0.00003322, Global Avg Loss: 0.00248365, Time: 0.2170 Steps: 203200, Updated lr: 0.000018
Training, Epoch: 0041, Batch: 004440, Sample Num: 71040, Cur Loss: 0.00000224, Cur Avg Loss: 0.00004354, Log Avg loss: 0.00001395, Global Avg Loss: 0.00248123, Time: 0.2207 Steps: 203400, Updated lr: 0.000018
Training, Epoch: 0041, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00001446, Cur Avg Loss: 0.00004453, Log Avg loss: 0.00006666, Global Avg Loss: 0.00247885, Time: 0.2529 Steps: 203600, Updated lr: 0.000018
Training, Epoch: 0041, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00000021, Cur Avg Loss: 0.00004630, Log Avg loss: 0.00008728, Global Avg Loss: 0.00247651, Time: 0.2173
Steps: 203800, Updated lr: 0.000018 ***** Running evaluation checkpoint-203934 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-203934 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1211.242329, Avg time per batch (s): 0.240000 {"eval_avg_loss": 0.001953, "eval_total_loss": 2.082374, "eval_acc": 0.999733, "eval_jaccard": 0.988718, "eval_prec": 0.989799, "eval_recall": 0.990346, "eval_f1": 0.989681, "eval_pr_auc": 0.996076, "eval_roc_auc": 0.999448, "eval_fmax": 0.994856, "eval_pmax": 0.99694, "eval_rmax": 0.992781, "eval_tmax": 0.04, "update_flag": false, "test_avg_loss": 0.002473, "test_total_loss": 2.636271, "test_acc": 0.999716, "test_jaccard": 0.987087, "test_prec": 0.987956, "test_recall": 0.988853, "test_f1": 0.988006, "test_pr_auc": 0.994827, "test_roc_auc": 0.999117, "test_fmax": 0.994293, "test_pmax": 0.997223, "test_rmax": 0.99138, "test_tmax": 0.08, "lr": 1.807266855066613e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00247495321254885, "train_cur_epoch_loss": 0.23907307369796216, "train_cur_epoch_avg_loss": 4.8064550401681174e-05, "train_cur_epoch_time": 1211.242329120636, "train_cur_epoch_avg_time": 0.24351474248504945, "epoch": 41, "step": 203934} ################################################## Training, Epoch: 0042, Batch: 000066, Sample Num: 1056, Cur Loss: 0.00002395, Cur Avg Loss: 0.00001065, Log Avg loss: 0.00007842, Global Avg Loss: 0.00247416, Time: 0.1060 Steps: 204000, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000266, Sample Num: 4256, Cur Loss: 0.00000179, Cur Avg Loss: 0.00002441, Log Avg loss: 0.00002895, Global Avg Loss: 0.00247176, Time: 0.2157 Steps: 204200, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000466, Sample Num: 7456, Cur Loss: 0.00000333, Cur Avg Loss: 0.00004023, Log Avg loss: 0.00006128, Global Avg Loss: 0.00246940, Time: 0.2172 Steps: 204400, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000666, Sample Num: 10656, Cur Loss: 0.00000100, Cur Avg Loss: 0.00003527, Log Avg loss: 0.00002370, Global Avg Loss: 0.00246701, Time: 0.3916 Steps: 204600, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 000866, Sample Num: 13856, Cur Loss: 0.00000025, Cur Avg Loss: 0.00004966, Log Avg loss: 0.00009760, Global Avg Loss: 0.00246470, Time: 0.2128 Steps: 204800, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 001066, Sample Num: 17056, Cur Loss: 0.00000303, Cur Avg Loss: 0.00004520, Log Avg loss: 0.00002588, Global Avg Loss: 0.00246232, Time: 0.2184 Steps: 205000, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 001266, Sample Num: 20256, Cur Loss: 0.00000328, Cur Avg Loss: 0.00004796, Log Avg loss: 0.00006269, Global Avg Loss: 0.00245998, Time: 0.3834 Steps: 205200, Updated lr: 0.000018 Training, Epoch: 0042, Batch: 001466, Sample Num: 23456, Cur Loss: 0.00000021, Cur Avg Loss: 0.00004315, Log Avg loss: 0.00001266, Global Avg Loss: 0.00245760, Time: 0.3493 Steps: 205400, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 001666, Sample Num: 26656, Cur Loss: 0.00000419, Cur Avg Loss: 0.00004358, Log Avg loss: 0.00004675, Global Avg Loss: 0.00245525, Time: 0.2162 Steps: 205600, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 001866, Sample Num: 29856, Cur Loss: 0.00000370, Cur Avg Loss: 0.00004185, Log Avg loss: 0.00002744, Global Avg Loss: 0.00245289, Time: 0.2153 Steps: 205800, Updated lr: 
0.000017 Training, Epoch: 0042, Batch: 002066, Sample Num: 33056, Cur Loss: 0.00282018, Cur Avg Loss: 0.00004412, Log Avg loss: 0.00006527, Global Avg Loss: 0.00245057, Time: 0.3164 Steps: 206000, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002266, Sample Num: 36256, Cur Loss: 0.00000027, Cur Avg Loss: 0.00004417, Log Avg loss: 0.00004476, Global Avg Loss: 0.00244824, Time: 0.2188 Steps: 206200, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002466, Sample Num: 39456, Cur Loss: 0.00000011, Cur Avg Loss: 0.00004264, Log Avg loss: 0.00002530, Global Avg Loss: 0.00244589, Time: 0.1679 Steps: 206400, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002666, Sample Num: 42656, Cur Loss: 0.00000000, Cur Avg Loss: 0.00004022, Log Avg loss: 0.00001035, Global Avg Loss: 0.00244354, Time: 0.2742 Steps: 206600, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 002866, Sample Num: 45856, Cur Loss: 0.00000087, Cur Avg Loss: 0.00004159, Log Avg loss: 0.00005979, Global Avg Loss: 0.00244123, Time: 0.2166 Steps: 206800, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003066, Sample Num: 49056, Cur Loss: 0.00000005, Cur Avg Loss: 0.00004470, Log Avg loss: 0.00008933, Global Avg Loss: 0.00243896, Time: 0.2172 Steps: 207000, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003266, Sample Num: 52256, Cur Loss: 0.00000009, Cur Avg Loss: 0.00004251, Log Avg loss: 0.00000901, Global Avg Loss: 0.00243661, Time: 0.2193 Steps: 207200, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003466, Sample Num: 55456, Cur Loss: 0.00000005, Cur Avg Loss: 0.00004352, Log Avg loss: 0.00005988, Global Avg Loss: 0.00243432, Time: 0.2159 Steps: 207400, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003666, Sample Num: 58656, Cur Loss: 0.00000008, Cur Avg Loss: 0.00004205, Log Avg loss: 0.00001670, Global Avg Loss: 0.00243199, Time: 0.1556 Steps: 207600, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 003866, Sample Num: 61856, Cur Loss: 0.00000004, Cur Avg Loss: 0.00004005, Log Avg loss: 0.00000329, Global Avg Loss: 0.00242965, Time: 0.3173 Steps: 207800, Updated lr: 0.000017 Training, Epoch: 0042, Batch: 004066, Sample Num: 65056, Cur Loss: 0.00000024, Cur Avg Loss: 0.00003928, Log Avg loss: 0.00002446, Global Avg Loss: 0.00242734, Time: 0.2156 Steps: 208000, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004266, Sample Num: 68256, Cur Loss: 0.00000024, Cur Avg Loss: 0.00003920, Log Avg loss: 0.00003761, Global Avg Loss: 0.00242504, Time: 0.2160 Steps: 208200, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004466, Sample Num: 71456, Cur Loss: 0.00000358, Cur Avg Loss: 0.00003990, Log Avg loss: 0.00005476, Global Avg Loss: 0.00242277, Time: 0.2174 Steps: 208400, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004666, Sample Num: 74656, Cur Loss: 0.00000029, Cur Avg Loss: 0.00004008, Log Avg loss: 0.00004411, Global Avg Loss: 0.00242049, Time: 0.3629 Steps: 208600, Updated lr: 0.000016 Training, Epoch: 0042, Batch: 004866, Sample Num: 77856, Cur Loss: 0.00000538, Cur Avg Loss: 0.00004388, Log Avg loss: 0.00013242, Global Avg Loss: 0.00241830, Time: 0.2192 Steps: 208800, Updated lr: 0.000016 ***** Running evaluation checkpoint-208908 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-208908 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1208.621109, Avg time per batch 
(s): 0.240000
{"eval_avg_loss": 0.001937, "eval_total_loss": 2.065322, "eval_acc": 0.999744, "eval_jaccard": 0.989123, "eval_prec": 0.990058, "eval_recall": 0.990727, "eval_f1": 0.990021, "eval_pr_auc": 0.995959, "eval_roc_auc": 0.999467, "eval_fmax": 0.994792, "eval_pmax": 0.99691, "eval_rmax": 0.992683, "eval_tmax": 0.05, "update_flag": true, "test_avg_loss": 0.002457, "test_total_loss": 2.619151, "test_acc": 0.999716, "test_jaccard": 0.987177, "test_prec": 0.987988, "test_recall": 0.989034, "test_f1": 0.988104, "test_pr_auc": 0.994885, "test_roc_auc": 0.999142, "test_fmax": 0.994146, "test_pmax": 0.997006, "test_rmax": 0.991302, "test_tmax": 0.09, "lr": 1.6064594267258782e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.002417108933723149, "train_cur_epoch_loss": 0.22628467829841103, "train_cur_epoch_avg_loss": 4.5493501869403104e-05, "train_cur_epoch_time": 1208.6211094856262, "train_cur_epoch_avg_time": 0.2429877582399731, "epoch": 42, "step": 208908}
##################################################
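The update_flag field in these epoch summaries tracks the best dev score under best_metric_type = "f1": it is true exactly when eval_f1 beats every earlier epoch (true above at epochs 34, 40, and 42, false otherwise), which is when the checkpoint would be kept as the current best. A sketch of that selection logic; the class and names are hypothetical, not the project's actual code:

class BestCheckpointTracker:
    """Mirrors the update_flag behaviour in the log: true exactly when
    the dev metric sets a new maximum (hypothetical helper)."""
    def __init__(self, metric_key="eval_f1"):
        self.metric_key = metric_key
        self.best = float("-inf")

    def update(self, eval_metrics):
        update_flag = eval_metrics[self.metric_key] > self.best
        if update_flag:
            self.best = eval_metrics[self.metric_key]
            # the caller would persist this checkpoint as the best model
        return update_flag

tracker = BestCheckpointTracker()
assert tracker.update({"eval_f1": 0.989762})      # epoch 34 -> update_flag true
assert not tracker.update({"eval_f1": 0.989523})  # epoch 35 -> false
assert tracker.update({"eval_f1": 0.989931})      # epoch 40 -> true again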
Training, Epoch: 0043, Batch: 000092, Sample Num: 1472, Cur Loss: 0.00000022, Cur Avg Loss: 0.00003698, Log Avg loss: 0.00008094, Global Avg Loss: 0.00241606, Time: 0.2176 Steps: 209000, Updated lr: 0.000016
Training, Epoch: 0043, Batch: 000292, Sample Num: 4672, Cur Loss: 0.00000859, Cur Avg Loss: 0.00002274, Log Avg loss: 0.00001618, Global Avg Loss: 0.00241377, Time: 0.2179 Steps: 209200, Updated lr: 0.000016
Training, Epoch: 0043, Batch: 000492, Sample Num: 7872, Cur Loss: 0.00000551, Cur Avg Loss: 0.00003911, Log Avg loss: 0.00006302, Global Avg Loss: 0.00241152, Time: 0.3029 Steps: 209400, Updated lr: 0.000016
Training, Epoch: 0043, Batch: 000692, Sample Num: 11072, Cur Loss: 0.00000036, Cur Avg Loss: 0.00003270, Log Avg loss: 0.00001694, Global Avg Loss: 0.00240924, Time: 0.2190 Steps: 209600, Updated lr: 0.000016
Training, Epoch: 0043, Batch: 000892, Sample Num: 14272, Cur Loss: 0.00000007, Cur Avg Loss: 0.00004265, Log Avg loss: 0.00007709, Global Avg Loss: 0.00240701, Time: 0.2166 Steps: 209800, Updated lr: 0.000016
Training, Epoch: 0043, Batch: 001092, Sample Num: 17472, Cur Loss: 0.00000017, Cur Avg Loss: 0.00004555, Log Avg loss: 0.00005844, Global Avg Loss: 0.00240478, Time: 0.4148 Steps: 210000, Updated lr: 0.000016
Training, Epoch: 0043, Batch: 001292, Sample Num: 20672, Cur Loss: 0.00000273, Cur Avg Loss: 0.00004600, Log Avg loss: 0.00004850, Global Avg Loss: 0.00240253, Time: 0.2155 Steps: 210200, Updated lr: 0.000016
Training, Epoch: 0043, Batch: 001492, Sample Num: 23872, Cur Loss: 0.00000381, Cur Avg Loss: 0.00004066, Log Avg loss: 0.00000618, Global Avg Loss: 0.00240026, Time: 0.2212 Steps: 210400, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 001692, Sample Num: 27072, Cur Loss: 0.00000024, Cur Avg Loss: 0.00003932, Log Avg loss: 0.00002931, Global Avg Loss: 0.00239801, Time: 0.2211 Steps: 210600, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 001892, Sample Num: 30272, Cur Loss: 0.00000028, Cur Avg Loss: 0.00003712, Log Avg loss: 0.00001844, Global Avg Loss: 0.00239575, Time: 0.2207 Steps: 210800, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 002092, Sample Num: 33472, Cur Loss: 0.00000808, Cur Avg Loss: 0.00004194, Log Avg loss: 0.00008763, Global Avg Loss: 0.00239356, Time: 0.3926 Steps: 211000, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 002292, Sample Num: 36672, Cur Loss: 0.00000013, Cur Avg Loss: 0.00004727, Log Avg loss: 0.00010301, Global Avg Loss: 0.00239139, Time: 0.5793 Steps: 211200, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 002492, Sample Num: 39872, Cur Loss: 0.00000327, Cur Avg Loss: 0.00004438, Log Avg loss: 0.00001124, Global Avg Loss: 0.00238914, Time: 0.4992 Steps: 211400, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 002692, Sample Num: 43072, Cur Loss: 0.00000122, Cur Avg Loss: 0.00004309, Log Avg loss: 0.00002701, Global Avg Loss: 0.00238691, Time: 0.2160 Steps: 211600, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 002892, Sample Num: 46272, Cur Loss: 0.00000016, Cur Avg Loss: 0.00004570, Log Avg loss: 0.00008087, Global Avg Loss: 0.00238473, Time: 0.0745 Steps: 211800, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 003092, Sample Num: 49472, Cur Loss: 0.00000046, Cur Avg Loss: 0.00004954, Log Avg loss: 0.00010496, Global Avg Loss: 0.00238258, Time: 0.2161 Steps: 212000, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 003292, Sample Num: 52672, Cur Loss: 0.00000031, Cur Avg Loss: 0.00004687, Log Avg loss: 0.00000566, Global Avg Loss: 0.00238034, Time: 0.2196 Steps: 212200, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 003492, Sample Num: 55872, Cur Loss: 0.00000028, Cur Avg Loss: 0.00004712, Log Avg loss: 0.00005119, Global Avg Loss: 0.00237814, Time: 0.4330 Steps: 212400, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 003692, Sample Num: 59072, Cur Loss: 0.00000003, Cur Avg Loss: 0.00004522, Log Avg loss: 0.00001199, Global Avg Loss: 0.00237592, Time: 0.3169 Steps: 212600, Updated lr: 0.000015
Training, Epoch: 0043, Batch: 003892, Sample Num: 62272, Cur Loss: 0.00000029, Cur Avg Loss: 0.00004297, Log Avg loss: 0.00000145, Global Avg Loss: 0.00237369, Time: 0.2168 Steps: 212800, Updated lr: 0.000014
Training, Epoch: 0043, Batch: 004092, Sample Num: 65472, Cur Loss: 0.00000305, Cur Avg Loss: 0.00004130, Log Avg loss: 0.00000879, Global Avg Loss: 0.00237147, Time: 0.2194 Steps: 213000, Updated lr: 0.000014
Training, Epoch: 0043, Batch: 004292, Sample Num: 68672, Cur Loss: 0.00000009, Cur Avg Loss: 0.00004096, Log Avg loss: 0.00003401, Global Avg Loss: 0.00236927, Time: 0.2173 Steps: 213200, Updated lr: 0.000014
Training, Epoch: 0043, Batch: 004492, Sample Num: 71872, Cur Loss: 0.00000010, Cur Avg Loss: 0.00004207, Log Avg loss: 0.00006596, Global Avg Loss: 0.00236712, Time: 0.2085 Steps: 213400, Updated lr: 0.000014
Training, Epoch: 0043, Batch: 004692, Sample Num: 75072, Cur Loss: 0.00000018, Cur Avg Loss: 0.00004275, Log Avg loss: 0.00005798, Global Avg Loss: 0.00236495, Time: 0.1232 Steps: 213600, Updated lr: 0.000014
Training, Epoch: 0043, Batch: 004892, Sample Num: 78272, Cur Loss: 0.00000003, Cur Avg Loss: 0.00004467, Log Avg loss: 0.00008980, Global Avg Loss: 0.00236282, Time: 0.2171 Steps: 213800, Updated lr: 0.000014
***** Running evaluation checkpoint-213882 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-213882 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1222.084558, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.001961, "eval_total_loss": 2.090185, "eval_acc": 0.999738, "eval_jaccard": 0.988801, "eval_prec": 0.989823, "eval_recall": 0.990463, "eval_f1": 0.989752, "eval_pr_auc": 0.996059, "eval_roc_auc": 0.999472, "eval_fmax": 0.994765, "eval_pmax": 0.997781, "eval_rmax": 0.991767, "eval_tmax": 0.18, "update_flag": false, "test_avg_loss": 0.002479, "test_total_loss": 2.642331, "test_acc": 0.999715, "test_jaccard":
0.987108, "test_prec": 0.987928, "test_recall": 0.988902, "test_f1": 0.988021, "test_pr_auc": 0.994853, "test_roc_auc": 0.999139, "test_fmax": 0.994231, "test_pmax": 0.996881, "test_rmax": 0.991595, "test_tmax": 0.06, "lr": 1.4056519983851435e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0023619840255881376, "train_cur_epoch_loss": 0.23247423460576866, "train_cur_epoch_avg_loss": 4.6737883917524864e-05, "train_cur_epoch_time": 1222.0845580101013, "train_cur_epoch_avg_time": 0.24569452312225598, "epoch": 43, "step": 213882} ################################################## Training, Epoch: 0044, Batch: 000118, Sample Num: 1888, Cur Loss: 0.00000002, Cur Avg Loss: 0.00006215, Log Avg loss: 0.00010635, Global Avg Loss: 0.00236072, Time: 0.2176 Steps: 214000, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000318, Sample Num: 5088, Cur Loss: 0.00000037, Cur Avg Loss: 0.00003883, Log Avg loss: 0.00002507, Global Avg Loss: 0.00235854, Time: 0.2174 Steps: 214200, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000518, Sample Num: 8288, Cur Loss: 0.00000026, Cur Avg Loss: 0.00004823, Log Avg loss: 0.00006318, Global Avg Loss: 0.00235639, Time: 0.2208 Steps: 214400, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000718, Sample Num: 11488, Cur Loss: 0.00000001, Cur Avg Loss: 0.00003950, Log Avg loss: 0.00001687, Global Avg Loss: 0.00235421, Time: 0.2168 Steps: 214600, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 000918, Sample Num: 14688, Cur Loss: 0.00000270, Cur Avg Loss: 0.00004409, Log Avg loss: 0.00006060, Global Avg Loss: 0.00235208, Time: 0.2201 Steps: 214800, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 001118, Sample Num: 17888, Cur Loss: 0.00000091, Cur Avg Loss: 0.00004746, Log Avg loss: 0.00006292, Global Avg Loss: 0.00234995, Time: 0.2165 Steps: 215000, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 001318, Sample Num: 21088, Cur Loss: 0.00000009, Cur Avg Loss: 0.00004714, Log Avg loss: 0.00004536, Global Avg Loss: 0.00234781, Time: 0.2234 Steps: 215200, Updated lr: 0.000014 Training, Epoch: 0044, Batch: 001518, Sample Num: 24288, Cur Loss: 0.00000004, Cur Avg Loss: 0.00004228, Log Avg loss: 0.00001024, Global Avg Loss: 0.00234564, Time: 0.2147 Steps: 215400, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 001718, Sample Num: 27488, Cur Loss: 0.00000016, Cur Avg Loss: 0.00004277, Log Avg loss: 0.00004647, Global Avg Loss: 0.00234350, Time: 0.2161 Steps: 215600, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 001918, Sample Num: 30688, Cur Loss: 0.00000112, Cur Avg Loss: 0.00003865, Log Avg loss: 0.00000326, Global Avg Loss: 0.00234133, Time: 0.2157 Steps: 215800, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002118, Sample Num: 33888, Cur Loss: 0.00000017, Cur Avg Loss: 0.00004290, Log Avg loss: 0.00008370, Global Avg Loss: 0.00233924, Time: 0.2794 Steps: 216000, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002318, Sample Num: 37088, Cur Loss: 0.00000057, Cur Avg Loss: 0.00004983, Log Avg loss: 0.00012314, Global Avg Loss: 0.00233719, Time: 0.2198 Steps: 216200, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002518, Sample Num: 40288, Cur Loss: 0.00000209, Cur Avg Loss: 0.00004658, Log Avg loss: 0.00000891, Global Avg Loss: 0.00233504, Time: 0.3888 Steps: 216400, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002718, Sample Num: 43488, Cur Loss: 0.00000017, Cur Avg Loss: 0.00004367, Log Avg loss: 0.00000709, Global Avg Loss: 0.00233289, Time: 0.2168 Steps: 216600, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 002918, 
Sample Num: 46688, Cur Loss: 0.00002913, Cur Avg Loss: 0.00004473, Log Avg loss: 0.00005914, Global Avg Loss: 0.00233080, Time: 0.0849 Steps: 216800, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003118, Sample Num: 49888, Cur Loss: 0.00000885, Cur Avg Loss: 0.00004733, Log Avg loss: 0.00008529, Global Avg Loss: 0.00232873, Time: 0.2188 Steps: 217000, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003318, Sample Num: 53088, Cur Loss: 0.00000090, Cur Avg Loss: 0.00004600, Log Avg loss: 0.00002529, Global Avg Loss: 0.00232660, Time: 0.4049 Steps: 217200, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003518, Sample Num: 56288, Cur Loss: 0.00000043, Cur Avg Loss: 0.00004560, Log Avg loss: 0.00003886, Global Avg Loss: 0.00232450, Time: 0.4491 Steps: 217400, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003718, Sample Num: 59488, Cur Loss: 0.00000469, Cur Avg Loss: 0.00004382, Log Avg loss: 0.00001264, Global Avg Loss: 0.00232238, Time: 0.2177 Steps: 217600, Updated lr: 0.000013 Training, Epoch: 0044, Batch: 003918, Sample Num: 62688, Cur Loss: 0.00000010, Cur Avg Loss: 0.00004280, Log Avg loss: 0.00002371, Global Avg Loss: 0.00232026, Time: 0.2239 Steps: 217800, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004118, Sample Num: 65888, Cur Loss: 0.00000001, Cur Avg Loss: 0.00004092, Log Avg loss: 0.00000410, Global Avg Loss: 0.00231814, Time: 0.3769 Steps: 218000, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004318, Sample Num: 69088, Cur Loss: 0.00000357, Cur Avg Loss: 0.00003983, Log Avg loss: 0.00001741, Global Avg Loss: 0.00231603, Time: 0.3857 Steps: 218200, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004518, Sample Num: 72288, Cur Loss: 0.00000156, Cur Avg Loss: 0.00004115, Log Avg loss: 0.00006971, Global Avg Loss: 0.00231397, Time: 0.2234 Steps: 218400, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004718, Sample Num: 75488, Cur Loss: 0.00000006, Cur Avg Loss: 0.00004105, Log Avg loss: 0.00003870, Global Avg Loss: 0.00231189, Time: 0.4462 Steps: 218600, Updated lr: 0.000012 Training, Epoch: 0044, Batch: 004918, Sample Num: 78688, Cur Loss: 0.00000394, Cur Avg Loss: 0.00004207, Log Avg loss: 0.00006629, Global Avg Loss: 0.00230984, Time: 0.2539 Steps: 218800, Updated lr: 0.000012 ***** Running evaluation checkpoint-218856 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 17053 ################################################## ***** Running testing checkpoint-218856 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [17053] ################################################## Epoch Time: 1239.752229, Avg time per batch (s): 0.250000 {"eval_avg_loss": 0.001988, "eval_total_loss": 2.119088, "eval_acc": 0.999734, "eval_jaccard": 0.988763, "eval_prec": 0.989911, "eval_recall": 0.990338, "eval_f1": 0.989732, "eval_pr_auc": 0.996059, "eval_roc_auc": 0.999469, "eval_fmax": 0.994818, "eval_pmax": 0.996048, "eval_rmax": 0.993592, "eval_tmax": 0.02, "update_flag": false, "test_avg_loss": 0.0025, "test_total_loss": 2.665457, "test_acc": 0.999719, "test_jaccard": 0.987284, "test_prec": 0.988104, "test_recall": 0.989078, "test_f1": 0.988197, "test_pr_auc": 0.994781, "test_roc_auc": 0.999127, "test_fmax": 0.994236, "test_pmax": 0.996655, "test_rmax": 0.991829, "test_tmax": 0.04, "lr": 1.2048445700444087e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0023093077287421136, "train_cur_epoch_loss": 0.21998492074387116, "train_cur_epoch_avg_loss": 4.422696436346425e-05, "train_cur_epoch_time": 
Training, Epoch: 0045, Batch: 000144, Sample Num: 2304, Cur Loss: 0.00000048, Cur Avg Loss: 0.00002314, Log Avg loss: 0.00008197, Global Avg Loss: 0.00230780, Time: 0.2115 Steps: 219000, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 000344, Sample Num: 5504, Cur Loss: 0.00001561, Cur Avg Loss: 0.00002475, Log Avg loss: 0.00002592, Global Avg Loss: 0.00230572, Time: 0.2183 Steps: 219200, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 000544, Sample Num: 8704, Cur Loss: 0.00000037, Cur Avg Loss: 0.00003336, Log Avg loss: 0.00004818, Global Avg Loss: 0.00230366, Time: 0.3185 Steps: 219400, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 000744, Sample Num: 11904, Cur Loss: 0.00000337, Cur Avg Loss: 0.00004344, Log Avg loss: 0.00007084, Global Avg Loss: 0.00230163, Time: 0.2310 Steps: 219600, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 000944, Sample Num: 15104, Cur Loss: 0.00000037, Cur Avg Loss: 0.00004738, Log Avg loss: 0.00006205, Global Avg Loss: 0.00229959, Time: 0.3228 Steps: 219800, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 001144, Sample Num: 18304, Cur Loss: 0.00090923, Cur Avg Loss: 0.00004836, Log Avg loss: 0.00005298, Global Avg Loss: 0.00229755, Time: 0.2186 Steps: 220000, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 001344, Sample Num: 21504, Cur Loss: 0.00029151, Cur Avg Loss: 0.00004400, Log Avg loss: 0.00001905, Global Avg Loss: 0.00229548, Time: 0.2500 Steps: 220200, Updated lr: 0.000012
Training, Epoch: 0045, Batch: 001544, Sample Num: 24704, Cur Loss: 0.00000173, Cur Avg Loss: 0.00003901, Log Avg loss: 0.00000552, Global Avg Loss: 0.00229340, Time: 0.3634 Steps: 220400, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 001744, Sample Num: 27904, Cur Loss: 0.00000021, Cur Avg Loss: 0.00004118, Log Avg loss: 0.00005789, Global Avg Loss: 0.00229138, Time: 0.1039 Steps: 220600, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 001944, Sample Num: 31104, Cur Loss: 0.00000156, Cur Avg Loss: 0.00003723, Log Avg loss: 0.00000275, Global Avg Loss: 0.00228930, Time: 0.3912 Steps: 220800, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 002144, Sample Num: 34304, Cur Loss: 0.00000010, Cur Avg Loss: 0.00003995, Log Avg loss: 0.00006638, Global Avg Loss: 0.00228729, Time: 0.2534 Steps: 221000, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 002344, Sample Num: 37504, Cur Loss: 0.00000417, Cur Avg Loss: 0.00004475, Log Avg loss: 0.00009622, Global Avg Loss: 0.00228531, Time: 0.2192 Steps: 221200, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 002544, Sample Num: 40704, Cur Loss: 0.00000164, Cur Avg Loss: 0.00004178, Log Avg loss: 0.00000697, Global Avg Loss: 0.00228325, Time: 0.3921 Steps: 221400, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 002744, Sample Num: 43904, Cur Loss: 0.00000014, Cur Avg Loss: 0.00004175, Log Avg loss: 0.00004147, Global Avg Loss: 0.00228123, Time: 0.1910 Steps: 221600, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 002944, Sample Num: 47104, Cur Loss: 0.00002136, Cur Avg Loss: 0.00003997, Log Avg loss: 0.00001546, Global Avg Loss: 0.00227919, Time: 0.2558 Steps: 221800, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 003144, Sample Num: 50304, Cur Loss: 0.00000245, Cur Avg Loss: 0.00004303, Log Avg loss: 0.00008806, Global Avg Loss: 0.00227721, Time: 0.2186 Steps: 222000, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 003344, Sample Num: 53504, Cur Loss: 0.00000009, Cur Avg Loss: 0.00004357, Log Avg loss: 0.00005211, Global Avg Loss: 0.00227521, Time: 0.2090 Steps: 222200, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 003544, Sample Num: 56704, Cur Loss: 0.00000017, Cur Avg Loss: 0.00004156, Log Avg loss: 0.00000798, Global Avg Loss: 0.00227317, Time: 0.2168 Steps: 222400, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 003744, Sample Num: 59904, Cur Loss: 0.00000024, Cur Avg Loss: 0.00004071, Log Avg loss: 0.00002565, Global Avg Loss: 0.00227115, Time: 0.3968 Steps: 222600, Updated lr: 0.000011
Training, Epoch: 0045, Batch: 003944, Sample Num: 63104, Cur Loss: 0.00002766, Cur Avg Loss: 0.00003970, Log Avg loss: 0.00002080, Global Avg Loss: 0.00226913, Time: 0.6221 Steps: 222800, Updated lr: 0.000010
Training, Epoch: 0045, Batch: 004144, Sample Num: 66304, Cur Loss: 0.00000808, Cur Avg Loss: 0.00003841, Log Avg loss: 0.00001302, Global Avg Loss: 0.00226711, Time: 0.3933 Steps: 223000, Updated lr: 0.000010
Training, Epoch: 0045, Batch: 004344, Sample Num: 69504, Cur Loss: 0.00002584, Cur Avg Loss: 0.00003745, Log Avg loss: 0.00001745, Global Avg Loss: 0.00226509, Time: 0.2226 Steps: 223200, Updated lr: 0.000010
Training, Epoch: 0045, Batch: 004544, Sample Num: 72704, Cur Loss: 0.00000185, Cur Avg Loss: 0.00003854, Log Avg loss: 0.00006219, Global Avg Loss: 0.00226312, Time: 0.2643 Steps: 223400, Updated lr: 0.000010
Training, Epoch: 0045, Batch: 004744, Sample Num: 75904, Cur Loss: 0.00000045, Cur Avg Loss: 0.00003851, Log Avg loss: 0.00003790, Global Avg Loss: 0.00226113, Time: 0.2190 Steps: 223600, Updated lr: 0.000010
Training, Epoch: 0045, Batch: 004944, Sample Num: 79104, Cur Loss: 0.00000051, Cur Avg Loss: 0.00004026, Log Avg loss: 0.00008182, Global Avg Loss: 0.00225918, Time: 0.2194 Steps: 223800, Updated lr: 0.000010
***** Running evaluation checkpoint-223830 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-223830 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1217.515560, Avg time per batch (s): 0.240000
{"eval_avg_loss": 0.001998, "eval_total_loss": 2.130322, "eval_acc": 0.999739, "eval_jaccard": 0.988818, "eval_prec": 0.989921, "eval_recall": 0.990336, "eval_f1": 0.989749, "eval_pr_auc": 0.996063, "eval_roc_auc": 0.99946, "eval_fmax": 0.99485, "eval_pmax": 0.997283, "eval_rmax": 0.992429, "eval_tmax": 0.07, "update_flag": false, "test_avg_loss": 0.002505, "test_total_loss": 2.669856, "test_acc": 0.999724, "test_jaccard": 0.987384, "test_prec": 0.988195, "test_recall": 0.989151, "test_f1": 0.988279, "test_pr_auc": 0.99486, "test_roc_auc": 0.999121, "test_fmax": 0.994243, "test_pmax": 0.996639, "test_rmax": 0.991859, "test_tmax": 0.04, "lr": 1.0040371417036738e-05, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0022589446161266906, "train_cur_epoch_loss": 0.21372114605212086, "train_cur_epoch_avg_loss": 4.2967661047873115e-05, "train_cur_epoch_time": 1217.5155596733093, "train_cur_epoch_avg_time": 0.24477594685832516, "epoch": 45, "step": 223830}
##################################################
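The eval_fmax/eval_pmax/eval_rmax/eval_tmax quadruple in each summary is the threshold-swept counterpart of the fixed-threshold precision/recall/F1: sweep a decision threshold over the sigmoid outputs, score each threshold, and keep the best one; tmax is the winning threshold (e.g. 0.07 in the epoch-45 summary above). A sketch under the assumptions of a 0.01-step grid and micro-averaging; the log itself does not state which averaging or grid the trainer uses:

```python
import numpy as np

def f_max(probs: np.ndarray, labels: np.ndarray):
    """probs, labels: (num_samples, num_labels) arrays, labels in {0, 1}.
    Returns (fmax, pmax, rmax, tmax) by sweeping the decision threshold."""
    best = (0.0, 0.0, 0.0, 0.0)
    for t in np.arange(0.01, 1.0, 0.01):        # assumed grid; logged tmax values fit it
        pred = probs >= t
        tp = np.logical_and(pred, labels).sum()
        prec = tp / max(pred.sum(), 1)          # micro-averaged (assumption)
        rec = tp / max(labels.sum(), 1)
        if prec + rec > 0:
            f1 = 2 * prec * rec / (prec + rec)
            if f1 > best[0]:
                best = (f1, prec, rec, float(t))
    return best
```

All tmax values logged in this run (0.02 through 0.09) land on a 0.01 grid, which is why that grid is assumed here.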
Training, Epoch: 0046, Batch: 000170, Sample Num: 2720, Cur Loss: 0.00000018, Cur Avg Loss: 0.00003595, Log Avg loss: 0.00010385, Global Avg Loss: 0.00225726, Time: 0.2723 Steps: 224000, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000370, Sample Num: 5920, Cur Loss: 0.00024879, Cur Avg Loss: 0.00004090, Log Avg loss: 0.00004511, Global Avg Loss: 0.00225528, Time: 0.2192 Steps: 224200, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000570, Sample Num: 9120, Cur Loss: 0.00000049, Cur Avg Loss: 0.00003585, Log Avg loss: 0.00002650, Global Avg Loss: 0.00225330, Time: 0.2989 Steps: 224400, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000770, Sample Num: 12320, Cur Loss: 0.00000079, Cur Avg Loss: 0.00003552, Log Avg loss: 0.00003459, Global Avg Loss: 0.00225132, Time: 0.1074 Steps: 224600, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 000970, Sample Num: 15520, Cur Loss: 0.00000197, Cur Avg Loss: 0.00003700, Log Avg loss: 0.00004271, Global Avg Loss: 0.00224936, Time: 0.3149 Steps: 224800, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 001170, Sample Num: 18720, Cur Loss: 0.00000007, Cur Avg Loss: 0.00004214, Log Avg loss: 0.00006706, Global Avg Loss: 0.00224742, Time: 0.2165 Steps: 225000, Updated lr: 0.000010
Training, Epoch: 0046, Batch: 001370, Sample Num: 21920, Cur Loss: 0.00001212, Cur Avg Loss: 0.00004027, Log Avg loss: 0.00002935, Global Avg Loss: 0.00224545, Time: 0.2871 Steps: 225200, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 001570, Sample Num: 25120, Cur Loss: 0.00000242, Cur Avg Loss: 0.00003798, Log Avg loss: 0.00002223, Global Avg Loss: 0.00224347, Time: 0.3391 Steps: 225400, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 001770, Sample Num: 28320, Cur Loss: 0.00000021, Cur Avg Loss: 0.00003784, Log Avg loss: 0.00003681, Global Avg Loss: 0.00224152, Time: 0.2238 Steps: 225600, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 001970, Sample Num: 31520, Cur Loss: 0.00000344, Cur Avg Loss: 0.00003428, Log Avg loss: 0.00000272, Global Avg Loss: 0.00223954, Time: 0.2184 Steps: 225800, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002170, Sample Num: 34720, Cur Loss: 0.00000019, Cur Avg Loss: 0.00003912, Log Avg loss: 0.00008683, Global Avg Loss: 0.00223763, Time: 0.2198 Steps: 226000, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002370, Sample Num: 37920, Cur Loss: 0.00000187, Cur Avg Loss: 0.00004184, Log Avg loss: 0.00007140, Global Avg Loss: 0.00223572, Time: 0.1717 Steps: 226200, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002570, Sample Num: 41120, Cur Loss: 0.00000010, Cur Avg Loss: 0.00003934, Log Avg loss: 0.00000966, Global Avg Loss: 0.00223375, Time: 0.1306 Steps: 226400, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002770, Sample Num: 44320, Cur Loss: 0.00000023, Cur Avg Loss: 0.00003924, Log Avg loss: 0.00003802, Global Avg Loss: 0.00223181, Time: 0.2667 Steps: 226600, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 002970, Sample Num: 47520, Cur Loss: 0.00000042, Cur Avg Loss: 0.00003919, Log Avg loss: 0.00003846, Global Avg Loss: 0.00222988, Time: 0.0942 Steps: 226800, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003170, Sample Num: 50720, Cur Loss: 0.00000067, Cur Avg Loss: 0.00003981, Log Avg loss: 0.00004896, Global Avg Loss: 0.00222795, Time: 0.2132 Steps: 227000, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003370, Sample Num: 53920, Cur Loss: 0.00006762, Cur Avg Loss: 0.00004073, Log Avg loss: 0.00005532, Global Avg Loss: 0.00222604, Time: 0.3616 Steps: 227200, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003570, Sample Num: 57120, Cur Loss: 0.00000630, Cur Avg Loss: 0.00003898, Log Avg loss: 0.00000954, Global Avg Loss: 0.00222409, Time: 0.2624 Steps: 227400, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003770, Sample Num: 60320, Cur Loss: 0.00000002, Cur Avg Loss: 0.00003711, Log Avg loss: 0.00000367, Global Avg Loss: 0.00222214, Time: 0.2721 Steps: 227600, Updated lr: 0.000009
Training, Epoch: 0046, Batch: 003970, Sample Num: 63520, Cur Loss: 0.00000022, Cur Avg Loss: 0.00003598, Log Avg loss: 0.00001465, Global Avg Loss: 0.00222020, Time: 0.2156 Steps: 227800, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004170, Sample Num: 66720, Cur Loss: 0.00000181, Cur Avg Loss: 0.00003456, Log Avg loss: 0.00000640, Global Avg Loss: 0.00221826, Time: 0.2187 Steps: 228000, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004370, Sample Num: 69920, Cur Loss: 0.00000062, Cur Avg Loss: 0.00003470, Log Avg loss: 0.00003766, Global Avg Loss: 0.00221635, Time: 0.2203 Steps: 228200, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004570, Sample Num: 73120, Cur Loss: 0.00000034, Cur Avg Loss: 0.00003489, Log Avg loss: 0.00003902, Global Avg Loss: 0.00221444, Time: 0.0833 Steps: 228400, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004770, Sample Num: 76320, Cur Loss: 0.00000025, Cur Avg Loss: 0.00003532, Log Avg loss: 0.00004512, Global Avg Loss: 0.00221255, Time: 0.2996 Steps: 228600, Updated lr: 0.000008
Training, Epoch: 0046, Batch: 004970, Sample Num: 79520, Cur Loss: 0.00000002, Cur Avg Loss: 0.00003782, Log Avg loss: 0.00009761, Global Avg Loss: 0.00221070, Time: 0.2168 Steps: 228800, Updated lr: 0.000008
***** Running evaluation checkpoint-228804 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-228804 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1216.185739, Avg time per batch (s): 0.240000
{"eval_avg_loss": 0.001983, "eval_total_loss": 2.113915, "eval_acc": 0.999743, "eval_jaccard": 0.989033, "eval_prec": 0.990077, "eval_recall": 0.990629, "eval_f1": 0.989968, "eval_pr_auc": 0.995984, "eval_roc_auc": 0.999459, "eval_fmax": 0.99482, "eval_pmax": 0.997179, "eval_rmax": 0.992471, "eval_tmax": 0.07, "update_flag": false, "test_avg_loss": 0.002483, "test_total_loss": 2.646483, "test_acc": 0.999719, "test_jaccard": 0.987267, "test_prec": 0.988117, "test_recall": 0.989112, "test_f1": 0.988205, "test_pr_auc": 0.994896, "test_roc_auc": 0.999136, "test_fmax": 0.994305, "test_pmax": 0.996852, "test_rmax": 0.991771, "test_tmax": 0.06, "lr": 8.032297133629391e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.002210666452000786, "train_cur_epoch_loss": 0.1897534559511298, "train_cur_epoch_avg_loss": 3.814906633516884e-05, "train_cur_epoch_time": 1216.1857385635376, "train_cur_epoch_avg_time": 0.2445085923931519, "epoch": 46, "step": 228804}
##################################################
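The learning rate shrinks by exactly 2.0080742834073478e-06 per 4974-step epoch across these summaries (1.2048e-05 → 1.0040e-05 → 8.0323e-06 → …) and reaches 0.0 at the final step 248700, so the tail of the schedule is linear. Extrapolating that line backwards, it crosses 1e-4 at step 1000, consistent with a linear warmup of roughly 1000 steps followed by linear decay; both constants are inferred from the logged points, not taken from trainer code. A sketch that reproduces the logged values under those assumptions:

```python
def lr_at(step: int,
          peak_lr: float = 1e-4,     # inferred: the fitted line hits 1e-4 at step 1000
          warmup_steps: int = 1000,  # inferred from the same fit
          total_steps: int = 248700) -> float:
    """Linear warmup, then linear decay to zero at total_steps."""
    if step < warmup_steps:
        return peak_lr * step / warmup_steps
    return peak_lr * (total_steps - step) / (total_steps - warmup_steps)

# lr_at(218856) -> ~1.20484457e-05, matching epoch 44's summary;
# lr_at(248700) -> 0.0, matching the final epoch.
```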
Training, Epoch: 0047, Batch: 000196, Sample Num: 3136, Cur Loss: 0.00000100, Cur Avg Loss: 0.00002662, Log Avg loss: 0.00003491, Global Avg Loss: 0.00220880, Time: 0.2213 Steps: 229000, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000396, Sample Num: 6336, Cur Loss: 0.00000142, Cur Avg Loss: 0.00003588, Log Avg loss: 0.00004496, Global Avg Loss: 0.00220691, Time: 0.2192 Steps: 229200, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000596, Sample Num: 9536, Cur Loss: 0.00000007, Cur Avg Loss: 0.00003017, Log Avg loss: 0.00001886, Global Avg Loss: 0.00220500, Time: 0.2189 Steps: 229400, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000796, Sample Num: 12736, Cur Loss: 0.00000089, Cur Avg Loss: 0.00003926, Log Avg loss: 0.00006635, Global Avg Loss: 0.00220314, Time: 0.4877 Steps: 229600, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 000996, Sample Num: 15936, Cur Loss: 0.00000244, Cur Avg Loss: 0.00003697, Log Avg loss: 0.00002786, Global Avg Loss: 0.00220125, Time: 0.2182 Steps: 229800, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 001196, Sample Num: 19136, Cur Loss: 0.00000019, Cur Avg Loss: 0.00003990, Log Avg loss: 0.00005448, Global Avg Loss: 0.00219938, Time: 0.1925 Steps: 230000, Updated lr: 0.000008
Training, Epoch: 0047, Batch: 001396, Sample Num: 22336, Cur Loss: 0.00000022, Cur Avg Loss: 0.00003840, Log Avg loss: 0.00002945, Global Avg Loss: 0.00219749, Time: 0.2152 Steps: 230200, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 001596, Sample Num: 25536, Cur Loss: 0.00000077, Cur Avg Loss: 0.00003788, Log Avg loss: 0.00003426, Global Avg Loss: 0.00219562, Time: 0.2157 Steps: 230400, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 001796, Sample Num: 28736, Cur Loss: 0.00000073, Cur Avg Loss: 0.00003630, Log Avg loss: 0.00002367, Global Avg Loss: 0.00219373, Time: 0.3639 Steps: 230600, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 001996, Sample Num: 31936, Cur Loss: 0.00000091, Cur Avg Loss: 0.00003322, Log Avg loss: 0.00000556, Global Avg Loss: 0.00219184, Time: 0.3917 Steps: 230800, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002196, Sample Num: 35136, Cur Loss: 0.00000007, Cur Avg Loss: 0.00003935, Log Avg loss: 0.00010049, Global Avg Loss: 0.00219002, Time: 0.2197 Steps: 231000, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002396, Sample Num: 38336, Cur Loss: 0.00000139, Cur Avg Loss: 0.00004092, Log Avg loss: 0.00005818, Global Avg Loss: 0.00218818, Time: 0.2215 Steps: 231200, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002596, Sample Num: 41536, Cur Loss: 0.00000013, Cur Avg Loss: 0.00003824, Log Avg loss: 0.00000614, Global Avg Loss: 0.00218629, Time: 0.2173 Steps: 231400, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002796, Sample Num: 44736, Cur Loss: 0.00000814, Cur Avg Loss: 0.00003829, Log Avg loss: 0.00003891, Global Avg Loss: 0.00218444, Time: 0.2160 Steps: 231600, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 002996, Sample Num: 47936, Cur Loss: 0.00001211, Cur Avg Loss: 0.00003911, Log Avg loss: 0.00005067, Global Avg Loss: 0.00218260, Time: 0.2177 Steps: 231800, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003196, Sample Num: 51136, Cur Loss: 0.00000019, Cur Avg Loss: 0.00003904, Log Avg loss: 0.00003792, Global Avg Loss: 0.00218075, Time: 0.2158 Steps: 232000, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003396, Sample Num: 54336, Cur Loss: 0.00000040, Cur Avg Loss: 0.00003895, Log Avg loss: 0.00003747, Global Avg Loss: 0.00217890, Time: 0.2177 Steps: 232200, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003596, Sample Num: 57536, Cur Loss: 0.00000003, Cur Avg Loss: 0.00003756, Log Avg loss: 0.00001410, Global Avg Loss: 0.00217704, Time: 0.2124 Steps: 232400, Updated lr: 0.000007
Training, Epoch: 0047, Batch: 003796, Sample Num: 60736, Cur Loss: 0.00000973, Cur Avg Loss: 0.00003586, Log Avg loss: 0.00000519, Global Avg Loss: 0.00217517, Time: 0.2180 Steps: 232600, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 003996, Sample Num: 63936, Cur Loss: 0.00000024, Cur Avg Loss: 0.00003668, Log Avg loss: 0.00005232, Global Avg Loss: 0.00217335, Time: 0.2241 Steps: 232800, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004196, Sample Num: 67136, Cur Loss: 0.00000167, Cur Avg Loss: 0.00003542, Log Avg loss: 0.00001031, Global Avg Loss: 0.00217149, Time: 0.2252 Steps: 233000, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004396, Sample Num: 70336, Cur Loss: 0.00004291, Cur Avg Loss: 0.00003430, Log Avg loss: 0.00001076, Global Avg Loss: 0.00216964, Time: 0.2153 Steps: 233200, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004596, Sample Num: 73536, Cur Loss: 0.00001295, Cur Avg Loss: 0.00003488, Log Avg loss: 0.00004749, Global Avg Loss: 0.00216782, Time: 0.3903 Steps: 233400, Updated lr: 0.000006
Training, Epoch: 0047, Batch: 004796, Sample Num: 76736, Cur Loss: 0.00006643, Cur Avg Loss: 0.00003680, Log Avg loss: 0.00008091, Global Avg Loss: 0.00216604, Time: 0.4066 Steps: 233600, Updated lr: 0.000006
***** Running evaluation checkpoint-233778 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-233778 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1233.545839, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.002003, "eval_total_loss": 2.134855, "eval_acc": 0.999739, "eval_jaccard": 0.988809, "eval_prec": 0.989891, "eval_recall": 0.990356, "eval_f1": 0.989741, "eval_pr_auc": 0.995972, "eval_roc_auc": 0.999459, "eval_fmax": 0.994857, "eval_pmax": 0.997254, "eval_rmax": 0.992471, "eval_tmax": 0.07, "update_flag": false, "test_avg_loss": 0.002497, "test_total_loss": 2.661616, "test_acc": 0.999719, "test_jaccard": 0.987159, "test_prec": 0.98797, "test_recall": 0.988976, "test_f1": 0.988072, "test_pr_auc": 0.994853, "test_roc_auc": 0.999135, "test_fmax": 0.99428, "test_pmax": 0.996802, "test_rmax": 0.991771, "test_tmax": 0.05, "lr": 6.024222850222043e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.002164469824916807, "train_cur_epoch_loss": 0.19609984581239814, "train_cur_epoch_avg_loss": 3.942497905355813e-05, "train_cur_epoch_time": 1233.5458388328552, "train_cur_epoch_avg_time": 0.24799876132546345, "epoch": 47, "step": 233778}
##################################################
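Because every epoch summary is a single flat JSON object embedded in the stream, the metric history above can be tabulated mechanically. A hypothetical post-processing helper (not part of the training code; the log filename is a placeholder):

```python
import json
import re

def epoch_records(log_text: str):
    """Yield each per-epoch summary dict; the objects are flat, so a
    non-greedy match up to the first closing brace is sufficient."""
    for m in re.finditer(r'\{"eval_avg_loss".*?\}', log_text, re.DOTALL):
        try:
            yield json.loads(m.group(0))
        except json.JSONDecodeError:
            pass  # skip truncated objects

with open("train.log", encoding="utf-8") as fh:  # placeholder path
    for r in epoch_records(fh.read()):
        print(r["epoch"], r["eval_f1"], r["test_f1"], r["lr"])
```

Run over epochs 44-50 above, this prints eval_f1 hovering in the 0.9897-0.9900 band while the lr decays, which is what the unchanging "update_flag": false entries reflect.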
Training, Epoch: 0048, Batch: 000022, Sample Num: 352, Cur Loss: 0.00000025, Cur Avg Loss: 0.00000321, Log Avg loss: 0.00009847, Global Avg Loss: 0.00216427, Time: 0.2899 Steps: 233800, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000222, Sample Num: 3552, Cur Loss: 0.00000016, Cur Avg Loss: 0.00001512, Log Avg loss: 0.00001643, Global Avg Loss: 0.00216243, Time: 0.2196 Steps: 234000, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000422, Sample Num: 6752, Cur Loss: 0.00000038, Cur Avg Loss: 0.00002821, Log Avg loss: 0.00004273, Global Avg Loss: 0.00216062, Time: 0.3125 Steps: 234200, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000622, Sample Num: 9952, Cur Loss: 0.00000002, Cur Avg Loss: 0.00002982, Log Avg loss: 0.00003322, Global Avg Loss: 0.00215881, Time: 0.2173 Steps: 234400, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 000822, Sample Num: 13152, Cur Loss: 0.00000009, Cur Avg Loss: 0.00003863, Log Avg loss: 0.00006603, Global Avg Loss: 0.00215702, Time: 0.2246 Steps: 234600, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 001022, Sample Num: 16352, Cur Loss: 0.00000021, Cur Avg Loss: 0.00003574, Log Avg loss: 0.00002385, Global Avg Loss: 0.00215520, Time: 0.3150 Steps: 234800, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 001222, Sample Num: 19552, Cur Loss: 0.00000439, Cur Avg Loss: 0.00003900, Log Avg loss: 0.00005568, Global Avg Loss: 0.00215342, Time: 0.2214 Steps: 235000, Updated lr: 0.000006
Training, Epoch: 0048, Batch: 001422, Sample Num: 22752, Cur Loss: 0.00000166, Cur Avg Loss: 0.00003594, Log Avg loss: 0.00001726, Global Avg Loss: 0.00215160, Time: 0.0958 Steps: 235200, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 001622, Sample Num: 25952, Cur Loss: 0.00000011, Cur Avg Loss: 0.00003474, Log Avg loss: 0.00002614, Global Avg Loss: 0.00214980, Time: 0.2183 Steps: 235400, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 001822, Sample Num: 29152, Cur Loss: 0.00000023, Cur Avg Loss: 0.00003152, Log Avg loss: 0.00000547, Global Avg Loss: 0.00214797, Time: 0.2177 Steps: 235600, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002022, Sample Num: 32352, Cur Loss: 0.00000065, Cur Avg Loss: 0.00003053, Log Avg loss: 0.00002153, Global Avg Loss: 0.00214617, Time: 0.0852 Steps: 235800, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002222, Sample Num: 35552, Cur Loss: 0.00024864, Cur Avg Loss: 0.00003225, Log Avg loss: 0.00004961, Global Avg Loss: 0.00214439, Time: 0.3989 Steps: 236000, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002422, Sample Num: 38752, Cur Loss: 0.00013896, Cur Avg Loss: 0.00003210, Log Avg loss: 0.00003039, Global Avg Loss: 0.00214260, Time: 0.4068 Steps: 236200, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002622, Sample Num: 41952, Cur Loss: 0.00000004, Cur Avg Loss: 0.00003019, Log Avg loss: 0.00000710, Global Avg Loss: 0.00214080, Time: 0.2116 Steps: 236400, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 002822, Sample Num: 45152, Cur Loss: 0.00000710, Cur Avg Loss: 0.00003095, Log Avg loss: 0.00004090, Global Avg Loss: 0.00213902, Time: 0.2200 Steps: 236600, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003022, Sample Num: 48352, Cur Loss: 0.00000000, Cur Avg Loss: 0.00003110, Log Avg loss: 0.00003317, Global Avg Loss: 0.00213724, Time: 0.2170 Steps: 236800, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003222, Sample Num: 51552, Cur Loss: 0.00000194, Cur Avg Loss: 0.00003153, Log Avg loss: 0.00003802, Global Avg Loss: 0.00213547, Time: 0.3635 Steps: 237000, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003422, Sample Num: 54752, Cur Loss: 0.00000014, Cur Avg Loss: 0.00003222, Log Avg loss: 0.00004345, Global Avg Loss: 0.00213371, Time: 0.3823 Steps: 237200, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003622, Sample Num: 57952, Cur Loss: 0.00000003, Cur Avg Loss: 0.00003139, Log Avg loss: 0.00001704, Global Avg Loss: 0.00213193, Time: 0.2215 Steps: 237400, Updated lr: 0.000005
Training, Epoch: 0048, Batch: 003822, Sample Num: 61152, Cur Loss: 0.00000010, Cur Avg Loss: 0.00002989, Log Avg loss: 0.00000283, Global Avg Loss: 0.00213013, Time: 0.2175 Steps: 237600, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004022, Sample Num: 64352, Cur Loss: 0.00000222, Cur Avg Loss: 0.00002959, Log Avg loss: 0.00002375, Global Avg Loss: 0.00212836, Time: 0.2170 Steps: 237800, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004222, Sample Num: 67552, Cur Loss: 0.00000079, Cur Avg Loss: 0.00002934, Log Avg loss: 0.00002444, Global Avg Loss: 0.00212659, Time: 0.2149 Steps: 238000, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004422, Sample Num: 70752, Cur Loss: 0.00000122, Cur Avg Loss: 0.00002875, Log Avg loss: 0.00001629, Global Avg Loss: 0.00212482, Time: 0.4087 Steps: 238200, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004622, Sample Num: 73952, Cur Loss: 0.00007860, Cur Avg Loss: 0.00003022, Log Avg loss: 0.00006270, Global Avg Loss: 0.00212309, Time: 0.3636 Steps: 238400, Updated lr: 0.000004
Training, Epoch: 0048, Batch: 004822, Sample Num: 77152, Cur Loss: 0.00000269, Cur Avg Loss: 0.00003160, Log Avg loss: 0.00006354, Global Avg Loss: 0.00212137, Time: 0.3272 Steps: 238600, Updated lr: 0.000004
***** Running evaluation checkpoint-238752 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-238752 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1241.614559, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.002, "eval_total_loss": 2.131923, "eval_acc": 0.999738, "eval_jaccard": 0.98882, "eval_prec": 0.989872, "eval_recall": 0.990434, "eval_f1": 0.989767, "eval_pr_auc": 0.996109, "eval_roc_auc": 0.99947, "eval_fmax": 0.994876, "eval_pmax": 0.99696, "eval_rmax": 0.9928, "eval_tmax": 0.04, "update_flag": false, "test_avg_loss": 0.002506, "test_total_loss": 2.670885, "test_acc": 0.999717, "test_jaccard": 0.98705, "test_prec": 0.987822, "test_recall": 0.988839, "test_f1": 0.98794, "test_pr_auc": 0.994777, "test_roc_auc": 0.999134, "test_fmax": 0.994311, "test_pmax": 0.996391, "test_rmax": 0.99224, "test_tmax": 0.03, "lr": 4.0161485668146955e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0021200659576270774, "train_cur_epoch_loss": 0.1645607859770067, "train_cur_epoch_avg_loss": 3.3084195009450486e-05, "train_cur_epoch_time": 1241.6145589351654, "train_cur_epoch_avg_time": 0.2496209406785616, "epoch": 48, "step": 238752}
##################################################
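The timing lines are internally consistent: each epoch is 4974 optimizer steps at batch size 16 (the batch size printed in the evaluation headers), so 1241.614559 s / 4974 ≈ 0.2496 s per batch, which the log rounds to "0.250000", and 4974 × 16 = 79,584 samples per epoch works out to roughly 64 samples/s. A quick check of that arithmetic:

```python
steps_per_epoch = 4974
batch_size = 16                      # from the evaluation header lines

epoch_time = 1241.614559             # epoch 48, in seconds
print(epoch_time / steps_per_epoch)               # ~0.2496 -> logged as "0.250000"
print(steps_per_epoch * batch_size / epoch_time)  # ~64 samples/s

total_time = 429382.557008           # whole run, seconds (reported at the end)
print(total_time / 50)               # 8587.65 -> "Avg time per epoch"
print(total_time / 3600)             # ~119.3 hours for the 50-epoch run
```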
Training, Epoch: 0049, Batch: 000048, Sample Num: 768, Cur Loss: 0.00000024, Cur Avg Loss: 0.00000546, Log Avg loss: 0.00006215, Global Avg Loss: 0.00211964, Time: 0.2843 Steps: 238800, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000248, Sample Num: 3968, Cur Loss: 0.00000004, Cur Avg Loss: 0.00001750, Log Avg loss: 0.00002039, Global Avg Loss: 0.00211788, Time: 0.2187 Steps: 239000, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000448, Sample Num: 7168, Cur Loss: 0.00000188, Cur Avg Loss: 0.00003162, Log Avg loss: 0.00004912, Global Avg Loss: 0.00211615, Time: 0.2578 Steps: 239200, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000648, Sample Num: 10368, Cur Loss: 0.00000002, Cur Avg Loss: 0.00002960, Log Avg loss: 0.00002507, Global Avg Loss: 0.00211441, Time: 0.2237 Steps: 239400, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 000848, Sample Num: 13568, Cur Loss: 0.00000021, Cur Avg Loss: 0.00004084, Log Avg loss: 0.00007729, Global Avg Loss: 0.00211271, Time: 0.2212 Steps: 239600, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 001048, Sample Num: 16768, Cur Loss: 0.00000809, Cur Avg Loss: 0.00003945, Log Avg loss: 0.00003354, Global Avg Loss: 0.00211097, Time: 0.2194 Steps: 239800, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 001248, Sample Num: 19968, Cur Loss: 0.00000703, Cur Avg Loss: 0.00004291, Log Avg loss: 0.00006107, Global Avg Loss: 0.00210926, Time: 0.2337 Steps: 240000, Updated lr: 0.000004
Training, Epoch: 0049, Batch: 001448, Sample Num: 23168, Cur Loss: 0.00000237, Cur Avg Loss: 0.00003823, Log Avg loss: 0.00000898, Global Avg Loss: 0.00210752, Time: 0.2304 Steps: 240200, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 001648, Sample Num: 26368, Cur Loss: 0.00000030, Cur Avg Loss: 0.00003684, Log Avg loss: 0.00002676, Global Avg Loss: 0.00210578, Time: 0.1312 Steps: 240400, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 001848, Sample Num: 29568, Cur Loss: 0.00000020, Cur Avg Loss: 0.00003372, Log Avg loss: 0.00000804, Global Avg Loss: 0.00210404, Time: 0.3948 Steps: 240600, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002048, Sample Num: 32768, Cur Loss: 0.00001049, Cur Avg Loss: 0.00003316, Log Avg loss: 0.00002804, Global Avg Loss: 0.00210232, Time: 0.2276 Steps: 240800, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002248, Sample Num: 35968, Cur Loss: 0.00000279, Cur Avg Loss: 0.00003874, Log Avg loss: 0.00009584, Global Avg Loss: 0.00210065, Time: 0.2153 Steps: 241000, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002448, Sample Num: 39168, Cur Loss: 0.00000005, Cur Avg Loss: 0.00004023, Log Avg loss: 0.00005692, Global Avg Loss: 0.00209896, Time: 0.2153 Steps: 241200, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002648, Sample Num: 42368, Cur Loss: 0.00018904, Cur Avg Loss: 0.00003752, Log Avg loss: 0.00000437, Global Avg Loss: 0.00209722, Time: 0.4062 Steps: 241400, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 002848, Sample Num: 45568, Cur Loss: 0.00000022, Cur Avg Loss: 0.00003774, Log Avg loss: 0.00004067, Global Avg Loss: 0.00209552, Time: 0.2516 Steps: 241600, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003048, Sample Num: 48768, Cur Loss: 0.00000120, Cur Avg Loss: 0.00004057, Log Avg loss: 0.00008090, Global Avg Loss: 0.00209385, Time: 0.3922 Steps: 241800, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003248, Sample Num: 51968, Cur Loss: 0.00000274, Cur Avg Loss: 0.00003846, Log Avg loss: 0.00000626, Global Avg Loss: 0.00209213, Time: 0.4551 Steps: 242000, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003448, Sample Num: 55168, Cur Loss: 0.00000006, Cur Avg Loss: 0.00003851, Log Avg loss: 0.00003930, Global Avg Loss: 0.00209043, Time: 0.2184 Steps: 242200, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003648, Sample Num: 58368, Cur Loss: 0.00000013, Cur Avg Loss: 0.00003722, Log Avg loss: 0.00001496, Global Avg Loss: 0.00208872, Time: 0.3385 Steps: 242400, Updated lr: 0.000003
Training, Epoch: 0049, Batch: 003848, Sample Num: 61568, Cur Loss: 0.00000035, Cur Avg Loss: 0.00003552, Log Avg loss: 0.00000466, Global Avg Loss: 0.00208700, Time: 0.2157 Steps: 242600, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004048, Sample Num: 64768, Cur Loss: 0.00000200, Cur Avg Loss: 0.00003431, Log Avg loss: 0.00001090, Global Avg Loss: 0.00208529, Time: 0.2189 Steps: 242800, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004248, Sample Num: 67968, Cur Loss: 0.00000026, Cur Avg Loss: 0.00003345, Log Avg loss: 0.00001612, Global Avg Loss: 0.00208359, Time: 0.2856 Steps: 243000, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004448, Sample Num: 71168, Cur Loss: 0.00000005, Cur Avg Loss: 0.00003233, Log Avg loss: 0.00000850, Global Avg Loss: 0.00208188, Time: 0.2596 Steps: 243200, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004648, Sample Num: 74368, Cur Loss: 0.00000001, Cur Avg Loss: 0.00003391, Log Avg loss: 0.00006908, Global Avg Loss: 0.00208023, Time: 0.2217 Steps: 243400, Updated lr: 0.000002
Training, Epoch: 0049, Batch: 004848, Sample Num: 77568, Cur Loss: 0.00000007, Cur Avg Loss: 0.00003587, Log Avg loss: 0.00008144, Global Avg Loss: 0.00207859, Time: 0.2183 Steps: 243600, Updated lr: 0.000002
***** Running evaluation checkpoint-243726 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-243726 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1237.593740, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.002002, "eval_total_loss": 2.134662, "eval_acc": 0.999741, "eval_jaccard": 0.988977, "eval_prec": 0.990028, "eval_recall": 0.990551, "eval_f1": 0.989908, "eval_pr_auc": 0.996012, "eval_roc_auc": 0.999461, "eval_fmax": 0.994905, "eval_pmax": 0.996774, "eval_rmax": 0.993043, "eval_tmax": 0.03, "update_flag": false, "test_avg_loss": 0.002502, "test_total_loss": 2.6673, "test_acc": 0.999722, "test_jaccard": 0.987401, "test_prec": 0.988174, "test_recall": 0.989249, "test_f1": 0.988311, "test_pr_auc": 0.99479, "test_roc_auc": 0.999136, "test_fmax": 0.994317, "test_pmax": 0.996463, "test_rmax": 0.992181, "test_tmax": 0.03, "lr": 2.0080742834073478e-06, "cur_epoch_step": 4974, "train_global_avg_loss": 0.00207754772056077, "train_cur_epoch_loss": 0.18240822601371237, "train_cur_epoch_avg_loss": 3.6672341377907595e-05, "train_cur_epoch_time": 1237.5937399864197, "train_cur_epoch_avg_time": 0.24881257337885399, "epoch": 49, "step": 243726}
##################################################
Training, Epoch: 0050, Batch: 000074, Sample Num: 1184, Cur Loss: 0.00000024, Cur Avg Loss: 0.00000444, Log Avg loss: 0.00004415, Global Avg Loss: 0.00207692, Time: 0.2219 Steps: 243800, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000274, Sample Num: 4384, Cur Loss: 0.00000069, Cur Avg Loss: 0.00001870, Log Avg loss: 0.00002398, Global Avg Loss: 0.00207524, Time: 0.1952 Steps: 244000, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000474, Sample Num: 7584, Cur Loss: 0.00000014, Cur Avg Loss: 0.00003145, Log Avg loss: 0.00004891, Global Avg Loss: 0.00207358, Time: 0.1126 Steps: 244200, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000674, Sample Num: 10784, Cur Loss: 0.00000018, Cur Avg Loss: 0.00002720, Log Avg loss: 0.00001712, Global Avg Loss: 0.00207189, Time: 0.2157 Steps: 244400, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 000874, Sample Num: 13984, Cur Loss: 0.00000029, Cur Avg Loss: 0.00003219, Log Avg loss: 0.00004901, Global Avg Loss: 0.00207024, Time: 0.2195 Steps: 244600, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 001074, Sample Num: 17184, Cur Loss: 0.00000021, Cur Avg Loss: 0.00003166, Log Avg loss: 0.00002936, Global Avg Loss: 0.00206857, Time: 0.2196 Steps: 244800, Updated lr: 0.000002
Training, Epoch: 0050, Batch: 001274, Sample Num: 20384, Cur Loss: 0.00000006, Cur Avg Loss: 0.00003469, Log Avg loss: 0.00005095, Global Avg Loss: 0.00206692, Time: 0.3171 Steps: 245000, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 001474, Sample Num: 23584, Cur Loss: 0.00000378, Cur Avg Loss: 0.00003080, Log Avg loss: 0.00000603, Global Avg Loss: 0.00206524, Time: 0.2122 Steps: 245200, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 001674, Sample Num: 26784, Cur Loss: 0.00000005, Cur Avg Loss: 0.00002902, Log Avg loss: 0.00001593, Global Avg Loss: 0.00206357, Time: 0.2160 Steps: 245400, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 001874, Sample Num: 29984, Cur Loss: 0.00000000, Cur Avg Loss: 0.00002696, Log Avg loss: 0.00000970, Global Avg Loss: 0.00206190, Time: 0.0891 Steps: 245600, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002074, Sample Num: 33184, Cur Loss: 0.00000112, Cur Avg Loss: 0.00003068, Log Avg loss: 0.00006555, Global Avg Loss: 0.00206028, Time: 0.2172 Steps: 245800, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002274, Sample Num: 36384, Cur Loss: 0.00000031, Cur Avg Loss: 0.00003143, Log Avg loss: 0.00003912, Global Avg Loss: 0.00205863, Time: 0.5055 Steps: 246000, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002474, Sample Num: 39584, Cur Loss: 0.00000012, Cur Avg Loss: 0.00003124, Log Avg loss: 0.00002919, Global Avg Loss: 0.00205698, Time: 0.2555 Steps: 246200, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002674, Sample Num: 42784, Cur Loss: 0.00000285, Cur Avg Loss: 0.00003108, Log Avg loss: 0.00002911, Global Avg Loss: 0.00205534, Time: 0.2204 Steps: 246400, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 002874, Sample Num: 45984, Cur Loss: 0.00000005, Cur Avg Loss: 0.00003270, Log Avg loss: 0.00005424, Global Avg Loss: 0.00205372, Time: 0.2082 Steps: 246600, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003074, Sample Num: 49184, Cur Loss: 0.00000617, Cur Avg Loss: 0.00003483, Log Avg loss: 0.00006545, Global Avg Loss: 0.00205210, Time: 0.2167 Steps: 246800, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003274, Sample Num: 52384, Cur Loss: 0.00000045, Cur Avg Loss: 0.00003325, Log Avg loss: 0.00000904, Global Avg Loss: 0.00205045, Time: 0.2196 Steps: 247000, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003474, Sample Num: 55584, Cur Loss: 0.00000424, Cur Avg Loss: 0.00003331, Log Avg loss: 0.00003427, Global Avg Loss: 0.00204882, Time: 0.3467 Steps: 247200, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003674, Sample Num: 58784, Cur Loss: 0.00000044, Cur Avg Loss: 0.00003234, Log Avg loss: 0.00001555, Global Avg Loss: 0.00204718, Time: 0.2527 Steps: 247400, Updated lr: 0.000001
Training, Epoch: 0050, Batch: 003874, Sample Num: 61984, Cur Loss: 0.00000036, Cur Avg Loss: 0.00003086, Log Avg loss: 0.00000360, Global Avg Loss: 0.00204552, Time: 0.2121 Steps: 247600, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004074, Sample Num: 65184, Cur Loss: 0.00000016, Cur Avg Loss: 0.00003017, Log Avg loss: 0.00001671, Global Avg Loss: 0.00204389, Time: 0.2871 Steps: 247800, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004274, Sample Num: 68384, Cur Loss: 0.00000012, Cur Avg Loss: 0.00003023, Log Avg loss: 0.00003145, Global Avg Loss: 0.00204226, Time: 0.2230 Steps: 248000, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004474, Sample Num: 71584, Cur Loss: 0.00217174, Cur Avg Loss: 0.00003000, Log Avg loss: 0.00002527, Global Avg Loss: 0.00204064, Time: 0.2314 Steps: 248200, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004674, Sample Num: 74784, Cur Loss: 0.00000110, Cur Avg Loss: 0.00003042, Log Avg loss: 0.00003962, Global Avg Loss: 0.00203903, Time: 0.3058 Steps: 248400, Updated lr: 0.000000
Training, Epoch: 0050, Batch: 004874, Sample Num: 77984, Cur Loss: 0.00000076, Cur Avg Loss: 0.00003364, Log Avg loss: 0.00010893, Global Avg Loss: 0.00203748, Time: 0.3011 Steps: 248600, Updated lr: 0.000000
***** Running evaluation checkpoint-248700 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
***** Running testing checkpoint-248700 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
Epoch Time: 1231.565325, Avg time per batch (s): 0.250000
{"eval_avg_loss": 0.002014, "eval_total_loss": 2.147099, "eval_acc": 0.999739, "eval_jaccard": 0.988869, "eval_prec": 0.98994, "eval_recall": 0.990473, "eval_f1": 0.989818, "eval_pr_auc": 0.996006, "eval_roc_auc": 0.999458, "eval_fmax": 0.994885, "eval_pmax": 0.996773, "eval_rmax": 0.993004, "eval_tmax": 0.03, "update_flag": false, "test_avg_loss": 0.002507, "test_total_loss": 2.672375, "test_acc": 0.99972, "test_jaccard": 0.987382, "test_prec": 0.988154, "test_recall": 0.989249, "test_f1": 0.9883, "test_pr_auc": 0.994796, "test_roc_auc": 0.999132, "test_fmax": 0.994324, "test_pmax": 0.996703, "test_rmax": 0.991956, "test_tmax": 0.05, "lr": 0.0, "cur_epoch_step": 4974, "train_global_avg_loss": 0.0020366829457550343, "train_cur_epoch_loss": 0.17065286788308148, "train_cur_epoch_avg_loss": 3.430898027404131e-05, "train_cur_epoch_time": 1231.5653250217438, "train_cur_epoch_avg_time": 0.24760058806227256, "epoch": 50, "step": 248700}
##################################################
#########################Best Metric#########################
{"epoch": 42, "global_step": 208908, "eval_avg_loss": 0.001937, "eval_total_loss": 2.065322, "eval_acc": 0.999744, "eval_jaccard": 0.989123, "eval_prec": 0.990058, "eval_recall": 0.990727, "eval_f1": 0.990021, "eval_pr_auc": 0.995959, "eval_roc_auc": 0.999467, "eval_fmax": 0.994792, "eval_pmax": 0.99691, "eval_rmax": 0.992683, "eval_tmax": 0.05, "update_flag": true, "test_avg_loss": 0.002457, "test_total_loss": 2.619151, "test_acc": 0.999716, "test_jaccard": 0.987177, "test_prec": 0.987988, "test_recall": 0.989034, "test_f1": 0.988104, "test_pr_auc": 0.994885, "test_roc_auc": 0.999142, "test_fmax": 0.994146, "test_pmax": 0.997006, "test_rmax": 0.991302, "test_tmax": 0.09}
##################################################
Total Time: 429382.557008, Avg time per epoch (50 epochs): 8587.650000
++++++++++++Validation+++++++++++++
best f1 global step: 208908
checkpoint path: ../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135254/checkpoint-208908
***** Running evaluation checkpoint-208908 *****
Dev Dataset Instantaneous batch size per GPU = 16
Dev Dataset Num examples = 17053
##################################################
{"evaluation_avg_loss_208908": 0.001937, "evaluation_total_loss_208908": 2.065322, "evaluation_acc_208908": 0.999744, "evaluation_jaccard_208908": 0.989123, "evaluation_prec_208908": 0.990058, "evaluation_recall_208908": 0.990727, "evaluation_f1_208908": 0.990021, "evaluation_pr_auc_208908": 0.995959, "evaluation_roc_auc_208908": 0.999467, "evaluation_fmax_208908": 0.994792, "evaluation_pmax_208908": 0.99691, "evaluation_rmax_208908": 0.992683, "evaluation_tmax_208908": 0.05}
++++++++++++Testing+++++++++++++
best f1 global step: 208908
checkpoint path: ../models/VirusEC4/protein/multi_label/luca_base/matrix/20250501135254/checkpoint-208908
***** Running testing checkpoint-208908 *****
Test Dataset Instantaneous batch size per GPU = 16
Test Dataset Num examples = [17053]
##################################################
{"evaluation_avg_loss_208908": 0.002457, "evaluation_total_loss_208908": 2.619151, "evaluation_acc_208908": 0.999716, "evaluation_jaccard_208908": 0.987177, "evaluation_prec_208908": 0.987988, "evaluation_recall_208908": 0.989034, "evaluation_f1_208908": 0.988104, "evaluation_pr_auc_208908": 0.994885, "evaluation_roc_auc_208908": 0.999142, "evaluation_fmax_208908": 0.994146, "evaluation_pmax_208908": 0.997006, "evaluation_rmax_208908": 0.991302, "evaluation_tmax_208908": 0.09}