{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "acc", "beta1": 0.9, "beta2": 0.98, "buffer_size": 1024, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "ViralCapsid", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/ViralCapsid/protein/binary_class/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 2560, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "seq", "intermediate_size": 4096, "label_filepath": "../dataset/ViralCapsid/protein/binary_class/label.txt", "label_size": 2, "label_type": "ViralCapsid", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": "../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000", "llm_step": "3800000", "llm_task_level": "token_level,span_level,seq_level", "llm_time_str": "20240815023346", "llm_type": "lucaone_virus", "llm_version": "v1.0", "local_rank": -1, "log_dir": "../logs/ViralCapsid/protein/binary_class/luca_base/seq/20250103143044", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": true, "matrix_dirpath": "../matrices/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": false, "no_token_type_embeddings": true, "non_ignore": false, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 4, "num_hidden_layers": 2, "num_train_epochs": 10, "output_dir": "../models/ViralCapsid/protein/binary_class/luca_base/seq/20250103143044", "output_mode": "binary_class", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 1.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "null", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "binary_class", "tb_log_dir": "../tb-logs/ViralCapsid/protein/binary_class/luca_base/seq/20250103143044", "test_data_dir": "../dataset/ViralCapsid/protein/binary_class/test/", "time_str": "20250103143250", "train_data_dir": "../dataset/ViralCapsid/protein/binary_class/train/", "trunc_type": "right", "vector_dirpath": "../vectors/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 200, "weight": null, "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,seq ################################################## Encoder Config: {'llm_type': 'lucaone_virus', 'llm_version': 'v1.0', 'llm_step': '3800000', 'llm_dirpath': '../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000', 'input_type': 'seq', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'matrix_dirpath': '../matrices/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': True, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 2560, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4096, "no_position_embeddings": true, "no_token_embeddings": false, "no_token_type_embeddings": true, "num_attention_heads": 4, "num_hidden_layers": 2, "pad_token_id": 0, "pos_weight": 1.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": null, "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.29.0", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39 } ################################################## Mode Architecture: LucaBase( (seq_encoder): LucaTransformer( (embeddings): LucaEmbeddings( (word_embeddings): Embedding(39, 1024, padding_idx=0) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): ModuleList( (0): LucaTransformerLayer( (pre_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (self_attn): LucaMultiHeadAttention( (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) (rot_emb): RotaryEmbedding() ) (post_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) ) (1): LucaTransformerLayer( (pre_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (self_attn): LucaMultiHeadAttention( (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) (rot_emb): RotaryEmbedding() ) (post_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) ) ) (last_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (seq_pooler): GlobalMaskValueAttentionPooling1D (1024 -> 1024) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=1024, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=1, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 28511489 ################################################## {"total_num": "27.190000M", "total_size": "108.760000MB", "param_sum": "27.190000M", "param_size": "108.760000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "27.190675M", "trainable_size": "108.762699MB"} ################################################## Train dataset len: 325113, batch size: 16, batch num: 20320 Train dataset t_total: 203200, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 325113 Train Dataset Num Epochs = 10 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. parallel, distributed & accumulation) = 16 Train Dataset Gradient Accumulation steps = 1 Train Dataset Total optimization steps = 203200 ################################################## Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.64808220, Cur Avg Loss: 0.40148001, Log Avg loss: 0.40148001, Global Avg Loss: 0.40148001, Time: 0.0543 Steps: 200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.29248101, Cur Avg Loss: 0.34863948, Log Avg loss: 0.29579894, Global Avg Loss: 0.34863948, Time: 0.0567 Steps: 400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.10132999, Cur Avg Loss: 0.31229667, Log Avg loss: 0.23961105, Global Avg Loss: 0.31229667, Time: 0.0985 Steps: 600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.13950729, Cur Avg Loss: 0.29335823, Log Avg loss: 0.23654293, Global Avg Loss: 0.29335823, Time: 0.1866 Steps: 800, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.17545666, Cur Avg Loss: 0.27670298, Log Avg loss: 0.21008197, Global Avg Loss: 0.27670298, Time: 0.0973 Steps: 1000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.26822850, Cur Avg Loss: 0.26671365, Log Avg loss: 0.21676699, Global Avg Loss: 0.26671365, Time: 0.0800 Steps: 1200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.82897341, Cur Avg Loss: 0.25604125, Log Avg loss: 0.19200686, Global Avg Loss: 0.25604125, Time: 0.0516 Steps: 1400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 1.27979147, Cur Avg Loss: 0.24788821, Log Avg loss: 0.19081693, Global Avg Loss: 0.24788821, Time: 0.0863 Steps: 1600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.19556960, Cur Avg Loss: 0.24247653, Log Avg loss: 0.19918308, Global Avg Loss: 0.24247653, Time: 0.1886 Steps: 1800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00582637, Cur Avg Loss: 0.23849391, Log Avg loss: 0.20265038, Global Avg Loss: 0.23849391, Time: 0.0508 Steps: 2000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.01589585, Cur Avg Loss: 0.23527713, Log Avg loss: 0.20310932, Global Avg Loss: 0.23527713, Time: 0.2029 Steps: 2200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.18379463, Cur Avg Loss: 0.23144049, Log Avg loss: 0.18923737, Global Avg Loss: 0.23144049, Time: 0.1778 Steps: 2400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.21953438, Cur Avg Loss: 0.22943790, Log Avg loss: 0.20540685, Global Avg Loss: 0.22943790, Time: 0.0839 Steps: 2600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00962763, Cur Avg Loss: 0.22610637, Log Avg loss: 0.18279651, Global Avg Loss: 0.22610637, Time: 0.0865 Steps: 2800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.47652316, Cur Avg Loss: 0.22282900, Log Avg loss: 0.17694574, Global Avg Loss: 0.22282900, Time: 0.0443 Steps: 3000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.04587676, Cur Avg Loss: 0.21988025, Log Avg loss: 0.17564912, Global Avg Loss: 0.21988025, Time: 0.1975 Steps: 3200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur Loss: 0.21512105, Cur Avg Loss: 0.21708637, Log Avg loss: 0.17238429, Global Avg Loss: 0.21708637, Time: 0.1880 Steps: 3400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.02405072, Cur Avg Loss: 0.21489626, Log Avg loss: 0.17766432, Global Avg Loss: 0.21489626, Time: 0.3208 Steps: 3600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.09594756, Cur Avg Loss: 0.21348277, Log Avg loss: 0.18803999, Global Avg Loss: 0.21348277, Time: 0.0893 Steps: 3800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.15230618, Cur Avg Loss: 0.21087505, Log Avg loss: 0.16132840, Global Avg Loss: 0.21087505, Time: 0.0311 Steps: 4000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.13938347, Cur Avg Loss: 0.20910609, Log Avg loss: 0.17372684, Global Avg Loss: 0.20910609, Time: 0.0443 Steps: 4200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.26235282, Cur Avg Loss: 0.20648972, Log Avg loss: 0.15154595, Global Avg Loss: 0.20648972, Time: 0.0583 Steps: 4400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.01924840, Cur Avg Loss: 0.20507471, Log Avg loss: 0.17394439, Global Avg Loss: 0.20507471, Time: 0.0905 Steps: 4600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.33705810, Cur Avg Loss: 0.20414979, Log Avg loss: 0.18287661, Global Avg Loss: 0.20414979, Time: 0.1885 Steps: 4800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005000, Sample Num: 80000, Cur Loss: 0.20532796, Cur Avg Loss: 0.20301338, Log Avg loss: 0.17573972, Global Avg Loss: 0.20301338, Time: 0.1566 Steps: 5000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005200, Sample Num: 83200, Cur Loss: 0.12875487, Cur Avg Loss: 0.20113075, Log Avg loss: 0.15406501, Global Avg Loss: 0.20113075, Time: 0.0484 Steps: 5200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005400, Sample Num: 86400, Cur Loss: 0.12175190, Cur Avg Loss: 0.19984491, Log Avg loss: 0.16641297, Global Avg Loss: 0.19984491, Time: 0.0560 Steps: 5400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 005600, Sample Num: 89600, Cur Loss: 0.10338078, Cur Avg Loss: 0.19823920, Log Avg loss: 0.15488498, Global Avg Loss: 0.19823920, Time: 0.3053 Steps: 5600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 005800, Sample Num: 92800, Cur Loss: 0.06628632, Cur Avg Loss: 0.19735615, Log Avg loss: 0.17263085, Global Avg Loss: 0.19735615, Time: 0.0311 Steps: 5800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006000, Sample Num: 96000, Cur Loss: 0.17718686, Cur Avg Loss: 0.19607844, Log Avg loss: 0.15902490, Global Avg Loss: 0.19607844, Time: 0.1885 Steps: 6000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006200, Sample Num: 99200, Cur Loss: 0.23280247, Cur Avg Loss: 0.19480398, Log Avg loss: 0.15657018, Global Avg Loss: 0.19480398, Time: 0.0792 Steps: 6200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006400, Sample Num: 102400, Cur Loss: 0.20187314, Cur Avg Loss: 0.19426829, Log Avg loss: 0.17766182, Global Avg Loss: 0.19426829, Time: 0.0500 Steps: 6400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006600, Sample Num: 105600, Cur Loss: 0.39434195, Cur Avg Loss: 0.19307202, Log Avg loss: 0.15479139, Global Avg Loss: 0.19307202, Time: 0.2033 Steps: 6600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006800, Sample Num: 108800, Cur Loss: 0.02927214, Cur Avg Loss: 0.19159129, Log Avg loss: 0.14272717, Global Avg Loss: 0.19159129, Time: 0.0518 Steps: 6800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007000, Sample Num: 112000, Cur Loss: 0.01504154, Cur Avg Loss: 0.19017445, Log Avg loss: 0.14200189, Global Avg Loss: 0.19017445, Time: 0.0570 Steps: 7000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007200, Sample Num: 115200, Cur Loss: 0.23900086, Cur Avg Loss: 0.18922810, Log Avg loss: 0.15610596, Global Avg Loss: 0.18922810, Time: 0.0522 Steps: 7200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007400, Sample Num: 118400, Cur Loss: 0.27354768, Cur Avg Loss: 0.18802064, Log Avg loss: 0.14455208, Global Avg Loss: 0.18802064, Time: 0.0514 Steps: 7400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 007600, Sample Num: 121600, Cur Loss: 0.02900572, Cur Avg Loss: 0.18649088, Log Avg loss: 0.12988972, Global Avg Loss: 0.18649088, Time: 0.3129 Steps: 7600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 007800, Sample Num: 124800, Cur Loss: 0.15665999, Cur Avg Loss: 0.18570900, Log Avg loss: 0.15599758, Global Avg Loss: 0.18570900, Time: 0.0568 Steps: 7800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00984454, Cur Avg Loss: 0.18490514, Log Avg loss: 0.15355434, Global Avg Loss: 0.18490514, Time: 0.1769 Steps: 8000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008200, Sample Num: 131200, Cur Loss: 0.34945589, Cur Avg Loss: 0.18442594, Log Avg loss: 0.16525803, Global Avg Loss: 0.18442594, Time: 0.0796 Steps: 8200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008400, Sample Num: 134400, Cur Loss: 0.06354355, Cur Avg Loss: 0.18336985, Log Avg loss: 0.14007040, Global Avg Loss: 0.18336985, Time: 0.0899 Steps: 8400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008600, Sample Num: 137600, Cur Loss: 0.01860002, Cur Avg Loss: 0.18284961, Log Avg loss: 0.16099934, Global Avg Loss: 0.18284961, Time: 0.0592 Steps: 8600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00174786, Cur Avg Loss: 0.18228427, Log Avg loss: 0.15797459, Global Avg Loss: 0.18228427, Time: 0.1869 Steps: 8800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009000, Sample Num: 144000, Cur Loss: 0.10406580, Cur Avg Loss: 0.18135212, Log Avg loss: 0.14033767, Global Avg Loss: 0.18135212, Time: 0.1580 Steps: 9000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00457323, Cur Avg Loss: 0.18075741, Log Avg loss: 0.15399522, Global Avg Loss: 0.18075741, Time: 0.0570 Steps: 9200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00629526, Cur Avg Loss: 0.17979292, Log Avg loss: 0.13542642, Global Avg Loss: 0.17979292, Time: 0.0947 Steps: 9400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00756528, Cur Avg Loss: 0.17926003, Log Avg loss: 0.15421419, Global Avg Loss: 0.17926003, Time: 0.0600 Steps: 9600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 009800, Sample Num: 156800, Cur Loss: 0.03101701, Cur Avg Loss: 0.17829889, Log Avg loss: 0.13216425, Global Avg Loss: 0.17829889, Time: 0.1875 Steps: 9800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00892306, Cur Avg Loss: 0.17743408, Log Avg loss: 0.13505844, Global Avg Loss: 0.17743408, Time: 0.0451 Steps: 10000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010200, Sample Num: 163200, Cur Loss: 0.17613372, Cur Avg Loss: 0.17673473, Log Avg loss: 0.14176720, Global Avg Loss: 0.17673473, Time: 0.0803 Steps: 10200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010400, Sample Num: 166400, Cur Loss: 0.53111756, Cur Avg Loss: 0.17591499, Log Avg loss: 0.13410826, Global Avg Loss: 0.17591499, Time: 0.1890 Steps: 10400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00304370, Cur Avg Loss: 0.17514758, Log Avg loss: 0.13524231, Global Avg Loss: 0.17514758, Time: 0.0547 Steps: 10600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010800, Sample Num: 172800, Cur Loss: 0.05258955, Cur Avg Loss: 0.17432503, Log Avg loss: 0.13073010, Global Avg Loss: 0.17432503, Time: 0.2045 Steps: 10800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011000, Sample Num: 176000, Cur Loss: 0.05631693, Cur Avg Loss: 0.17374625, Log Avg loss: 0.14249184, Global Avg Loss: 0.17374625, Time: 0.0749 Steps: 11000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00350573, Cur Avg Loss: 0.17289609, Log Avg loss: 0.12613745, Global Avg Loss: 0.17289609, Time: 0.0871 Steps: 11200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011400, Sample Num: 182400, Cur Loss: 0.02021101, Cur Avg Loss: 0.17242288, Log Avg loss: 0.14592324, Global Avg Loss: 0.17242288, Time: 0.0957 Steps: 11400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 011600, Sample Num: 185600, Cur Loss: 0.15044969, Cur Avg Loss: 0.17154681, Log Avg loss: 0.12161033, Global Avg Loss: 0.17154681, Time: 0.2984 Steps: 11600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 011800, Sample Num: 188800, Cur Loss: 0.21386400, Cur Avg Loss: 0.17081178, Log Avg loss: 0.12818061, Global Avg Loss: 0.17081178, Time: 0.0308 Steps: 11800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012000, Sample Num: 192000, Cur Loss: 0.11160865, Cur Avg Loss: 0.17022964, Log Avg loss: 0.13588321, Global Avg Loss: 0.17022964, Time: 0.1759 Steps: 12000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012200, Sample Num: 195200, Cur Loss: 0.10568172, Cur Avg Loss: 0.16927300, Log Avg loss: 0.11187432, Global Avg Loss: 0.16927300, Time: 0.1884 Steps: 12200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012400, Sample Num: 198400, Cur Loss: 0.23057412, Cur Avg Loss: 0.16867447, Log Avg loss: 0.13216446, Global Avg Loss: 0.16867447, Time: 0.1023 Steps: 12400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012600, Sample Num: 201600, Cur Loss: 0.05801089, Cur Avg Loss: 0.16812165, Log Avg loss: 0.13384688, Global Avg Loss: 0.16812165, Time: 0.3148 Steps: 12600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012800, Sample Num: 204800, Cur Loss: 0.01404317, Cur Avg Loss: 0.16757268, Log Avg loss: 0.13298711, Global Avg Loss: 0.16757268, Time: 0.0923 Steps: 12800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013000, Sample Num: 208000, Cur Loss: 0.02951513, Cur Avg Loss: 0.16709483, Log Avg loss: 0.13651247, Global Avg Loss: 0.16709483, Time: 0.0499 Steps: 13000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013200, Sample Num: 211200, Cur Loss: 0.56578565, Cur Avg Loss: 0.16677038, Log Avg loss: 0.14568108, Global Avg Loss: 0.16677038, Time: 0.2980 Steps: 13200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013400, Sample Num: 214400, Cur Loss: 0.00883069, Cur Avg Loss: 0.16636739, Log Avg loss: 0.13977062, Global Avg Loss: 0.16636739, Time: 0.0511 Steps: 13400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 013600, Sample Num: 217600, Cur Loss: 0.12040567, Cur Avg Loss: 0.16582810, Log Avg loss: 0.12969515, Global Avg Loss: 0.16582810, Time: 0.3143 Steps: 13600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 013800, Sample Num: 220800, Cur Loss: 0.00291228, Cur Avg Loss: 0.16526516, Log Avg loss: 0.12698566, Global Avg Loss: 0.16526516, Time: 0.1870 Steps: 13800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014000, Sample Num: 224000, Cur Loss: 0.21468113, Cur Avg Loss: 0.16484914, Log Avg loss: 0.13614374, Global Avg Loss: 0.16484914, Time: 0.1701 Steps: 14000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014200, Sample Num: 227200, Cur Loss: 0.04806960, Cur Avg Loss: 0.16441162, Log Avg loss: 0.13378490, Global Avg Loss: 0.16441162, Time: 0.0370 Steps: 14200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014400, Sample Num: 230400, Cur Loss: 0.22066465, Cur Avg Loss: 0.16408874, Log Avg loss: 0.14116447, Global Avg Loss: 0.16408874, Time: 0.0315 Steps: 14400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014600, Sample Num: 233600, Cur Loss: 0.02139662, Cur Avg Loss: 0.16377085, Log Avg loss: 0.14088295, Global Avg Loss: 0.16377085, Time: 0.0437 Steps: 14600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014800, Sample Num: 236800, Cur Loss: 0.08527422, Cur Avg Loss: 0.16319725, Log Avg loss: 0.12132398, Global Avg Loss: 0.16319725, Time: 0.0593 Steps: 14800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015000, Sample Num: 240000, Cur Loss: 0.21759358, Cur Avg Loss: 0.16275481, Log Avg loss: 0.13001410, Global Avg Loss: 0.16275481, Time: 0.0347 Steps: 15000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015200, Sample Num: 243200, Cur Loss: 0.27492490, Cur Avg Loss: 0.16226920, Log Avg loss: 0.12584864, Global Avg Loss: 0.16226920, Time: 0.0894 Steps: 15200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015400, Sample Num: 246400, Cur Loss: 0.01130292, Cur Avg Loss: 0.16198849, Log Avg loss: 0.14065469, Global Avg Loss: 0.16198849, Time: 0.1887 Steps: 15400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015600, Sample Num: 249600, Cur Loss: 0.12554041, Cur Avg Loss: 0.16149325, Log Avg loss: 0.12335986, Global Avg Loss: 0.16149325, Time: 0.0340 Steps: 15600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 015800, Sample Num: 252800, Cur Loss: 0.08381360, Cur Avg Loss: 0.16097130, Log Avg loss: 0.12025871, Global Avg Loss: 0.16097130, Time: 0.0955 Steps: 15800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016000, Sample Num: 256000, Cur Loss: 0.10954742, Cur Avg Loss: 0.16069044, Log Avg loss: 0.13850314, Global Avg Loss: 0.16069044, Time: 0.0729 Steps: 16000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016200, Sample Num: 259200, Cur Loss: 0.00117556, Cur Avg Loss: 0.16036128, Log Avg loss: 0.13402791, Global Avg Loss: 0.16036128, Time: 0.1865 Steps: 16200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016400, Sample Num: 262400, Cur Loss: 0.00113464, Cur Avg Loss: 0.15974352, Log Avg loss: 0.10970520, Global Avg Loss: 0.15974352, Time: 0.0405 Steps: 16400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016600, Sample Num: 265600, Cur Loss: 0.11580677, Cur Avg Loss: 0.15931087, Log Avg loss: 0.12383328, Global Avg Loss: 0.15931087, Time: 0.0972 Steps: 16600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016800, Sample Num: 268800, Cur Loss: 0.17266318, Cur Avg Loss: 0.15884795, Log Avg loss: 0.12042593, Global Avg Loss: 0.15884795, Time: 0.1883 Steps: 16800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017000, Sample Num: 272000, Cur Loss: 0.01218872, Cur Avg Loss: 0.15838197, Log Avg loss: 0.11923953, Global Avg Loss: 0.15838197, Time: 0.0884 Steps: 17000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017200, Sample Num: 275200, Cur Loss: 0.00815976, Cur Avg Loss: 0.15803915, Log Avg loss: 0.12889963, Global Avg Loss: 0.15803915, Time: 0.1056 Steps: 17200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017400, Sample Num: 278400, Cur Loss: 0.03648641, Cur Avg Loss: 0.15752916, Log Avg loss: 0.11366998, Global Avg Loss: 0.15752916, Time: 0.0855 Steps: 17400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017600, Sample Num: 281600, Cur Loss: 0.68777645, Cur Avg Loss: 0.15736645, Log Avg loss: 0.14321079, Global Avg Loss: 0.15736645, Time: 0.0508 Steps: 17600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 017800, Sample Num: 284800, Cur Loss: 0.00158493, Cur Avg Loss: 0.15683795, Log Avg loss: 0.11033005, Global Avg Loss: 0.15683795, Time: 0.0613 Steps: 17800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018000, Sample Num: 288000, Cur Loss: 0.13885887, Cur Avg Loss: 0.15652434, Log Avg loss: 0.12861285, Global Avg Loss: 0.15652434, Time: 0.0536 Steps: 18000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018200, Sample Num: 291200, Cur Loss: 0.13217564, Cur Avg Loss: 0.15606300, Log Avg loss: 0.11454222, Global Avg Loss: 0.15606300, Time: 0.0743 Steps: 18200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018400, Sample Num: 294400, Cur Loss: 0.00468826, Cur Avg Loss: 0.15557511, Log Avg loss: 0.11117764, Global Avg Loss: 0.15557511, Time: 0.0680 Steps: 18400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018600, Sample Num: 297600, Cur Loss: 0.00423494, Cur Avg Loss: 0.15526240, Log Avg loss: 0.12649266, Global Avg Loss: 0.15526240, Time: 0.2009 Steps: 18600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018800, Sample Num: 300800, Cur Loss: 0.48413834, Cur Avg Loss: 0.15510697, Log Avg loss: 0.14065210, Global Avg Loss: 0.15510697, Time: 0.0382 Steps: 18800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019000, Sample Num: 304000, Cur Loss: 0.03570086, Cur Avg Loss: 0.15480974, Log Avg loss: 0.12687048, Global Avg Loss: 0.15480974, Time: 0.0541 Steps: 19000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019200, Sample Num: 307200, Cur Loss: 0.01033264, Cur Avg Loss: 0.15448647, Log Avg loss: 0.12377510, Global Avg Loss: 0.15448647, Time: 0.0797 Steps: 19200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019400, Sample Num: 310400, Cur Loss: 0.01261618, Cur Avg Loss: 0.15419831, Log Avg loss: 0.12653488, Global Avg Loss: 0.15419831, Time: 0.0743 Steps: 19400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019600, Sample Num: 313600, Cur Loss: 0.01830202, Cur Avg Loss: 0.15378980, Log Avg loss: 0.11416430, Global Avg Loss: 0.15378980, Time: 0.0324 Steps: 19600, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 019800, Sample Num: 316800, Cur Loss: 0.02009680, Cur Avg Loss: 0.15345779, Log Avg loss: 0.12092102, Global Avg Loss: 0.15345779, Time: 0.0547 Steps: 19800, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 020000, Sample Num: 320000, Cur Loss: 0.29796124, Cur Avg Loss: 0.15312588, Log Avg loss: 0.12026677, Global Avg Loss: 0.15312588, Time: 0.2994 Steps: 20000, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 020200, Sample Num: 323200, Cur Loss: 0.26915106, Cur Avg Loss: 0.15269592, Log Avg loss: 0.10970002, Global Avg Loss: 0.15269592, Time: 0.0746 Steps: 20200, Updated lr: 0.000090 ***** Running evaluation checkpoint-20320 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-20320 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2774.472027, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.103727, "eval_total_loss": 281.099157, "eval_acc": 0.96602, "eval_prec": 0.967925, "eval_recall": 0.964138, "eval_f1": 0.966028, "eval_roc_auc": 0.993452, "eval_pr_auc": 0.991221, "eval_confusion_matrix": {"tn": 20933, "fp": 694, "fn": 779, "tp": 20943}, "eval_mcc2": 0.932047, "eval_mcc": 0.932047, "eval_sn": 0.964138, "eval_sp": 0.96791, "update_flag": true, "test_avg_loss": 0.103995, "test_total_loss": 422.636221, "test_acc": 0.966427, "test_prec": 0.967384, "test_recall": 0.96539, "test_f1": 0.966386, "test_roc_auc": 0.993408, "test_pr_auc": 0.99117, "test_confusion_matrix": {"tn": 31459, "fp": 1058, "fn": 1125, "tp": 31380}, "test_mcc2": 0.932855, "test_mcc": 0.932855, "test_sn": 0.96539, "test_sp": 0.967463, "lr": 9.008866995073892e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.152668972341872, "train_cur_epoch_loss": 3102.2335179868387, "train_cur_epoch_avg_loss": 0.152668972341872, "train_cur_epoch_time": 2774.4720273017883, "train_cur_epoch_avg_time": 0.1365389777215447, "epoch": 1, "step": 20320} ################################################## Training, Epoch: 0002, Batch: 000080, Sample Num: 1280, Cur Loss: 0.00395781, Cur Avg Loss: 0.11476596, Log Avg loss: 0.13478618, Global Avg Loss: 0.15252033, Time: 0.0888 Steps: 20400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000280, Sample Num: 4480, Cur Loss: 0.16762950, Cur Avg Loss: 0.10798937, Log Avg loss: 0.10527873, Global Avg Loss: 0.15206168, Time: 0.0459 Steps: 20600, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000480, Sample Num: 7680, Cur Loss: 0.00140357, Cur Avg Loss: 0.10970302, Log Avg loss: 0.11210213, Global Avg Loss: 0.15167745, Time: 0.1908 Steps: 20800, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000680, Sample Num: 10880, Cur Loss: 0.21723068, Cur Avg Loss: 0.11380210, Log Avg loss: 0.12363988, Global Avg Loss: 0.15141043, Time: 0.0853 Steps: 21000, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000880, Sample Num: 14080, Cur Loss: 0.14639109, Cur Avg Loss: 0.11310615, Log Avg loss: 0.11073993, Global Avg Loss: 0.15102674, Time: 0.0858 Steps: 21200, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 001080, Sample Num: 17280, Cur Loss: 0.00286593, Cur Avg Loss: 0.11439957, Log Avg loss: 0.12009062, Global Avg Loss: 0.15073762, Time: 0.1895 Steps: 21400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 001280, Sample Num: 20480, Cur Loss: 0.00118824, Cur Avg Loss: 0.11636784, Log Avg loss: 0.12699647, Global Avg Loss: 0.15051779, Time: 0.3112 Steps: 21600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001480, Sample Num: 23680, Cur Loss: 0.16987361, Cur Avg Loss: 0.11732267, Log Avg loss: 0.12343362, Global Avg Loss: 0.15026932, Time: 0.2150 Steps: 21800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001680, Sample Num: 26880, Cur Loss: 0.00212852, Cur Avg Loss: 0.11855517, Log Avg loss: 0.12767563, Global Avg Loss: 0.15006392, Time: 0.1458 Steps: 22000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001880, Sample Num: 30080, Cur Loss: 0.00797351, Cur Avg Loss: 0.11889320, Log Avg loss: 0.12173268, Global Avg Loss: 0.14980868, Time: 0.0517 Steps: 22200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002080, Sample Num: 33280, Cur Loss: 0.03776523, Cur Avg Loss: 0.11894351, Log Avg loss: 0.11941640, Global Avg Loss: 0.14953732, Time: 0.2958 Steps: 22400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002280, Sample Num: 36480, Cur Loss: 0.09901439, Cur Avg Loss: 0.11853052, Log Avg loss: 0.11423542, Global Avg Loss: 0.14922492, Time: 0.0555 Steps: 22600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002480, Sample Num: 39680, Cur Loss: 0.01226296, Cur Avg Loss: 0.11938708, Log Avg loss: 0.12915189, Global Avg Loss: 0.14904884, Time: 0.0754 Steps: 22800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002680, Sample Num: 42880, Cur Loss: 0.00489242, Cur Avg Loss: 0.12082565, Log Avg loss: 0.13866390, Global Avg Loss: 0.14895853, Time: 0.0888 Steps: 23000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002880, Sample Num: 46080, Cur Loss: 0.02133809, Cur Avg Loss: 0.11950736, Log Avg loss: 0.10184235, Global Avg Loss: 0.14855236, Time: 0.1315 Steps: 23200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 003080, Sample Num: 49280, Cur Loss: 0.00259756, Cur Avg Loss: 0.11901980, Log Avg loss: 0.11199890, Global Avg Loss: 0.14823994, Time: 0.1048 Steps: 23400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 003280, Sample Num: 52480, Cur Loss: 0.00385328, Cur Avg Loss: 0.11869344, Log Avg loss: 0.11366745, Global Avg Loss: 0.14794695, Time: 0.2110 Steps: 23600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003480, Sample Num: 55680, Cur Loss: 0.12803665, Cur Avg Loss: 0.11861877, Log Avg loss: 0.11739428, Global Avg Loss: 0.14769020, Time: 0.1886 Steps: 23800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003680, Sample Num: 58880, Cur Loss: 0.00322087, Cur Avg Loss: 0.11831321, Log Avg loss: 0.11299638, Global Avg Loss: 0.14740109, Time: 0.0900 Steps: 24000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003880, Sample Num: 62080, Cur Loss: 0.06358353, Cur Avg Loss: 0.11785396, Log Avg loss: 0.10940387, Global Avg Loss: 0.14708706, Time: 0.1760 Steps: 24200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004080, Sample Num: 65280, Cur Loss: 0.00725005, Cur Avg Loss: 0.11831098, Log Avg loss: 0.12717708, Global Avg Loss: 0.14692387, Time: 0.0497 Steps: 24400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004280, Sample Num: 68480, Cur Loss: 0.00355352, Cur Avg Loss: 0.11802174, Log Avg loss: 0.11212116, Global Avg Loss: 0.14664092, Time: 0.0762 Steps: 24600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004480, Sample Num: 71680, Cur Loss: 0.14712244, Cur Avg Loss: 0.11833650, Log Avg loss: 0.12507241, Global Avg Loss: 0.14646698, Time: 0.3718 Steps: 24800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004680, Sample Num: 74880, Cur Loss: 0.00931692, Cur Avg Loss: 0.11849575, Log Avg loss: 0.12206304, Global Avg Loss: 0.14627175, Time: 0.2033 Steps: 25000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004880, Sample Num: 78080, Cur Loss: 0.00336923, Cur Avg Loss: 0.11820429, Log Avg loss: 0.11138399, Global Avg Loss: 0.14599486, Time: 0.1870 Steps: 25200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 005080, Sample Num: 81280, Cur Loss: 0.29914266, Cur Avg Loss: 0.11832946, Log Avg loss: 0.12138360, Global Avg Loss: 0.14580107, Time: 0.0552 Steps: 25400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 005280, Sample Num: 84480, Cur Loss: 0.03449215, Cur Avg Loss: 0.11839480, Log Avg loss: 0.12005447, Global Avg Loss: 0.14559992, Time: 0.3423 Steps: 25600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005480, Sample Num: 87680, Cur Loss: 0.32432640, Cur Avg Loss: 0.11816336, Log Avg loss: 0.11205353, Global Avg Loss: 0.14533987, Time: 0.0596 Steps: 25800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005680, Sample Num: 90880, Cur Loss: 0.13537018, Cur Avg Loss: 0.11806678, Log Avg loss: 0.11542025, Global Avg Loss: 0.14510972, Time: 0.2043 Steps: 26000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005880, Sample Num: 94080, Cur Loss: 0.00254600, Cur Avg Loss: 0.11820060, Log Avg loss: 0.12200110, Global Avg Loss: 0.14493332, Time: 0.0895 Steps: 26200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006080, Sample Num: 97280, Cur Loss: 0.00361744, Cur Avg Loss: 0.11840571, Log Avg loss: 0.12443611, Global Avg Loss: 0.14477804, Time: 0.2991 Steps: 26400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006280, Sample Num: 100480, Cur Loss: 0.17565484, Cur Avg Loss: 0.11836996, Log Avg loss: 0.11728320, Global Avg Loss: 0.14457131, Time: 0.2019 Steps: 26600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006480, Sample Num: 103680, Cur Loss: 0.19469102, Cur Avg Loss: 0.11843398, Log Avg loss: 0.12044411, Global Avg Loss: 0.14439126, Time: 0.3140 Steps: 26800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006680, Sample Num: 106880, Cur Loss: 0.10213099, Cur Avg Loss: 0.11739274, Log Avg loss: 0.08365650, Global Avg Loss: 0.14394137, Time: 0.0525 Steps: 27000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006880, Sample Num: 110080, Cur Loss: 0.00396138, Cur Avg Loss: 0.11716879, Log Avg loss: 0.10968885, Global Avg Loss: 0.14368951, Time: 0.0959 Steps: 27200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007080, Sample Num: 113280, Cur Loss: 0.27290031, Cur Avg Loss: 0.11689288, Log Avg loss: 0.10740161, Global Avg Loss: 0.14342464, Time: 0.2039 Steps: 27400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007280, Sample Num: 116480, Cur Loss: 0.00078234, Cur Avg Loss: 0.11689981, Log Avg loss: 0.11714516, Global Avg Loss: 0.14323421, Time: 0.0775 Steps: 27600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007480, Sample Num: 119680, Cur Loss: 0.06050779, Cur Avg Loss: 0.11642825, Log Avg loss: 0.09926362, Global Avg Loss: 0.14291787, Time: 0.0847 Steps: 27800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 007680, Sample Num: 122880, Cur Loss: 0.00865698, Cur Avg Loss: 0.11628682, Log Avg loss: 0.11099718, Global Avg Loss: 0.14268987, Time: 0.0588 Steps: 28000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 007880, Sample Num: 126080, Cur Loss: 0.17819712, Cur Avg Loss: 0.11644863, Log Avg loss: 0.12266209, Global Avg Loss: 0.14254783, Time: 0.0493 Steps: 28200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008080, Sample Num: 129280, Cur Loss: 0.00426740, Cur Avg Loss: 0.11614140, Log Avg loss: 0.10403657, Global Avg Loss: 0.14227662, Time: 0.2948 Steps: 28400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008280, Sample Num: 132480, Cur Loss: 0.02035175, Cur Avg Loss: 0.11644130, Log Avg loss: 0.12855736, Global Avg Loss: 0.14218068, Time: 0.1896 Steps: 28600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008480, Sample Num: 135680, Cur Loss: 0.24258748, Cur Avg Loss: 0.11638338, Log Avg loss: 0.11398540, Global Avg Loss: 0.14198488, Time: 0.0347 Steps: 28800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008680, Sample Num: 138880, Cur Loss: 0.01736907, Cur Avg Loss: 0.11626123, Log Avg loss: 0.11108216, Global Avg Loss: 0.14177176, Time: 0.0306 Steps: 29000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008880, Sample Num: 142080, Cur Loss: 0.15613776, Cur Avg Loss: 0.11641098, Log Avg loss: 0.12291008, Global Avg Loss: 0.14164257, Time: 0.1895 Steps: 29200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009080, Sample Num: 145280, Cur Loss: 0.31136280, Cur Avg Loss: 0.11595187, Log Avg loss: 0.09556742, Global Avg Loss: 0.14132913, Time: 0.2039 Steps: 29400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009280, Sample Num: 148480, Cur Loss: 0.07762240, Cur Avg Loss: 0.11558564, Log Avg loss: 0.09895855, Global Avg Loss: 0.14104285, Time: 0.3115 Steps: 29600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009480, Sample Num: 151680, Cur Loss: 0.02486900, Cur Avg Loss: 0.11553814, Log Avg loss: 0.11333427, Global Avg Loss: 0.14085688, Time: 0.3093 Steps: 29800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 009680, Sample Num: 154880, Cur Loss: 0.17253155, Cur Avg Loss: 0.11535871, Log Avg loss: 0.10685391, Global Avg Loss: 0.14063020, Time: 0.0868 Steps: 30000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 009880, Sample Num: 158080, Cur Loss: 0.00309062, Cur Avg Loss: 0.11525763, Log Avg loss: 0.11036538, Global Avg Loss: 0.14042977, Time: 0.0582 Steps: 30200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010080, Sample Num: 161280, Cur Loss: 0.00794394, Cur Avg Loss: 0.11495793, Log Avg loss: 0.10015242, Global Avg Loss: 0.14016478, Time: 0.2178 Steps: 30400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010280, Sample Num: 164480, Cur Loss: 0.11802208, Cur Avg Loss: 0.11466747, Log Avg loss: 0.10002860, Global Avg Loss: 0.13990246, Time: 0.0488 Steps: 30600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010480, Sample Num: 167680, Cur Loss: 0.00482970, Cur Avg Loss: 0.11433282, Log Avg loss: 0.09713147, Global Avg Loss: 0.13962472, Time: 0.2985 Steps: 30800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010680, Sample Num: 170880, Cur Loss: 0.09273405, Cur Avg Loss: 0.11414011, Log Avg loss: 0.10404229, Global Avg Loss: 0.13939516, Time: 0.0515 Steps: 31000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010880, Sample Num: 174080, Cur Loss: 0.00113000, Cur Avg Loss: 0.11367041, Log Avg loss: 0.08858839, Global Avg Loss: 0.13906947, Time: 0.1985 Steps: 31200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011080, Sample Num: 177280, Cur Loss: 0.00527938, Cur Avg Loss: 0.11380965, Log Avg loss: 0.12138441, Global Avg Loss: 0.13895683, Time: 0.0531 Steps: 31400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011280, Sample Num: 180480, Cur Loss: 0.00169910, Cur Avg Loss: 0.11357524, Log Avg loss: 0.10058871, Global Avg Loss: 0.13871399, Time: 0.2982 Steps: 31600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011480, Sample Num: 183680, Cur Loss: 0.07929727, Cur Avg Loss: 0.11340365, Log Avg loss: 0.10372625, Global Avg Loss: 0.13849394, Time: 0.0512 Steps: 31800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 011680, Sample Num: 186880, Cur Loss: 0.00897201, Cur Avg Loss: 0.11328089, Log Avg loss: 0.10623446, Global Avg Loss: 0.13829232, Time: 0.2986 Steps: 32000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 011880, Sample Num: 190080, Cur Loss: 0.01742578, Cur Avg Loss: 0.11303329, Log Avg loss: 0.09857315, Global Avg Loss: 0.13804562, Time: 0.0468 Steps: 32200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012080, Sample Num: 193280, Cur Loss: 0.00355922, Cur Avg Loss: 0.11261685, Log Avg loss: 0.08788066, Global Avg Loss: 0.13773596, Time: 0.0883 Steps: 32400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012280, Sample Num: 196480, Cur Loss: 0.27231970, Cur Avg Loss: 0.11239772, Log Avg loss: 0.09916222, Global Avg Loss: 0.13749931, Time: 0.0308 Steps: 32600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012480, Sample Num: 199680, Cur Loss: 0.01210989, Cur Avg Loss: 0.11221711, Log Avg loss: 0.10112767, Global Avg Loss: 0.13727753, Time: 0.0341 Steps: 32800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012680, Sample Num: 202880, Cur Loss: 0.00966122, Cur Avg Loss: 0.11195550, Log Avg loss: 0.09563095, Global Avg Loss: 0.13702513, Time: 0.0630 Steps: 33000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012880, Sample Num: 206080, Cur Loss: 0.00137119, Cur Avg Loss: 0.11178260, Log Avg loss: 0.10082100, Global Avg Loss: 0.13680703, Time: 0.0949 Steps: 33200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013080, Sample Num: 209280, Cur Loss: 0.02110461, Cur Avg Loss: 0.11178600, Log Avg loss: 0.11200444, Global Avg Loss: 0.13665851, Time: 0.0573 Steps: 33400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013280, Sample Num: 212480, Cur Loss: 0.00141023, Cur Avg Loss: 0.11186032, Log Avg loss: 0.11672080, Global Avg Loss: 0.13653984, Time: 0.0647 Steps: 33600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013480, Sample Num: 215680, Cur Loss: 0.27380216, Cur Avg Loss: 0.11172530, Log Avg loss: 0.10276051, Global Avg Loss: 0.13633996, Time: 0.0952 Steps: 33800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 013680, Sample Num: 218880, Cur Loss: 0.00372108, Cur Avg Loss: 0.11130700, Log Avg loss: 0.08311308, Global Avg Loss: 0.13602686, Time: 0.0515 Steps: 34000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 013880, Sample Num: 222080, Cur Loss: 0.05302943, Cur Avg Loss: 0.11116054, Log Avg loss: 0.10114312, Global Avg Loss: 0.13582286, Time: 0.0612 Steps: 34200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014080, Sample Num: 225280, Cur Loss: 0.22863644, Cur Avg Loss: 0.11128764, Log Avg loss: 0.12010782, Global Avg Loss: 0.13573149, Time: 0.0873 Steps: 34400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014280, Sample Num: 228480, Cur Loss: 0.00273178, Cur Avg Loss: 0.11118884, Log Avg loss: 0.10423357, Global Avg Loss: 0.13554943, Time: 0.2987 Steps: 34600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014480, Sample Num: 231680, Cur Loss: 0.16194250, Cur Avg Loss: 0.11101817, Log Avg loss: 0.09883257, Global Avg Loss: 0.13533841, Time: 0.1020 Steps: 34800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014680, Sample Num: 234880, Cur Loss: 0.07453015, Cur Avg Loss: 0.11076681, Log Avg loss: 0.09256814, Global Avg Loss: 0.13509401, Time: 0.1884 Steps: 35000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014880, Sample Num: 238080, Cur Loss: 0.00463596, Cur Avg Loss: 0.11070657, Log Avg loss: 0.10628503, Global Avg Loss: 0.13493032, Time: 0.1567 Steps: 35200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015080, Sample Num: 241280, Cur Loss: 0.01784889, Cur Avg Loss: 0.11046581, Log Avg loss: 0.09255342, Global Avg Loss: 0.13469090, Time: 0.0365 Steps: 35400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015280, Sample Num: 244480, Cur Loss: 0.00447302, Cur Avg Loss: 0.11035322, Log Avg loss: 0.10186342, Global Avg Loss: 0.13450648, Time: 0.0834 Steps: 35600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015480, Sample Num: 247680, Cur Loss: 0.01049120, Cur Avg Loss: 0.11033287, Log Avg loss: 0.10877868, Global Avg Loss: 0.13436275, Time: 0.1881 Steps: 35800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 015680, Sample Num: 250880, Cur Loss: 0.00389248, Cur Avg Loss: 0.11009315, Log Avg loss: 0.09153832, Global Avg Loss: 0.13412483, Time: 0.0546 Steps: 36000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 015880, Sample Num: 254080, Cur Loss: 0.00876391, Cur Avg Loss: 0.10996881, Log Avg loss: 0.10022051, Global Avg Loss: 0.13393752, Time: 0.1899 Steps: 36200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016080, Sample Num: 257280, Cur Loss: 0.01005019, Cur Avg Loss: 0.11005696, Log Avg loss: 0.11705646, Global Avg Loss: 0.13384477, Time: 0.1822 Steps: 36400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016280, Sample Num: 260480, Cur Loss: 0.01583827, Cur Avg Loss: 0.11003797, Log Avg loss: 0.10851072, Global Avg Loss: 0.13370633, Time: 0.0992 Steps: 36600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016480, Sample Num: 263680, Cur Loss: 0.03043424, Cur Avg Loss: 0.10977362, Log Avg loss: 0.08825575, Global Avg Loss: 0.13345931, Time: 0.3147 Steps: 36800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016680, Sample Num: 266880, Cur Loss: 0.15648574, Cur Avg Loss: 0.10971244, Log Avg loss: 0.10467161, Global Avg Loss: 0.13330370, Time: 0.0578 Steps: 37000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016880, Sample Num: 270080, Cur Loss: 0.00332507, Cur Avg Loss: 0.10954541, Log Avg loss: 0.09561527, Global Avg Loss: 0.13310108, Time: 0.3141 Steps: 37200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017080, Sample Num: 273280, Cur Loss: 0.00156888, Cur Avg Loss: 0.10953863, Log Avg loss: 0.10896622, Global Avg Loss: 0.13297202, Time: 0.0686 Steps: 37400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017280, Sample Num: 276480, Cur Loss: 0.00073607, Cur Avg Loss: 0.10957562, Log Avg loss: 0.11273451, Global Avg Loss: 0.13286437, Time: 0.1895 Steps: 37600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017480, Sample Num: 279680, Cur Loss: 0.28779715, Cur Avg Loss: 0.10953761, Log Avg loss: 0.10625334, Global Avg Loss: 0.13272357, Time: 0.0954 Steps: 37800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 017680, Sample Num: 282880, Cur Loss: 0.32033473, Cur Avg Loss: 0.10968189, Log Avg loss: 0.12229201, Global Avg Loss: 0.13266867, Time: 0.0931 Steps: 38000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 017880, Sample Num: 286080, Cur Loss: 0.08060064, Cur Avg Loss: 0.10947148, Log Avg loss: 0.09087152, Global Avg Loss: 0.13244983, Time: 0.0920 Steps: 38200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018080, Sample Num: 289280, Cur Loss: 0.02298199, Cur Avg Loss: 0.10955529, Log Avg loss: 0.11704786, Global Avg Loss: 0.13236961, Time: 0.3046 Steps: 38400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018280, Sample Num: 292480, Cur Loss: 0.00106047, Cur Avg Loss: 0.10968862, Log Avg loss: 0.12174120, Global Avg Loss: 0.13231455, Time: 0.0620 Steps: 38600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018480, Sample Num: 295680, Cur Loss: 0.00439481, Cur Avg Loss: 0.10965165, Log Avg loss: 0.10627256, Global Avg Loss: 0.13218031, Time: 0.2013 Steps: 38800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018680, Sample Num: 298880, Cur Loss: 0.00295794, Cur Avg Loss: 0.10958463, Log Avg loss: 0.10339244, Global Avg Loss: 0.13203268, Time: 0.0517 Steps: 39000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018880, Sample Num: 302080, Cur Loss: 0.26645678, Cur Avg Loss: 0.10976644, Log Avg loss: 0.12674702, Global Avg Loss: 0.13200571, Time: 0.2038 Steps: 39200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019080, Sample Num: 305280, Cur Loss: 0.00309661, Cur Avg Loss: 0.10975798, Log Avg loss: 0.10895918, Global Avg Loss: 0.13188872, Time: 0.0894 Steps: 39400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019280, Sample Num: 308480, Cur Loss: 0.00112430, Cur Avg Loss: 0.10970916, Log Avg loss: 0.10505183, Global Avg Loss: 0.13175318, Time: 0.0310 Steps: 39600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019480, Sample Num: 311680, Cur Loss: 0.01173894, Cur Avg Loss: 0.10957593, Log Avg loss: 0.09673254, Global Avg Loss: 0.13157720, Time: 0.0861 Steps: 39800, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 019680, Sample Num: 314880, Cur Loss: 0.00379920, Cur Avg Loss: 0.10944742, Log Avg loss: 0.09693048, Global Avg Loss: 0.13140397, Time: 0.0594 Steps: 40000, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 019880, Sample Num: 318080, Cur Loss: 0.35119167, Cur Avg Loss: 0.10931472, Log Avg loss: 0.09625753, Global Avg Loss: 0.13122911, Time: 0.1876 Steps: 40200, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 020080, Sample Num: 321280, Cur Loss: 0.03389803, Cur Avg Loss: 0.10904958, Log Avg loss: 0.08269497, Global Avg Loss: 0.13098884, Time: 0.0548 Steps: 40400, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 020280, Sample Num: 324480, Cur Loss: 0.47252283, Cur Avg Loss: 0.10910736, Log Avg loss: 0.11490781, Global Avg Loss: 0.13090962, Time: 0.0584 Steps: 40600, Updated lr: 0.000080 ***** Running evaluation checkpoint-40640 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-40640 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2767.119551, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.084041, "eval_total_loss": 227.749756, "eval_acc": 0.974532, "eval_prec": 0.974457, "eval_recall": 0.974726, "eval_f1": 0.974591, "eval_roc_auc": 0.995822, "eval_pr_auc": 0.994867, "eval_confusion_matrix": {"tn": 21072, "fp": 555, "fn": 549, "tp": 21173}, "eval_mcc2": 0.949064, "eval_mcc": 0.949064, "eval_sn": 0.974726, "eval_sp": 0.974338, "update_flag": true, "test_avg_loss": 0.082822, "test_total_loss": 336.587098, "test_acc": 0.97547, "test_prec": 0.975568, "test_recall": 0.975358, "test_f1": 0.975463, "test_roc_auc": 0.995826, "test_pr_auc": 0.994865, "test_confusion_matrix": {"tn": 31723, "fp": 794, "fn": 801, "tp": 31704}, "test_mcc2": 0.95094, "test_mcc": 0.95094, "test_sn": 0.975358, "test_sp": 0.975582, "lr": 8.007881773399016e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.13085179088269167, "train_cur_epoch_loss": 2215.5832634857506, "train_cur_epoch_avg_loss": 0.10903460942351136, "train_cur_epoch_time": 2767.1195509433746, "train_cur_epoch_avg_time": 0.13617714325508734, "epoch": 2, "step": 40640} ################################################## Training, Epoch: 0003, Batch: 000160, Sample Num: 2560, Cur Loss: 0.00097973, Cur Avg Loss: 0.08900471, Log Avg loss: 0.08563407, Global Avg Loss: 0.13068768, Time: 0.0792 Steps: 40800, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000360, Sample Num: 5760, Cur Loss: 0.49179530, Cur Avg Loss: 0.09670902, Log Avg loss: 0.10287246, Global Avg Loss: 0.13055200, Time: 0.0571 Steps: 41000, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00300925, Cur Avg Loss: 0.09638596, Log Avg loss: 0.09580446, Global Avg Loss: 0.13038332, Time: 0.0997 Steps: 41200, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000760, Sample Num: 12160, Cur Loss: 0.11376596, Cur Avg Loss: 0.09801260, Log Avg loss: 0.10256720, Global Avg Loss: 0.13024895, Time: 0.1575 Steps: 41400, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000960, Sample Num: 15360, Cur Loss: 0.20149904, Cur Avg Loss: 0.09779713, Log Avg loss: 0.09697832, Global Avg Loss: 0.13008899, Time: 0.0367 Steps: 41600, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 001160, Sample Num: 18560, Cur Loss: 0.01847120, Cur Avg Loss: 0.09806655, Log Avg loss: 0.09935978, Global Avg Loss: 0.12994196, Time: 0.1885 Steps: 41800, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00417920, Cur Avg Loss: 0.09590271, Log Avg loss: 0.08335241, Global Avg Loss: 0.12972011, Time: 0.0365 Steps: 42000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001560, Sample Num: 24960, Cur Loss: 0.29708424, Cur Avg Loss: 0.09550769, Log Avg loss: 0.09282159, Global Avg Loss: 0.12954523, Time: 0.0867 Steps: 42200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001760, Sample Num: 28160, Cur Loss: 0.01343754, Cur Avg Loss: 0.09676818, Log Avg loss: 0.10659999, Global Avg Loss: 0.12943700, Time: 0.0810 Steps: 42400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001960, Sample Num: 31360, Cur Loss: 0.01087959, Cur Avg Loss: 0.09645814, Log Avg loss: 0.09372982, Global Avg Loss: 0.12926936, Time: 0.0996 Steps: 42600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002160, Sample Num: 34560, Cur Loss: 0.08516777, Cur Avg Loss: 0.09779575, Log Avg loss: 0.11090427, Global Avg Loss: 0.12918354, Time: 0.2982 Steps: 42800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00082409, Cur Avg Loss: 0.09752323, Log Avg loss: 0.09457999, Global Avg Loss: 0.12902260, Time: 0.2455 Steps: 43000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002560, Sample Num: 40960, Cur Loss: 0.28105286, Cur Avg Loss: 0.09779707, Log Avg loss: 0.10102840, Global Avg Loss: 0.12889299, Time: 0.2678 Steps: 43200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002760, Sample Num: 44160, Cur Loss: 0.04123560, Cur Avg Loss: 0.09838189, Log Avg loss: 0.10586761, Global Avg Loss: 0.12878688, Time: 0.2361 Steps: 43400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002960, Sample Num: 47360, Cur Loss: 0.03395471, Cur Avg Loss: 0.09766544, Log Avg loss: 0.08777840, Global Avg Loss: 0.12859877, Time: 0.0504 Steps: 43600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00158754, Cur Avg Loss: 0.09674589, Log Avg loss: 0.08313655, Global Avg Loss: 0.12839118, Time: 0.0886 Steps: 43800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 003360, Sample Num: 53760, Cur Loss: 0.07294620, Cur Avg Loss: 0.09686928, Log Avg loss: 0.09881887, Global Avg Loss: 0.12825676, Time: 0.1030 Steps: 44000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003560, Sample Num: 56960, Cur Loss: 0.05435934, Cur Avg Loss: 0.09687654, Log Avg loss: 0.09699845, Global Avg Loss: 0.12811532, Time: 0.0583 Steps: 44200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003760, Sample Num: 60160, Cur Loss: 0.02949356, Cur Avg Loss: 0.09783869, Log Avg loss: 0.11496497, Global Avg Loss: 0.12805609, Time: 0.1871 Steps: 44400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003960, Sample Num: 63360, Cur Loss: 0.05044907, Cur Avg Loss: 0.09735195, Log Avg loss: 0.08820127, Global Avg Loss: 0.12787737, Time: 0.0349 Steps: 44600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00220874, Cur Avg Loss: 0.09751798, Log Avg loss: 0.10080531, Global Avg Loss: 0.12775651, Time: 0.0513 Steps: 44800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004360, Sample Num: 69760, Cur Loss: 0.03002650, Cur Avg Loss: 0.09761303, Log Avg loss: 0.09959008, Global Avg Loss: 0.12763132, Time: 0.3144 Steps: 45000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004560, Sample Num: 72960, Cur Loss: 0.06976607, Cur Avg Loss: 0.09760953, Log Avg loss: 0.09753333, Global Avg Loss: 0.12749815, Time: 0.0833 Steps: 45200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004760, Sample Num: 76160, Cur Loss: 0.00253864, Cur Avg Loss: 0.09758705, Log Avg loss: 0.09707450, Global Avg Loss: 0.12736412, Time: 0.0974 Steps: 45400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004960, Sample Num: 79360, Cur Loss: 0.11173484, Cur Avg Loss: 0.09764350, Log Avg loss: 0.09898686, Global Avg Loss: 0.12723966, Time: 0.0867 Steps: 45600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 005160, Sample Num: 82560, Cur Loss: 0.01570157, Cur Avg Loss: 0.09792665, Log Avg loss: 0.10494890, Global Avg Loss: 0.12714232, Time: 0.0968 Steps: 45800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 005360, Sample Num: 85760, Cur Loss: 0.01808508, Cur Avg Loss: 0.09803094, Log Avg loss: 0.10072148, Global Avg Loss: 0.12702745, Time: 0.1868 Steps: 46000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00043463, Cur Avg Loss: 0.09728172, Log Avg loss: 0.07720267, Global Avg Loss: 0.12681176, Time: 0.1025 Steps: 46200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005760, Sample Num: 92160, Cur Loss: 0.03184886, Cur Avg Loss: 0.09767714, Log Avg loss: 0.10866976, Global Avg Loss: 0.12673356, Time: 0.1867 Steps: 46400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00216847, Cur Avg Loss: 0.09771625, Log Avg loss: 0.09884273, Global Avg Loss: 0.12661385, Time: 0.1878 Steps: 46600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006160, Sample Num: 98560, Cur Loss: 0.05335069, Cur Avg Loss: 0.09789099, Log Avg loss: 0.10309811, Global Avg Loss: 0.12651336, Time: 0.1868 Steps: 46800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006360, Sample Num: 101760, Cur Loss: 0.07053708, Cur Avg Loss: 0.09826065, Log Avg loss: 0.10964627, Global Avg Loss: 0.12644159, Time: 0.2528 Steps: 47000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006560, Sample Num: 104960, Cur Loss: 0.00072058, Cur Avg Loss: 0.09821173, Log Avg loss: 0.09665607, Global Avg Loss: 0.12631538, Time: 0.1846 Steps: 47200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006760, Sample Num: 108160, Cur Loss: 0.49015072, Cur Avg Loss: 0.09797033, Log Avg loss: 0.09005242, Global Avg Loss: 0.12616237, Time: 0.0630 Steps: 47400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006960, Sample Num: 111360, Cur Loss: 0.20930740, Cur Avg Loss: 0.09774520, Log Avg loss: 0.09013576, Global Avg Loss: 0.12601099, Time: 0.0671 Steps: 47600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 007160, Sample Num: 114560, Cur Loss: 0.47604495, Cur Avg Loss: 0.09783012, Log Avg loss: 0.10078561, Global Avg Loss: 0.12590545, Time: 0.0419 Steps: 47800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 007360, Sample Num: 117760, Cur Loss: 0.00193704, Cur Avg Loss: 0.09801249, Log Avg loss: 0.10454121, Global Avg Loss: 0.12581643, Time: 0.3145 Steps: 48000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007560, Sample Num: 120960, Cur Loss: 0.01894837, Cur Avg Loss: 0.09755906, Log Avg loss: 0.08087269, Global Avg Loss: 0.12562994, Time: 0.1886 Steps: 48200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00507991, Cur Avg Loss: 0.09769023, Log Avg loss: 0.10264841, Global Avg Loss: 0.12553498, Time: 0.1862 Steps: 48400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007960, Sample Num: 127360, Cur Loss: 0.22505835, Cur Avg Loss: 0.09771644, Log Avg loss: 0.09873369, Global Avg Loss: 0.12542468, Time: 0.2184 Steps: 48600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00345233, Cur Avg Loss: 0.09797770, Log Avg loss: 0.10837566, Global Avg Loss: 0.12535481, Time: 0.0972 Steps: 48800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00229347, Cur Avg Loss: 0.09782740, Log Avg loss: 0.09169518, Global Avg Loss: 0.12521743, Time: 0.0304 Steps: 49000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008560, Sample Num: 136960, Cur Loss: 0.00078682, Cur Avg Loss: 0.09795658, Log Avg loss: 0.10335631, Global Avg Loss: 0.12512856, Time: 0.0803 Steps: 49200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008760, Sample Num: 140160, Cur Loss: 0.17723961, Cur Avg Loss: 0.09794550, Log Avg loss: 0.09747143, Global Avg Loss: 0.12501659, Time: 0.0676 Steps: 49400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008960, Sample Num: 143360, Cur Loss: 0.02287053, Cur Avg Loss: 0.09763820, Log Avg loss: 0.08417827, Global Avg Loss: 0.12485192, Time: 0.0497 Steps: 49600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 009160, Sample Num: 146560, Cur Loss: 0.00582559, Cur Avg Loss: 0.09780546, Log Avg loss: 0.10529858, Global Avg Loss: 0.12477339, Time: 0.2348 Steps: 49800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00044945, Cur Avg Loss: 0.09740878, Log Avg loss: 0.07924115, Global Avg Loss: 0.12459126, Time: 0.0926 Steps: 50000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009560, Sample Num: 152960, Cur Loss: 0.00227816, Cur Avg Loss: 0.09762140, Log Avg loss: 0.10757208, Global Avg Loss: 0.12452345, Time: 0.0266 Steps: 50200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009760, Sample Num: 156160, Cur Loss: 0.00877023, Cur Avg Loss: 0.09748752, Log Avg loss: 0.09108773, Global Avg Loss: 0.12439077, Time: 0.0872 Steps: 50400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009960, Sample Num: 159360, Cur Loss: 0.00183620, Cur Avg Loss: 0.09751574, Log Avg loss: 0.09889313, Global Avg Loss: 0.12428999, Time: 0.0994 Steps: 50600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010160, Sample Num: 162560, Cur Loss: 0.04874129, Cur Avg Loss: 0.09732167, Log Avg loss: 0.08765666, Global Avg Loss: 0.12414577, Time: 0.0419 Steps: 50800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00154729, Cur Avg Loss: 0.09708568, Log Avg loss: 0.08509750, Global Avg Loss: 0.12399264, Time: 0.0552 Steps: 51000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010560, Sample Num: 168960, Cur Loss: 0.11319283, Cur Avg Loss: 0.09714402, Log Avg loss: 0.10016619, Global Avg Loss: 0.12389956, Time: 0.0433 Steps: 51200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010760, Sample Num: 172160, Cur Loss: 0.08463704, Cur Avg Loss: 0.09704224, Log Avg loss: 0.09166820, Global Avg Loss: 0.12377415, Time: 0.1904 Steps: 51400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010960, Sample Num: 175360, Cur Loss: 0.05854387, Cur Avg Loss: 0.09683039, Log Avg loss: 0.08543264, Global Avg Loss: 0.12362554, Time: 0.0545 Steps: 51600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00711319, Cur Avg Loss: 0.09672246, Log Avg loss: 0.09080802, Global Avg Loss: 0.12349883, Time: 0.2044 Steps: 51800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 011360, Sample Num: 181760, Cur Loss: 0.02466363, Cur Avg Loss: 0.09686805, Log Avg loss: 0.10499191, Global Avg Loss: 0.12342765, Time: 0.0351 Steps: 52000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00352705, Cur Avg Loss: 0.09667805, Log Avg loss: 0.08588640, Global Avg Loss: 0.12328381, Time: 0.3106 Steps: 52200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00085900, Cur Avg Loss: 0.09637848, Log Avg loss: 0.07906292, Global Avg Loss: 0.12311503, Time: 0.3108 Steps: 52400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011960, Sample Num: 191360, Cur Loss: 0.00448048, Cur Avg Loss: 0.09636410, Log Avg loss: 0.09551893, Global Avg Loss: 0.12301010, Time: 0.3102 Steps: 52600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012160, Sample Num: 194560, Cur Loss: 0.14254390, Cur Avg Loss: 0.09608213, Log Avg loss: 0.07922019, Global Avg Loss: 0.12284423, Time: 0.0614 Steps: 52800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012360, Sample Num: 197760, Cur Loss: 0.03301695, Cur Avg Loss: 0.09587038, Log Avg loss: 0.08299606, Global Avg Loss: 0.12269386, Time: 0.0867 Steps: 53000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012560, Sample Num: 200960, Cur Loss: 0.15682520, Cur Avg Loss: 0.09575802, Log Avg loss: 0.08881403, Global Avg Loss: 0.12256649, Time: 0.2979 Steps: 53200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012760, Sample Num: 204160, Cur Loss: 0.01646149, Cur Avg Loss: 0.09563736, Log Avg loss: 0.08805961, Global Avg Loss: 0.12243726, Time: 0.0794 Steps: 53400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012960, Sample Num: 207360, Cur Loss: 0.00275763, Cur Avg Loss: 0.09552559, Log Avg loss: 0.08839492, Global Avg Loss: 0.12231023, Time: 0.0762 Steps: 53600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 013160, Sample Num: 210560, Cur Loss: 0.06935944, Cur Avg Loss: 0.09560112, Log Avg loss: 0.10049574, Global Avg Loss: 0.12222914, Time: 0.0601 Steps: 53800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 013360, Sample Num: 213760, Cur Loss: 0.01211385, Cur Avg Loss: 0.09575191, Log Avg loss: 0.10567347, Global Avg Loss: 0.12216782, Time: 0.1651 Steps: 54000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013560, Sample Num: 216960, Cur Loss: 0.00156499, Cur Avg Loss: 0.09562818, Log Avg loss: 0.08736323, Global Avg Loss: 0.12203939, Time: 0.3138 Steps: 54200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013760, Sample Num: 220160, Cur Loss: 0.00359468, Cur Avg Loss: 0.09565488, Log Avg loss: 0.09746532, Global Avg Loss: 0.12194904, Time: 0.1882 Steps: 54400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013960, Sample Num: 223360, Cur Loss: 0.02623656, Cur Avg Loss: 0.09542185, Log Avg loss: 0.07938894, Global Avg Loss: 0.12179315, Time: 0.1681 Steps: 54600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014160, Sample Num: 226560, Cur Loss: 0.12716252, Cur Avg Loss: 0.09544530, Log Avg loss: 0.09708245, Global Avg Loss: 0.12170296, Time: 0.1876 Steps: 54800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014360, Sample Num: 229760, Cur Loss: 0.05576231, Cur Avg Loss: 0.09531123, Log Avg loss: 0.08581908, Global Avg Loss: 0.12157247, Time: 0.1065 Steps: 55000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014560, Sample Num: 232960, Cur Loss: 0.00553550, Cur Avg Loss: 0.09518865, Log Avg loss: 0.08638754, Global Avg Loss: 0.12144499, Time: 0.0871 Steps: 55200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014760, Sample Num: 236160, Cur Loss: 0.46832922, Cur Avg Loss: 0.09510125, Log Avg loss: 0.08873794, Global Avg Loss: 0.12132692, Time: 0.0416 Steps: 55400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014960, Sample Num: 239360, Cur Loss: 0.00148135, Cur Avg Loss: 0.09505390, Log Avg loss: 0.09155962, Global Avg Loss: 0.12121984, Time: 0.2668 Steps: 55600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015160, Sample Num: 242560, Cur Loss: 0.00131956, Cur Avg Loss: 0.09498113, Log Avg loss: 0.08953813, Global Avg Loss: 0.12110629, Time: 0.1870 Steps: 55800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015360, Sample Num: 245760, Cur Loss: 0.00372774, Cur Avg Loss: 0.09525594, Log Avg loss: 0.11608616, Global Avg Loss: 0.12108836, Time: 0.3145 Steps: 56000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015560, Sample Num: 248960, Cur Loss: 0.00159383, Cur Avg Loss: 0.09517317, Log Avg loss: 0.08881699, Global Avg Loss: 0.12097351, Time: 0.1892 Steps: 56200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 015760, Sample Num: 252160, Cur Loss: 0.02774425, Cur Avg Loss: 0.09495876, Log Avg loss: 0.07827727, Global Avg Loss: 0.12082211, Time: 0.0372 Steps: 56400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 015960, Sample Num: 255360, Cur Loss: 0.00117086, Cur Avg Loss: 0.09506009, Log Avg loss: 0.10304486, Global Avg Loss: 0.12075929, Time: 0.0871 Steps: 56600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016160, Sample Num: 258560, Cur Loss: 0.00216903, Cur Avg Loss: 0.09500799, Log Avg loss: 0.09085061, Global Avg Loss: 0.12065398, Time: 0.0716 Steps: 56800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016360, Sample Num: 261760, Cur Loss: 0.05817460, Cur Avg Loss: 0.09494886, Log Avg loss: 0.09017107, Global Avg Loss: 0.12054702, Time: 0.2029 Steps: 57000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016560, Sample Num: 264960, Cur Loss: 0.00108196, Cur Avg Loss: 0.09484328, Log Avg loss: 0.08620682, Global Avg Loss: 0.12042695, Time: 0.0786 Steps: 57200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016760, Sample Num: 268160, Cur Loss: 0.00023920, Cur Avg Loss: 0.09478628, Log Avg loss: 0.09006708, Global Avg Loss: 0.12032117, Time: 0.2985 Steps: 57400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016960, Sample Num: 271360, Cur Loss: 0.00337409, Cur Avg Loss: 0.09486832, Log Avg loss: 0.10174284, Global Avg Loss: 0.12025666, Time: 0.0883 Steps: 57600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017160, Sample Num: 274560, Cur Loss: 0.00086957, Cur Avg Loss: 0.09494200, Log Avg loss: 0.10119063, Global Avg Loss: 0.12019068, Time: 0.1865 Steps: 57800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017360, Sample Num: 277760, Cur Loss: 0.00235913, Cur Avg Loss: 0.09483496, Log Avg loss: 0.08565068, Global Avg Loss: 0.12007158, Time: 0.0305 Steps: 58000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017560, Sample Num: 280960, Cur Loss: 0.02221940, Cur Avg Loss: 0.09485500, Log Avg loss: 0.09659458, Global Avg Loss: 0.11999090, Time: 0.3623 Steps: 58200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 017760, Sample Num: 284160, Cur Loss: 0.00042279, Cur Avg Loss: 0.09483805, Log Avg loss: 0.09334933, Global Avg Loss: 0.11989967, Time: 0.0517 Steps: 58400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 017960, Sample Num: 287360, Cur Loss: 0.02385895, Cur Avg Loss: 0.09474355, Log Avg loss: 0.08635206, Global Avg Loss: 0.11978517, Time: 0.0882 Steps: 58600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018160, Sample Num: 290560, Cur Loss: 0.53925663, Cur Avg Loss: 0.09464822, Log Avg loss: 0.08608789, Global Avg Loss: 0.11967055, Time: 0.2038 Steps: 58800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018360, Sample Num: 293760, Cur Loss: 0.65239155, Cur Avg Loss: 0.09470669, Log Avg loss: 0.10001571, Global Avg Loss: 0.11960393, Time: 0.0805 Steps: 59000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018560, Sample Num: 296960, Cur Loss: 0.00168490, Cur Avg Loss: 0.09475909, Log Avg loss: 0.09956943, Global Avg Loss: 0.11953624, Time: 0.1060 Steps: 59200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018760, Sample Num: 300160, Cur Loss: 0.01105108, Cur Avg Loss: 0.09478646, Log Avg loss: 0.09732640, Global Avg Loss: 0.11946146, Time: 0.1884 Steps: 59400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018960, Sample Num: 303360, Cur Loss: 0.00137423, Cur Avg Loss: 0.09466190, Log Avg loss: 0.08297841, Global Avg Loss: 0.11933903, Time: 0.0891 Steps: 59600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019160, Sample Num: 306560, Cur Loss: 0.07675923, Cur Avg Loss: 0.09470569, Log Avg loss: 0.09885706, Global Avg Loss: 0.11927053, Time: 0.1813 Steps: 59800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019360, Sample Num: 309760, Cur Loss: 0.00263472, Cur Avg Loss: 0.09476940, Log Avg loss: 0.10087214, Global Avg Loss: 0.11920921, Time: 0.0867 Steps: 60000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019560, Sample Num: 312960, Cur Loss: 0.00055248, Cur Avg Loss: 0.09464113, Log Avg loss: 0.08222520, Global Avg Loss: 0.11908633, Time: 0.2567 Steps: 60200, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 019760, Sample Num: 316160, Cur Loss: 0.00148466, Cur Avg Loss: 0.09472532, Log Avg loss: 0.10295886, Global Avg Loss: 0.11903293, Time: 0.3047 Steps: 60400, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 019960, Sample Num: 319360, Cur Loss: 0.12307043, Cur Avg Loss: 0.09465221, Log Avg loss: 0.08742840, Global Avg Loss: 0.11892863, Time: 0.0560 Steps: 60600, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 020160, Sample Num: 322560, Cur Loss: 0.47031820, Cur Avg Loss: 0.09470504, Log Avg loss: 0.09997792, Global Avg Loss: 0.11886629, Time: 0.1882 Steps: 60800, Updated lr: 0.000070 ***** Running evaluation checkpoint-60960 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-60960 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2771.790784, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.109253, "eval_total_loss": 296.075812, "eval_acc": 0.967658, "eval_prec": 0.948288, "eval_recall": 0.989412, "eval_f1": 0.968413, "eval_roc_auc": 0.996569, "eval_pr_auc": 0.996072, "eval_confusion_matrix": {"tn": 20455, "fp": 1172, "fn": 230, "tp": 21492}, "eval_mcc2": 0.936194, "eval_mcc": 0.936194, "eval_sn": 0.989412, "eval_sp": 0.945808, "update_flag": false, "test_avg_loss": 0.109626, "test_total_loss": 445.521608, "test_acc": 0.968318, "test_prec": 0.948578, "test_recall": 0.990309, "test_f1": 0.968995, "test_roc_auc": 0.996548, "test_pr_auc": 0.995831, "test_confusion_matrix": {"tn": 30772, "fp": 1745, "fn": 315, "tp": 32190}, "test_mcc2": 0.937545, "test_mcc": 0.937545, "test_sn": 0.990309, "test_sp": 0.946336, "lr": 7.006896551724137e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.11882221260266712, "train_cur_epoch_loss": 1925.585298785998, "train_cur_epoch_avg_loss": 0.09476305604261802, "train_cur_epoch_time": 2771.7907836437225, "train_cur_epoch_avg_time": 0.1364070267541202, "epoch": 3, "step": 60960} ################################################## Training, Epoch: 0004, Batch: 000040, Sample Num: 640, Cur Loss: 0.00046553, Cur Avg Loss: 0.07053622, Log Avg loss: 0.09576570, Global Avg Loss: 0.11879055, Time: 0.1941 Steps: 61000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000240, Sample Num: 3840, Cur Loss: 0.00643103, Cur Avg Loss: 0.07966891, Log Avg loss: 0.08149544, Global Avg Loss: 0.11866867, Time: 0.0593 Steps: 61200, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000440, Sample Num: 7040, Cur Loss: 0.01291939, Cur Avg Loss: 0.08693221, Log Avg loss: 0.09564818, Global Avg Loss: 0.11859368, Time: 0.0802 Steps: 61400, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000640, Sample Num: 10240, Cur Loss: 0.00574785, Cur Avg Loss: 0.09064202, Log Avg loss: 0.09880361, Global Avg Loss: 0.11852943, Time: 0.1065 Steps: 61600, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000840, Sample Num: 13440, Cur Loss: 0.29885995, Cur Avg Loss: 0.09209914, Log Avg loss: 0.09676190, Global Avg Loss: 0.11845899, Time: 0.0905 Steps: 61800, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00049966, Cur Avg Loss: 0.09091725, Log Avg loss: 0.08595334, Global Avg Loss: 0.11835413, Time: 0.0494 Steps: 62000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 001240, Sample Num: 19840, Cur Loss: 0.01542820, Cur Avg Loss: 0.08948562, Log Avg loss: 0.08204114, Global Avg Loss: 0.11823737, Time: 0.1881 Steps: 62200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001440, Sample Num: 23040, Cur Loss: 0.00019783, Cur Avg Loss: 0.08842804, Log Avg loss: 0.08187102, Global Avg Loss: 0.11812081, Time: 0.0545 Steps: 62400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001640, Sample Num: 26240, Cur Loss: 0.00709821, Cur Avg Loss: 0.08987118, Log Avg loss: 0.10026183, Global Avg Loss: 0.11806375, Time: 0.0309 Steps: 62600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001840, Sample Num: 29440, Cur Loss: 0.01834124, Cur Avg Loss: 0.09011739, Log Avg loss: 0.09213631, Global Avg Loss: 0.11798118, Time: 0.0725 Steps: 62800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00740540, Cur Avg Loss: 0.09012922, Log Avg loss: 0.09023804, Global Avg Loss: 0.11789311, Time: 0.0497 Steps: 63000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002240, Sample Num: 35840, Cur Loss: 0.02758168, Cur Avg Loss: 0.08971109, Log Avg loss: 0.08544611, Global Avg Loss: 0.11779043, Time: 0.0996 Steps: 63200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002440, Sample Num: 39040, Cur Loss: 0.16601518, Cur Avg Loss: 0.08934399, Log Avg loss: 0.08523254, Global Avg Loss: 0.11768772, Time: 0.2349 Steps: 63400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002640, Sample Num: 42240, Cur Loss: 0.34352374, Cur Avg Loss: 0.09020343, Log Avg loss: 0.10068863, Global Avg Loss: 0.11763426, Time: 0.0400 Steps: 63600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00140463, Cur Avg Loss: 0.08987356, Log Avg loss: 0.08551916, Global Avg Loss: 0.11753359, Time: 0.0886 Steps: 63800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00292479, Cur Avg Loss: 0.09004703, Log Avg loss: 0.09251032, Global Avg Loss: 0.11745539, Time: 0.0513 Steps: 64000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 003240, Sample Num: 51840, Cur Loss: 0.14370026, Cur Avg Loss: 0.08911433, Log Avg loss: 0.07493731, Global Avg Loss: 0.11732294, Time: 0.0892 Steps: 64200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003440, Sample Num: 55040, Cur Loss: 0.24572220, Cur Avg Loss: 0.08944036, Log Avg loss: 0.09472206, Global Avg Loss: 0.11725275, Time: 0.1878 Steps: 64400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003640, Sample Num: 58240, Cur Loss: 0.00120600, Cur Avg Loss: 0.08965455, Log Avg loss: 0.09333855, Global Avg Loss: 0.11717871, Time: 0.0863 Steps: 64600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003840, Sample Num: 61440, Cur Loss: 0.03485980, Cur Avg Loss: 0.08983876, Log Avg loss: 0.09319137, Global Avg Loss: 0.11710467, Time: 0.1869 Steps: 64800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00538957, Cur Avg Loss: 0.08973653, Log Avg loss: 0.08777391, Global Avg Loss: 0.11701443, Time: 0.0434 Steps: 65000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004240, Sample Num: 67840, Cur Loss: 0.01144624, Cur Avg Loss: 0.09004493, Log Avg loss: 0.09627455, Global Avg Loss: 0.11695081, Time: 0.0950 Steps: 65200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004440, Sample Num: 71040, Cur Loss: 0.03352278, Cur Avg Loss: 0.09045327, Log Avg loss: 0.09911012, Global Avg Loss: 0.11689625, Time: 0.1885 Steps: 65400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00457599, Cur Avg Loss: 0.09112329, Log Avg loss: 0.10599775, Global Avg Loss: 0.11686302, Time: 0.0554 Steps: 65600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00460178, Cur Avg Loss: 0.09114955, Log Avg loss: 0.09175876, Global Avg Loss: 0.11678672, Time: 0.2377 Steps: 65800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 005040, Sample Num: 80640, Cur Loss: 0.01454014, Cur Avg Loss: 0.09165242, Log Avg loss: 0.10382193, Global Avg Loss: 0.11674743, Time: 0.0901 Steps: 66000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 005240, Sample Num: 83840, Cur Loss: 0.35604548, Cur Avg Loss: 0.09214916, Log Avg loss: 0.10466700, Global Avg Loss: 0.11671093, Time: 0.2045 Steps: 66200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005440, Sample Num: 87040, Cur Loss: 0.23209891, Cur Avg Loss: 0.09199347, Log Avg loss: 0.08791419, Global Avg Loss: 0.11662419, Time: 0.0971 Steps: 66400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005640, Sample Num: 90240, Cur Loss: 0.00187539, Cur Avg Loss: 0.09146433, Log Avg loss: 0.07707174, Global Avg Loss: 0.11650542, Time: 0.3103 Steps: 66600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005840, Sample Num: 93440, Cur Loss: 0.00398334, Cur Avg Loss: 0.09176733, Log Avg loss: 0.10031209, Global Avg Loss: 0.11645694, Time: 0.0675 Steps: 66800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006040, Sample Num: 96640, Cur Loss: 0.00284765, Cur Avg Loss: 0.09192376, Log Avg loss: 0.09649130, Global Avg Loss: 0.11639734, Time: 0.0567 Steps: 67000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006240, Sample Num: 99840, Cur Loss: 0.00282143, Cur Avg Loss: 0.09217541, Log Avg loss: 0.09977539, Global Avg Loss: 0.11634787, Time: 0.1007 Steps: 67200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006440, Sample Num: 103040, Cur Loss: 0.51753467, Cur Avg Loss: 0.09244839, Log Avg loss: 0.10096545, Global Avg Loss: 0.11630222, Time: 0.0860 Steps: 67400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006640, Sample Num: 106240, Cur Loss: 0.00128535, Cur Avg Loss: 0.09183437, Log Avg loss: 0.07206297, Global Avg Loss: 0.11617134, Time: 0.1877 Steps: 67600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006840, Sample Num: 109440, Cur Loss: 0.00254920, Cur Avg Loss: 0.09201689, Log Avg loss: 0.09807632, Global Avg Loss: 0.11611796, Time: 0.0365 Steps: 67800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007040, Sample Num: 112640, Cur Loss: 0.00213965, Cur Avg Loss: 0.09166981, Log Avg loss: 0.07979958, Global Avg Loss: 0.11601114, Time: 0.1882 Steps: 68000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007240, Sample Num: 115840, Cur Loss: 0.22896124, Cur Avg Loss: 0.09177809, Log Avg loss: 0.09558960, Global Avg Loss: 0.11595125, Time: 0.3144 Steps: 68200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007440, Sample Num: 119040, Cur Loss: 0.44689310, Cur Avg Loss: 0.09188929, Log Avg loss: 0.09591489, Global Avg Loss: 0.11589267, Time: 0.0554 Steps: 68400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 007640, Sample Num: 122240, Cur Loss: 0.02490378, Cur Avg Loss: 0.09137186, Log Avg loss: 0.07212325, Global Avg Loss: 0.11576506, Time: 0.3036 Steps: 68600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 007840, Sample Num: 125440, Cur Loss: 0.17404242, Cur Avg Loss: 0.09168569, Log Avg loss: 0.10367405, Global Avg Loss: 0.11572991, Time: 0.1879 Steps: 68800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008040, Sample Num: 128640, Cur Loss: 0.00049284, Cur Avg Loss: 0.09142779, Log Avg loss: 0.08131832, Global Avg Loss: 0.11563017, Time: 0.0551 Steps: 69000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008240, Sample Num: 131840, Cur Loss: 0.00485386, Cur Avg Loss: 0.09137800, Log Avg loss: 0.08937639, Global Avg Loss: 0.11555429, Time: 0.3025 Steps: 69200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008440, Sample Num: 135040, Cur Loss: 0.43490398, Cur Avg Loss: 0.09163113, Log Avg loss: 0.10206002, Global Avg Loss: 0.11551540, Time: 0.1883 Steps: 69400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008640, Sample Num: 138240, Cur Loss: 0.03831734, Cur Avg Loss: 0.09173169, Log Avg loss: 0.09597523, Global Avg Loss: 0.11545925, Time: 0.0885 Steps: 69600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008840, Sample Num: 141440, Cur Loss: 0.00041067, Cur Avg Loss: 0.09217821, Log Avg loss: 0.11146806, Global Avg Loss: 0.11544781, Time: 0.0832 Steps: 69800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009040, Sample Num: 144640, Cur Loss: 0.61325002, Cur Avg Loss: 0.09191417, Log Avg loss: 0.08024366, Global Avg Loss: 0.11534723, Time: 0.0568 Steps: 70000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009240, Sample Num: 147840, Cur Loss: 0.05891429, Cur Avg Loss: 0.09212840, Log Avg loss: 0.10181144, Global Avg Loss: 0.11530867, Time: 0.0951 Steps: 70200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009440, Sample Num: 151040, Cur Loss: 0.01404910, Cur Avg Loss: 0.09222581, Log Avg loss: 0.09672631, Global Avg Loss: 0.11525588, Time: 0.0541 Steps: 70400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 009640, Sample Num: 154240, Cur Loss: 0.04749625, Cur Avg Loss: 0.09241899, Log Avg loss: 0.10153702, Global Avg Loss: 0.11521701, Time: 0.1860 Steps: 70600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 009840, Sample Num: 157440, Cur Loss: 0.04683495, Cur Avg Loss: 0.09251920, Log Avg loss: 0.09734914, Global Avg Loss: 0.11516654, Time: 0.3144 Steps: 70800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010040, Sample Num: 160640, Cur Loss: 0.01550608, Cur Avg Loss: 0.09225727, Log Avg loss: 0.07937027, Global Avg Loss: 0.11506570, Time: 0.0714 Steps: 71000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010240, Sample Num: 163840, Cur Loss: 0.32718712, Cur Avg Loss: 0.09212537, Log Avg loss: 0.08550398, Global Avg Loss: 0.11498267, Time: 0.3139 Steps: 71200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010440, Sample Num: 167040, Cur Loss: 0.00190626, Cur Avg Loss: 0.09195416, Log Avg loss: 0.08318821, Global Avg Loss: 0.11489361, Time: 0.3089 Steps: 71400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010640, Sample Num: 170240, Cur Loss: 0.01098448, Cur Avg Loss: 0.09200907, Log Avg loss: 0.09487536, Global Avg Loss: 0.11483769, Time: 0.3425 Steps: 71600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010840, Sample Num: 173440, Cur Loss: 0.00078899, Cur Avg Loss: 0.09143906, Log Avg loss: 0.06111473, Global Avg Loss: 0.11468804, Time: 0.1877 Steps: 71800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011040, Sample Num: 176640, Cur Loss: 0.65686166, Cur Avg Loss: 0.09128187, Log Avg loss: 0.08276237, Global Avg Loss: 0.11459936, Time: 0.0587 Steps: 72000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011240, Sample Num: 179840, Cur Loss: 0.34536165, Cur Avg Loss: 0.09127443, Log Avg loss: 0.09086335, Global Avg Loss: 0.11453361, Time: 0.1885 Steps: 72200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011440, Sample Num: 183040, Cur Loss: 0.00068613, Cur Avg Loss: 0.09115405, Log Avg loss: 0.08438888, Global Avg Loss: 0.11445034, Time: 0.0674 Steps: 72400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 011640, Sample Num: 186240, Cur Loss: 0.00058715, Cur Avg Loss: 0.09095907, Log Avg loss: 0.07980609, Global Avg Loss: 0.11435490, Time: 0.1984 Steps: 72600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 011840, Sample Num: 189440, Cur Loss: 0.00200333, Cur Avg Loss: 0.09094685, Log Avg loss: 0.09023546, Global Avg Loss: 0.11428864, Time: 0.0944 Steps: 72800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012040, Sample Num: 192640, Cur Loss: 0.00501489, Cur Avg Loss: 0.09069646, Log Avg loss: 0.07587358, Global Avg Loss: 0.11418339, Time: 0.1881 Steps: 73000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012240, Sample Num: 195840, Cur Loss: 0.22905058, Cur Avg Loss: 0.09038620, Log Avg loss: 0.07170837, Global Avg Loss: 0.11406734, Time: 0.0848 Steps: 73200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012440, Sample Num: 199040, Cur Loss: 0.08249930, Cur Avg Loss: 0.09022989, Log Avg loss: 0.08066394, Global Avg Loss: 0.11397632, Time: 0.0506 Steps: 73400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012640, Sample Num: 202240, Cur Loss: 0.26160499, Cur Avg Loss: 0.09007112, Log Avg loss: 0.08019555, Global Avg Loss: 0.11388452, Time: 0.1889 Steps: 73600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012840, Sample Num: 205440, Cur Loss: 0.30091605, Cur Avg Loss: 0.08987252, Log Avg loss: 0.07732099, Global Avg Loss: 0.11378544, Time: 0.0435 Steps: 73800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013040, Sample Num: 208640, Cur Loss: 0.00377642, Cur Avg Loss: 0.08997970, Log Avg loss: 0.09686091, Global Avg Loss: 0.11373969, Time: 0.2039 Steps: 74000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013240, Sample Num: 211840, Cur Loss: 0.25545260, Cur Avg Loss: 0.09015860, Log Avg loss: 0.10182257, Global Avg Loss: 0.11370757, Time: 0.1188 Steps: 74200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013440, Sample Num: 215040, Cur Loss: 0.04766457, Cur Avg Loss: 0.09023209, Log Avg loss: 0.09509756, Global Avg Loss: 0.11365755, Time: 0.0846 Steps: 74400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 013640, Sample Num: 218240, Cur Loss: 0.13629803, Cur Avg Loss: 0.09010933, Log Avg loss: 0.08185956, Global Avg Loss: 0.11357230, Time: 0.0688 Steps: 74600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 013840, Sample Num: 221440, Cur Loss: 0.00075380, Cur Avg Loss: 0.09015232, Log Avg loss: 0.09308427, Global Avg Loss: 0.11351752, Time: 0.1260 Steps: 74800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014040, Sample Num: 224640, Cur Loss: 0.67330664, Cur Avg Loss: 0.09018331, Log Avg loss: 0.09232816, Global Avg Loss: 0.11346101, Time: 0.0869 Steps: 75000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014240, Sample Num: 227840, Cur Loss: 0.00269150, Cur Avg Loss: 0.09023804, Log Avg loss: 0.09407960, Global Avg Loss: 0.11340946, Time: 0.0539 Steps: 75200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014440, Sample Num: 231040, Cur Loss: 0.01150474, Cur Avg Loss: 0.09031274, Log Avg loss: 0.09563156, Global Avg Loss: 0.11336231, Time: 0.0853 Steps: 75400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014640, Sample Num: 234240, Cur Loss: 0.14090656, Cur Avg Loss: 0.09029510, Log Avg loss: 0.08902173, Global Avg Loss: 0.11329792, Time: 0.1820 Steps: 75600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014840, Sample Num: 237440, Cur Loss: 0.00109805, Cur Avg Loss: 0.09016663, Log Avg loss: 0.08076257, Global Avg Loss: 0.11321207, Time: 0.0982 Steps: 75800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015040, Sample Num: 240640, Cur Loss: 0.09295314, Cur Avg Loss: 0.09007689, Log Avg loss: 0.08341809, Global Avg Loss: 0.11313366, Time: 0.3073 Steps: 76000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015240, Sample Num: 243840, Cur Loss: 0.01933503, Cur Avg Loss: 0.09016790, Log Avg loss: 0.09701193, Global Avg Loss: 0.11309135, Time: 0.1021 Steps: 76200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015440, Sample Num: 247040, Cur Loss: 0.00535446, Cur Avg Loss: 0.09019525, Log Avg loss: 0.09227876, Global Avg Loss: 0.11303687, Time: 0.1548 Steps: 76400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 015640, Sample Num: 250240, Cur Loss: 0.00144695, Cur Avg Loss: 0.08994253, Log Avg loss: 0.07043265, Global Avg Loss: 0.11292563, Time: 0.0720 Steps: 76600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 015840, Sample Num: 253440, Cur Loss: 0.00015272, Cur Avg Loss: 0.08986655, Log Avg loss: 0.08392506, Global Avg Loss: 0.11285011, Time: 0.0991 Steps: 76800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016040, Sample Num: 256640, Cur Loss: 0.00039650, Cur Avg Loss: 0.08984895, Log Avg loss: 0.08845491, Global Avg Loss: 0.11278674, Time: 0.0524 Steps: 77000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016240, Sample Num: 259840, Cur Loss: 0.00059364, Cur Avg Loss: 0.08983101, Log Avg loss: 0.08839281, Global Avg Loss: 0.11272355, Time: 0.0670 Steps: 77200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016440, Sample Num: 263040, Cur Loss: 0.00056595, Cur Avg Loss: 0.08980555, Log Avg loss: 0.08773787, Global Avg Loss: 0.11265898, Time: 0.0867 Steps: 77400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016640, Sample Num: 266240, Cur Loss: 0.00030356, Cur Avg Loss: 0.08972236, Log Avg loss: 0.08288419, Global Avg Loss: 0.11258224, Time: 0.0507 Steps: 77600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016840, Sample Num: 269440, Cur Loss: 0.00346189, Cur Avg Loss: 0.08961266, Log Avg loss: 0.08048517, Global Avg Loss: 0.11249973, Time: 0.1360 Steps: 77800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017040, Sample Num: 272640, Cur Loss: 0.00034490, Cur Avg Loss: 0.08955498, Log Avg loss: 0.08469898, Global Avg Loss: 0.11242845, Time: 0.0541 Steps: 78000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017240, Sample Num: 275840, Cur Loss: 0.00076953, Cur Avg Loss: 0.08969608, Log Avg loss: 0.10171760, Global Avg Loss: 0.11240106, Time: 0.1867 Steps: 78200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017440, Sample Num: 279040, Cur Loss: 0.03865220, Cur Avg Loss: 0.08976426, Log Avg loss: 0.09564134, Global Avg Loss: 0.11235830, Time: 0.0452 Steps: 78400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 017640, Sample Num: 282240, Cur Loss: 0.33010852, Cur Avg Loss: 0.08978146, Log Avg loss: 0.09128112, Global Avg Loss: 0.11230467, Time: 0.0615 Steps: 78600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 017840, Sample Num: 285440, Cur Loss: 0.00709100, Cur Avg Loss: 0.08957859, Log Avg loss: 0.07168547, Global Avg Loss: 0.11220157, Time: 0.1874 Steps: 78800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018040, Sample Num: 288640, Cur Loss: 0.00207933, Cur Avg Loss: 0.08965216, Log Avg loss: 0.09621440, Global Avg Loss: 0.11216110, Time: 0.0567 Steps: 79000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018240, Sample Num: 291840, Cur Loss: 0.00102854, Cur Avg Loss: 0.08957468, Log Avg loss: 0.08258645, Global Avg Loss: 0.11208642, Time: 0.0541 Steps: 79200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018440, Sample Num: 295040, Cur Loss: 0.35476413, Cur Avg Loss: 0.08962483, Log Avg loss: 0.09419861, Global Avg Loss: 0.11204136, Time: 0.3258 Steps: 79400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018640, Sample Num: 298240, Cur Loss: 0.27590999, Cur Avg Loss: 0.08969096, Log Avg loss: 0.09578745, Global Avg Loss: 0.11200052, Time: 0.3220 Steps: 79600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018840, Sample Num: 301440, Cur Loss: 0.00730941, Cur Avg Loss: 0.08962243, Log Avg loss: 0.08323570, Global Avg Loss: 0.11192843, Time: 0.0793 Steps: 79800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019040, Sample Num: 304640, Cur Loss: 0.00945600, Cur Avg Loss: 0.08963177, Log Avg loss: 0.09051129, Global Avg Loss: 0.11187489, Time: 0.0664 Steps: 80000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019240, Sample Num: 307840, Cur Loss: 0.01156862, Cur Avg Loss: 0.08970700, Log Avg loss: 0.09686952, Global Avg Loss: 0.11183747, Time: 0.1655 Steps: 80200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019440, Sample Num: 311040, Cur Loss: 0.00044202, Cur Avg Loss: 0.08977465, Log Avg loss: 0.09628197, Global Avg Loss: 0.11179877, Time: 0.3138 Steps: 80400, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 019640, Sample Num: 314240, Cur Loss: 0.02046733, Cur Avg Loss: 0.08968218, Log Avg loss: 0.08069471, Global Avg Loss: 0.11172159, Time: 0.0533 Steps: 80600, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 019840, Sample Num: 317440, Cur Loss: 0.00118043, Cur Avg Loss: 0.08954429, Log Avg loss: 0.07600376, Global Avg Loss: 0.11163318, Time: 0.0530 Steps: 80800, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 020040, Sample Num: 320640, Cur Loss: 0.00039282, Cur Avg Loss: 0.08947525, Log Avg loss: 0.08262649, Global Avg Loss: 0.11156156, Time: 0.0357 Steps: 81000, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 020240, Sample Num: 323840, Cur Loss: 0.07689531, Cur Avg Loss: 0.08939862, Log Avg loss: 0.08172031, Global Avg Loss: 0.11148806, Time: 0.3210 Steps: 81200, Updated lr: 0.000060 ***** Running evaluation checkpoint-81280 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-81280 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2763.652492, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.082534, "eval_total_loss": 223.665825, "eval_acc": 0.975893, "eval_prec": 0.967235, "eval_recall": 0.985268, "eval_f1": 0.976168, "eval_roc_auc": 0.996815, "eval_pr_auc": 0.996693, "eval_confusion_matrix": {"tn": 20902, "fp": 725, "fn": 320, "tp": 21402}, "eval_mcc2": 0.951951, "eval_mcc": 0.951951, "eval_sn": 0.985268, "eval_sp": 0.966477, "update_flag": true, "test_avg_loss": 0.083796, "test_total_loss": 340.545228, "test_acc": 0.975824, "test_prec": 0.966744, "test_recall": 0.985541, "test_f1": 0.976052, "test_roc_auc": 0.99683, "test_pr_auc": 0.996249, "test_confusion_matrix": {"tn": 31415, "fp": 1102, "fn": 470, "tp": 32035}, "test_mcc2": 0.951827, "test_mcc": 0.951827, "test_sn": 0.985541, "test_sp": 0.96611, "lr": 6.005911330049261e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.11147451843293124, "train_cur_epoch_loss": 1817.2467779700637, "train_cur_epoch_avg_loss": 0.0894314359237236, "train_cur_epoch_time": 2763.652492284775, "train_cur_epoch_avg_time": 0.13600652028960505, "epoch": 4, "step": 81280} ################################################## Training, Epoch: 0005, Batch: 000120, Sample Num: 1920, Cur Loss: 0.25411293, Cur Avg Loss: 0.06972937, Log Avg loss: 0.08093071, Global Avg Loss: 0.11141298, Time: 0.3211 Steps: 81400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000320, Sample Num: 5120, Cur Loss: 0.00180547, Cur Avg Loss: 0.08301193, Log Avg loss: 0.09098147, Global Avg Loss: 0.11136290, Time: 0.2980 Steps: 81600, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000520, Sample Num: 8320, Cur Loss: 0.37437657, Cur Avg Loss: 0.08657406, Log Avg loss: 0.09227346, Global Avg Loss: 0.11131623, Time: 0.4248 Steps: 81800, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000720, Sample Num: 11520, Cur Loss: 0.51410562, Cur Avg Loss: 0.08562431, Log Avg loss: 0.08315498, Global Avg Loss: 0.11124754, Time: 0.0413 Steps: 82000, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000920, Sample Num: 14720, Cur Loss: 0.00361286, Cur Avg Loss: 0.08448262, Log Avg loss: 0.08037250, Global Avg Loss: 0.11117242, Time: 0.1842 Steps: 82200, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 001120, Sample Num: 17920, Cur Loss: 0.61278492, Cur Avg Loss: 0.08263520, Log Avg loss: 0.07413711, Global Avg Loss: 0.11108253, Time: 0.0412 Steps: 82400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 001320, Sample Num: 21120, Cur Loss: 0.04492354, Cur Avg Loss: 0.08312510, Log Avg loss: 0.08586851, Global Avg Loss: 0.11102148, Time: 0.0391 Steps: 82600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00351499, Cur Avg Loss: 0.08261718, Log Avg loss: 0.07926490, Global Avg Loss: 0.11094477, Time: 0.1864 Steps: 82800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001720, Sample Num: 27520, Cur Loss: 0.00104581, Cur Avg Loss: 0.08463896, Log Avg loss: 0.10000452, Global Avg Loss: 0.11091841, Time: 0.1868 Steps: 83000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00765396, Cur Avg Loss: 0.08402107, Log Avg loss: 0.07870718, Global Avg Loss: 0.11084098, Time: 0.0346 Steps: 83200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002120, Sample Num: 33920, Cur Loss: 0.03783432, Cur Avg Loss: 0.08525771, Log Avg loss: 0.09712944, Global Avg Loss: 0.11080810, Time: 0.0563 Steps: 83400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00104536, Cur Avg Loss: 0.08475343, Log Avg loss: 0.07940807, Global Avg Loss: 0.11073298, Time: 0.2728 Steps: 83600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002520, Sample Num: 40320, Cur Loss: 0.47830978, Cur Avg Loss: 0.08628344, Log Avg loss: 0.10403159, Global Avg Loss: 0.11071698, Time: 0.1864 Steps: 83800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00479865, Cur Avg Loss: 0.08690625, Log Avg loss: 0.09475364, Global Avg Loss: 0.11067897, Time: 0.0936 Steps: 84000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002920, Sample Num: 46720, Cur Loss: 0.00359664, Cur Avg Loss: 0.08622821, Log Avg loss: 0.07700689, Global Avg Loss: 0.11059899, Time: 0.1813 Steps: 84200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 003120, Sample Num: 49920, Cur Loss: 0.01073111, Cur Avg Loss: 0.08553207, Log Avg loss: 0.07536840, Global Avg Loss: 0.11051551, Time: 0.1568 Steps: 84400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 003320, Sample Num: 53120, Cur Loss: 0.04900060, Cur Avg Loss: 0.08530553, Log Avg loss: 0.08177157, Global Avg Loss: 0.11044756, Time: 0.0605 Steps: 84600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00568517, Cur Avg Loss: 0.08543415, Log Avg loss: 0.08756921, Global Avg Loss: 0.11039360, Time: 0.1868 Steps: 84800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003720, Sample Num: 59520, Cur Loss: 0.29634356, Cur Avg Loss: 0.08559874, Log Avg loss: 0.08849557, Global Avg Loss: 0.11034207, Time: 0.1524 Steps: 85000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003920, Sample Num: 62720, Cur Loss: 0.05135648, Cur Avg Loss: 0.08518077, Log Avg loss: 0.07740646, Global Avg Loss: 0.11026476, Time: 0.0751 Steps: 85200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00622640, Cur Avg Loss: 0.08496244, Log Avg loss: 0.08068325, Global Avg Loss: 0.11019548, Time: 0.1885 Steps: 85400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004320, Sample Num: 69120, Cur Loss: 0.00564931, Cur Avg Loss: 0.08577648, Log Avg loss: 0.10254555, Global Avg Loss: 0.11017761, Time: 0.1846 Steps: 85600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004520, Sample Num: 72320, Cur Loss: 0.01626341, Cur Avg Loss: 0.08645507, Log Avg loss: 0.10111261, Global Avg Loss: 0.11015648, Time: 0.0596 Steps: 85800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004720, Sample Num: 75520, Cur Loss: 0.05408169, Cur Avg Loss: 0.08653496, Log Avg loss: 0.08834045, Global Avg Loss: 0.11010574, Time: 0.0368 Steps: 86000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004920, Sample Num: 78720, Cur Loss: 0.11226324, Cur Avg Loss: 0.08636160, Log Avg loss: 0.08227040, Global Avg Loss: 0.11004116, Time: 0.2269 Steps: 86200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 005120, Sample Num: 81920, Cur Loss: 0.27192187, Cur Avg Loss: 0.08631638, Log Avg loss: 0.08520385, Global Avg Loss: 0.10998367, Time: 0.0986 Steps: 86400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 005320, Sample Num: 85120, Cur Loss: 0.00436134, Cur Avg Loss: 0.08647634, Log Avg loss: 0.09057141, Global Avg Loss: 0.10993883, Time: 0.2038 Steps: 86600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005520, Sample Num: 88320, Cur Loss: 0.05684949, Cur Avg Loss: 0.08658545, Log Avg loss: 0.08948781, Global Avg Loss: 0.10989171, Time: 0.1933 Steps: 86800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005720, Sample Num: 91520, Cur Loss: 0.01074147, Cur Avg Loss: 0.08647866, Log Avg loss: 0.08353123, Global Avg Loss: 0.10983111, Time: 0.2131 Steps: 87000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005920, Sample Num: 94720, Cur Loss: 0.00083768, Cur Avg Loss: 0.08637596, Log Avg loss: 0.08343877, Global Avg Loss: 0.10977058, Time: 0.0584 Steps: 87200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006120, Sample Num: 97920, Cur Loss: 0.20402634, Cur Avg Loss: 0.08657278, Log Avg loss: 0.09239849, Global Avg Loss: 0.10973083, Time: 0.1191 Steps: 87400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006320, Sample Num: 101120, Cur Loss: 0.02563468, Cur Avg Loss: 0.08659485, Log Avg loss: 0.08727030, Global Avg Loss: 0.10967955, Time: 0.2983 Steps: 87600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006520, Sample Num: 104320, Cur Loss: 0.44258067, Cur Avg Loss: 0.08649322, Log Avg loss: 0.08328188, Global Avg Loss: 0.10961942, Time: 0.2982 Steps: 87800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006720, Sample Num: 107520, Cur Loss: 0.01259253, Cur Avg Loss: 0.08595033, Log Avg loss: 0.06825184, Global Avg Loss: 0.10952540, Time: 0.2030 Steps: 88000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006920, Sample Num: 110720, Cur Loss: 0.00607155, Cur Avg Loss: 0.08583446, Log Avg loss: 0.08194128, Global Avg Loss: 0.10946285, Time: 0.1877 Steps: 88200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 007120, Sample Num: 113920, Cur Loss: 0.42091146, Cur Avg Loss: 0.08553356, Log Avg loss: 0.07512248, Global Avg Loss: 0.10938516, Time: 0.2311 Steps: 88400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 007320, Sample Num: 117120, Cur Loss: 0.00065891, Cur Avg Loss: 0.08583973, Log Avg loss: 0.09673953, Global Avg Loss: 0.10935661, Time: 0.0521 Steps: 88600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007520, Sample Num: 120320, Cur Loss: 0.00227159, Cur Avg Loss: 0.08558200, Log Avg loss: 0.07614882, Global Avg Loss: 0.10928182, Time: 0.0748 Steps: 88800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007720, Sample Num: 123520, Cur Loss: 0.00500494, Cur Avg Loss: 0.08536882, Log Avg loss: 0.07735335, Global Avg Loss: 0.10921007, Time: 0.0821 Steps: 89000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007920, Sample Num: 126720, Cur Loss: 0.33967432, Cur Avg Loss: 0.08539215, Log Avg loss: 0.08629283, Global Avg Loss: 0.10915869, Time: 0.0736 Steps: 89200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008120, Sample Num: 129920, Cur Loss: 0.01147114, Cur Avg Loss: 0.08541285, Log Avg loss: 0.08623258, Global Avg Loss: 0.10910740, Time: 0.1876 Steps: 89400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008320, Sample Num: 133120, Cur Loss: 0.04735721, Cur Avg Loss: 0.08523427, Log Avg loss: 0.07798373, Global Avg Loss: 0.10903792, Time: 0.3122 Steps: 89600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008520, Sample Num: 136320, Cur Loss: 0.00095427, Cur Avg Loss: 0.08531461, Log Avg loss: 0.08865687, Global Avg Loss: 0.10899253, Time: 0.0925 Steps: 89800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008720, Sample Num: 139520, Cur Loss: 0.11027380, Cur Avg Loss: 0.08554436, Log Avg loss: 0.09533168, Global Avg Loss: 0.10896217, Time: 0.0721 Steps: 90000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008920, Sample Num: 142720, Cur Loss: 0.06792304, Cur Avg Loss: 0.08561002, Log Avg loss: 0.08847286, Global Avg Loss: 0.10891674, Time: 0.3037 Steps: 90200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 009120, Sample Num: 145920, Cur Loss: 0.00224554, Cur Avg Loss: 0.08538887, Log Avg loss: 0.07552575, Global Avg Loss: 0.10884287, Time: 0.0856 Steps: 90400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 009320, Sample Num: 149120, Cur Loss: 0.01100793, Cur Avg Loss: 0.08526429, Log Avg loss: 0.07958344, Global Avg Loss: 0.10877828, Time: 0.1856 Steps: 90600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009520, Sample Num: 152320, Cur Loss: 0.00061968, Cur Avg Loss: 0.08564427, Log Avg loss: 0.10335127, Global Avg Loss: 0.10876633, Time: 0.3017 Steps: 90800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009720, Sample Num: 155520, Cur Loss: 0.49826142, Cur Avg Loss: 0.08556646, Log Avg loss: 0.08186234, Global Avg Loss: 0.10870720, Time: 0.2764 Steps: 91000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009920, Sample Num: 158720, Cur Loss: 0.00308952, Cur Avg Loss: 0.08541569, Log Avg loss: 0.07808837, Global Avg Loss: 0.10864005, Time: 0.1864 Steps: 91200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010120, Sample Num: 161920, Cur Loss: 0.15807377, Cur Avg Loss: 0.08509731, Log Avg loss: 0.06930602, Global Avg Loss: 0.10855398, Time: 0.3038 Steps: 91400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010320, Sample Num: 165120, Cur Loss: 0.01605032, Cur Avg Loss: 0.08477787, Log Avg loss: 0.06861403, Global Avg Loss: 0.10846677, Time: 0.0947 Steps: 91600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010520, Sample Num: 168320, Cur Loss: 0.01422659, Cur Avg Loss: 0.08446123, Log Avg loss: 0.06812232, Global Avg Loss: 0.10837888, Time: 0.0854 Steps: 91800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010720, Sample Num: 171520, Cur Loss: 0.00077056, Cur Avg Loss: 0.08426548, Log Avg loss: 0.07396928, Global Avg Loss: 0.10830407, Time: 0.1867 Steps: 92000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010920, Sample Num: 174720, Cur Loss: 0.00015548, Cur Avg Loss: 0.08403542, Log Avg loss: 0.07170440, Global Avg Loss: 0.10822468, Time: 0.0542 Steps: 92200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 011120, Sample Num: 177920, Cur Loss: 0.00108580, Cur Avg Loss: 0.08399611, Log Avg loss: 0.08184939, Global Avg Loss: 0.10816759, Time: 0.0635 Steps: 92400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 011320, Sample Num: 181120, Cur Loss: 0.41929910, Cur Avg Loss: 0.08414056, Log Avg loss: 0.09217215, Global Avg Loss: 0.10813305, Time: 0.0856 Steps: 92600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011520, Sample Num: 184320, Cur Loss: 0.00308664, Cur Avg Loss: 0.08414592, Log Avg loss: 0.08444930, Global Avg Loss: 0.10808200, Time: 0.1863 Steps: 92800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011720, Sample Num: 187520, Cur Loss: 0.00031499, Cur Avg Loss: 0.08390663, Log Avg loss: 0.07012342, Global Avg Loss: 0.10800037, Time: 0.1046 Steps: 93000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011920, Sample Num: 190720, Cur Loss: 0.00099212, Cur Avg Loss: 0.08378533, Log Avg loss: 0.07667739, Global Avg Loss: 0.10793315, Time: 0.0958 Steps: 93200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012120, Sample Num: 193920, Cur Loss: 0.00316544, Cur Avg Loss: 0.08360310, Log Avg loss: 0.07274198, Global Avg Loss: 0.10785780, Time: 0.0606 Steps: 93400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012320, Sample Num: 197120, Cur Loss: 0.07054545, Cur Avg Loss: 0.08313478, Log Avg loss: 0.05475457, Global Avg Loss: 0.10774433, Time: 0.1883 Steps: 93600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012520, Sample Num: 200320, Cur Loss: 0.02690338, Cur Avg Loss: 0.08318505, Log Avg loss: 0.08628171, Global Avg Loss: 0.10769857, Time: 0.0571 Steps: 93800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012720, Sample Num: 203520, Cur Loss: 0.18970093, Cur Avg Loss: 0.08286784, Log Avg loss: 0.06301061, Global Avg Loss: 0.10760349, Time: 0.2210 Steps: 94000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012920, Sample Num: 206720, Cur Loss: 0.00068524, Cur Avg Loss: 0.08244739, Log Avg loss: 0.05570703, Global Avg Loss: 0.10749330, Time: 0.3640 Steps: 94200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 013120, Sample Num: 209920, Cur Loss: 0.01600775, Cur Avg Loss: 0.08260562, Log Avg loss: 0.09282690, Global Avg Loss: 0.10746223, Time: 0.2040 Steps: 94400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 013320, Sample Num: 213120, Cur Loss: 0.00852424, Cur Avg Loss: 0.08284198, Log Avg loss: 0.09834737, Global Avg Loss: 0.10744296, Time: 0.0902 Steps: 94600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013520, Sample Num: 216320, Cur Loss: 0.19693632, Cur Avg Loss: 0.08291409, Log Avg loss: 0.08771632, Global Avg Loss: 0.10740134, Time: 0.2531 Steps: 94800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013720, Sample Num: 219520, Cur Loss: 0.34647086, Cur Avg Loss: 0.08291834, Log Avg loss: 0.08320594, Global Avg Loss: 0.10735041, Time: 0.0500 Steps: 95000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013920, Sample Num: 222720, Cur Loss: 0.00891845, Cur Avg Loss: 0.08268202, Log Avg loss: 0.06647019, Global Avg Loss: 0.10726452, Time: 0.1831 Steps: 95200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014120, Sample Num: 225920, Cur Loss: 0.22331183, Cur Avg Loss: 0.08283281, Log Avg loss: 0.09332821, Global Avg Loss: 0.10723531, Time: 0.1876 Steps: 95400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014320, Sample Num: 229120, Cur Loss: 0.37718913, Cur Avg Loss: 0.08277718, Log Avg loss: 0.07884970, Global Avg Loss: 0.10717592, Time: 0.1881 Steps: 95600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014520, Sample Num: 232320, Cur Loss: 0.00063537, Cur Avg Loss: 0.08274829, Log Avg loss: 0.08067925, Global Avg Loss: 0.10712061, Time: 0.3035 Steps: 95800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014720, Sample Num: 235520, Cur Loss: 0.02791328, Cur Avg Loss: 0.08286912, Log Avg loss: 0.09164133, Global Avg Loss: 0.10708836, Time: 0.1895 Steps: 96000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014920, Sample Num: 238720, Cur Loss: 0.00094255, Cur Avg Loss: 0.08272531, Log Avg loss: 0.07214125, Global Avg Loss: 0.10701570, Time: 0.0538 Steps: 96200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015120, Sample Num: 241920, Cur Loss: 0.34182349, Cur Avg Loss: 0.08276097, Log Avg loss: 0.08542137, Global Avg Loss: 0.10697090, Time: 0.3067 Steps: 96400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015320, Sample Num: 245120, Cur Loss: 0.00174856, Cur Avg Loss: 0.08276128, Log Avg loss: 0.08278427, Global Avg Loss: 0.10692082, Time: 0.0446 Steps: 96600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015520, Sample Num: 248320, Cur Loss: 0.00400509, Cur Avg Loss: 0.08280139, Log Avg loss: 0.08587423, Global Avg Loss: 0.10687734, Time: 0.0664 Steps: 96800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 015720, Sample Num: 251520, Cur Loss: 0.00065386, Cur Avg Loss: 0.08259426, Log Avg loss: 0.06652045, Global Avg Loss: 0.10679413, Time: 0.0516 Steps: 97000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 015920, Sample Num: 254720, Cur Loss: 0.00641946, Cur Avg Loss: 0.08265952, Log Avg loss: 0.08778940, Global Avg Loss: 0.10675503, Time: 0.3132 Steps: 97200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016120, Sample Num: 257920, Cur Loss: 0.00034915, Cur Avg Loss: 0.08262852, Log Avg loss: 0.08016048, Global Avg Loss: 0.10670042, Time: 0.0673 Steps: 97400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016320, Sample Num: 261120, Cur Loss: 0.01525729, Cur Avg Loss: 0.08262549, Log Avg loss: 0.08238191, Global Avg Loss: 0.10665058, Time: 0.2396 Steps: 97600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016520, Sample Num: 264320, Cur Loss: 0.00167924, Cur Avg Loss: 0.08241747, Log Avg loss: 0.06544274, Global Avg Loss: 0.10656631, Time: 0.1876 Steps: 97800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016720, Sample Num: 267520, Cur Loss: 0.00082550, Cur Avg Loss: 0.08241190, Log Avg loss: 0.08195201, Global Avg Loss: 0.10651608, Time: 0.1752 Steps: 98000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016920, Sample Num: 270720, Cur Loss: 0.57533568, Cur Avg Loss: 0.08241050, Log Avg loss: 0.08229330, Global Avg Loss: 0.10646675, Time: 0.1816 Steps: 98200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017120, Sample Num: 273920, Cur Loss: 0.13436338, Cur Avg Loss: 0.08255676, Log Avg loss: 0.09493060, Global Avg Loss: 0.10644330, Time: 0.0567 Steps: 98400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017320, Sample Num: 277120, Cur Loss: 0.05720409, Cur Avg Loss: 0.08266948, Log Avg loss: 0.09231840, Global Avg Loss: 0.10641465, Time: 0.0922 Steps: 98600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017520, Sample Num: 280320, Cur Loss: 0.73271924, Cur Avg Loss: 0.08268870, Log Avg loss: 0.08435305, Global Avg Loss: 0.10636999, Time: 0.2588 Steps: 98800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 017720, Sample Num: 283520, Cur Loss: 0.14787985, Cur Avg Loss: 0.08283187, Log Avg loss: 0.09537287, Global Avg Loss: 0.10634777, Time: 0.0998 Steps: 99000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 017920, Sample Num: 286720, Cur Loss: 0.00189393, Cur Avg Loss: 0.08263573, Log Avg loss: 0.06525794, Global Avg Loss: 0.10626493, Time: 0.2035 Steps: 99200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018120, Sample Num: 289920, Cur Loss: 0.00315519, Cur Avg Loss: 0.08267865, Log Avg loss: 0.08652457, Global Avg Loss: 0.10622521, Time: 0.0500 Steps: 99400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018320, Sample Num: 293120, Cur Loss: 0.11239763, Cur Avg Loss: 0.08273367, Log Avg loss: 0.08771835, Global Avg Loss: 0.10618805, Time: 0.2291 Steps: 99600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018520, Sample Num: 296320, Cur Loss: 0.01500863, Cur Avg Loss: 0.08279855, Log Avg loss: 0.08874190, Global Avg Loss: 0.10615309, Time: 0.0679 Steps: 99800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018720, Sample Num: 299520, Cur Loss: 0.30902535, Cur Avg Loss: 0.08275960, Log Avg loss: 0.07915273, Global Avg Loss: 0.10609909, Time: 0.0589 Steps: 100000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018920, Sample Num: 302720, Cur Loss: 0.00935462, Cur Avg Loss: 0.08275411, Log Avg loss: 0.08224049, Global Avg Loss: 0.10605146, Time: 0.2029 Steps: 100200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019120, Sample Num: 305920, Cur Loss: 0.00252810, Cur Avg Loss: 0.08285276, Log Avg loss: 0.09218463, Global Avg Loss: 0.10602384, Time: 0.1973 Steps: 100400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019320, Sample Num: 309120, Cur Loss: 0.23183371, Cur Avg Loss: 0.08301865, Log Avg loss: 0.09887797, Global Avg Loss: 0.10600963, Time: 0.0862 Steps: 100600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019520, Sample Num: 312320, Cur Loss: 0.00119646, Cur Avg Loss: 0.08287036, Log Avg loss: 0.06854536, Global Avg Loss: 0.10593530, Time: 0.0366 Steps: 100800, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 019720, Sample Num: 315520, Cur Loss: 0.06706768, Cur Avg Loss: 0.08275230, Log Avg loss: 0.07122969, Global Avg Loss: 0.10586658, Time: 0.0816 Steps: 101000, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 019920, Sample Num: 318720, Cur Loss: 0.13915789, Cur Avg Loss: 0.08267479, Log Avg loss: 0.07503221, Global Avg Loss: 0.10580564, Time: 0.2922 Steps: 101200, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 020120, Sample Num: 321920, Cur Loss: 0.00307677, Cur Avg Loss: 0.08260402, Log Avg loss: 0.07555497, Global Avg Loss: 0.10574597, Time: 0.0386 Steps: 101400, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 020320, Sample Num: 325113, Cur Loss: 0.00267062, Cur Avg Loss: 0.08256022, Log Avg loss: 0.07815464, Global Avg Loss: 0.10569166, Time: 0.1102 Steps: 101600, Updated lr: 0.000050 ***** Running evaluation checkpoint-101600 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-101600 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2761.965097, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.067393, "eval_total_loss": 182.635337, "eval_acc": 0.979815, "eval_prec": 0.977595, "eval_recall": 0.98223, "eval_f1": 0.979907, "eval_roc_auc": 0.997394, "eval_pr_auc": 0.997385, "eval_confusion_matrix": {"tn": 21138, "fp": 489, "fn": 386, "tp": 21336}, "eval_mcc2": 0.95964, "eval_mcc": 0.95964, "eval_sn": 0.98223, "eval_sp": 0.977389, "update_flag": true, "test_avg_loss": 0.066342, "test_total_loss": 269.613844, "test_acc": 0.980422, "test_prec": 0.977729, "test_recall": 0.983233, "test_f1": 0.980473, "test_roc_auc": 0.997407, "test_pr_auc": 0.996885, "test_confusion_matrix": {"tn": 31789, "fp": 728, "fn": 545, "tp": 31960}, "test_mcc2": 0.960859, "test_mcc": 0.960859, "test_sn": 0.983233, "test_sp": 0.977612, "lr": 5.0049261083743846e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.10569165943627583, "train_cur_epoch_loss": 1677.6237404969725, "train_cur_epoch_avg_loss": 0.08256022344965416, "train_cur_epoch_time": 2761.9650971889496, "train_cur_epoch_avg_time": 0.13592347919236958, "epoch": 5, "step": 101600} ################################################## Training, Epoch: 0006, Batch: 000200, Sample Num: 3200, Cur Loss: 0.00120089, Cur Avg Loss: 0.07102521, Log Avg loss: 0.07102521, Global Avg Loss: 0.10562355, Time: 0.2984 Steps: 101800, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000400, Sample Num: 6400, Cur Loss: 0.00210994, Cur Avg Loss: 0.08024080, Log Avg loss: 0.08945639, Global Avg Loss: 0.10559185, Time: 0.0499 Steps: 102000, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000600, Sample Num: 9600, Cur Loss: 0.24287128, Cur Avg Loss: 0.08436411, Log Avg loss: 0.09261074, Global Avg Loss: 0.10556645, Time: 0.0517 Steps: 102200, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000800, Sample Num: 12800, Cur Loss: 0.00107364, Cur Avg Loss: 0.08397244, Log Avg loss: 0.08279743, Global Avg Loss: 0.10552198, Time: 0.1888 Steps: 102400, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 001000, Sample Num: 16000, Cur Loss: 0.00041870, Cur Avg Loss: 0.08088748, Log Avg loss: 0.06854763, Global Avg Loss: 0.10544990, Time: 0.0515 Steps: 102600, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 001200, Sample Num: 19200, Cur Loss: 0.01791146, Cur Avg Loss: 0.07955202, Log Avg loss: 0.07287472, Global Avg Loss: 0.10538653, Time: 0.2702 Steps: 102800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001400, Sample Num: 22400, Cur Loss: 0.09743925, Cur Avg Loss: 0.07881654, Log Avg loss: 0.07440364, Global Avg Loss: 0.10532637, Time: 0.1880 Steps: 103000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00027896, Cur Avg Loss: 0.07810622, Log Avg loss: 0.07313398, Global Avg Loss: 0.10526398, Time: 0.0805 Steps: 103200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001800, Sample Num: 28800, Cur Loss: 0.28258067, Cur Avg Loss: 0.07850652, Log Avg loss: 0.08170890, Global Avg Loss: 0.10521842, Time: 0.1543 Steps: 103400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00049582, Cur Avg Loss: 0.07717367, Log Avg loss: 0.06517806, Global Avg Loss: 0.10514112, Time: 0.0467 Steps: 103600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002200, Sample Num: 35200, Cur Loss: 0.07375062, Cur Avg Loss: 0.07809114, Log Avg loss: 0.08726585, Global Avg Loss: 0.10510668, Time: 0.0848 Steps: 103800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002400, Sample Num: 38400, Cur Loss: 0.17211531, Cur Avg Loss: 0.07710417, Log Avg loss: 0.06624746, Global Avg Loss: 0.10503195, Time: 0.0886 Steps: 104000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00095113, Cur Avg Loss: 0.07891857, Log Avg loss: 0.10069137, Global Avg Loss: 0.10502362, Time: 0.2957 Steps: 104200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00023471, Cur Avg Loss: 0.07846568, Log Avg loss: 0.07257819, Global Avg Loss: 0.10496146, Time: 0.0587 Steps: 104400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00037712, Cur Avg Loss: 0.07839463, Log Avg loss: 0.07739985, Global Avg Loss: 0.10490876, Time: 0.0538 Steps: 104600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00057525, Cur Avg Loss: 0.07669277, Log Avg loss: 0.05116496, Global Avg Loss: 0.10480620, Time: 0.1867 Steps: 104800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003400, Sample Num: 54400, Cur Loss: 0.19959535, Cur Avg Loss: 0.07672014, Log Avg loss: 0.07715799, Global Avg Loss: 0.10475353, Time: 0.0605 Steps: 105000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003600, Sample Num: 57600, Cur Loss: 0.09725064, Cur Avg Loss: 0.07651328, Log Avg loss: 0.07299662, Global Avg Loss: 0.10469316, Time: 0.0804 Steps: 105200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003800, Sample Num: 60800, Cur Loss: 0.00147788, Cur Avg Loss: 0.07632673, Log Avg loss: 0.07296882, Global Avg Loss: 0.10463296, Time: 0.0556 Steps: 105400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004000, Sample Num: 64000, Cur Loss: 0.12224810, Cur Avg Loss: 0.07664746, Log Avg loss: 0.08274148, Global Avg Loss: 0.10459150, Time: 0.0682 Steps: 105600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004200, Sample Num: 67200, Cur Loss: 0.24511316, Cur Avg Loss: 0.07687757, Log Avg loss: 0.08147964, Global Avg Loss: 0.10454781, Time: 0.3101 Steps: 105800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004400, Sample Num: 70400, Cur Loss: 0.37241110, Cur Avg Loss: 0.07634320, Log Avg loss: 0.06512154, Global Avg Loss: 0.10447342, Time: 0.0377 Steps: 106000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00762295, Cur Avg Loss: 0.07668782, Log Avg loss: 0.08426931, Global Avg Loss: 0.10443537, Time: 0.0846 Steps: 106200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004800, Sample Num: 76800, Cur Loss: 0.23786885, Cur Avg Loss: 0.07668518, Log Avg loss: 0.07662468, Global Avg Loss: 0.10438310, Time: 0.2064 Steps: 106400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 005000, Sample Num: 80000, Cur Loss: 0.00985410, Cur Avg Loss: 0.07632268, Log Avg loss: 0.06762253, Global Avg Loss: 0.10431413, Time: 0.0554 Steps: 106600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00091252, Cur Avg Loss: 0.07668984, Log Avg loss: 0.08586888, Global Avg Loss: 0.10427959, Time: 0.2978 Steps: 106800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00036312, Cur Avg Loss: 0.07638853, Log Avg loss: 0.06855440, Global Avg Loss: 0.10421281, Time: 0.1878 Steps: 107000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005600, Sample Num: 89600, Cur Loss: 0.30685243, Cur Avg Loss: 0.07594109, Log Avg loss: 0.06386039, Global Avg Loss: 0.10413753, Time: 0.1868 Steps: 107200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005800, Sample Num: 92800, Cur Loss: 0.02665894, Cur Avg Loss: 0.07544356, Log Avg loss: 0.06151267, Global Avg Loss: 0.10405815, Time: 0.2032 Steps: 107400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006000, Sample Num: 96000, Cur Loss: 0.20440924, Cur Avg Loss: 0.07571602, Log Avg loss: 0.08361722, Global Avg Loss: 0.10402016, Time: 0.0920 Steps: 107600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006200, Sample Num: 99200, Cur Loss: 0.05157486, Cur Avg Loss: 0.07576285, Log Avg loss: 0.07716769, Global Avg Loss: 0.10397034, Time: 0.2719 Steps: 107800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00696473, Cur Avg Loss: 0.07600102, Log Avg loss: 0.08338439, Global Avg Loss: 0.10393221, Time: 0.0953 Steps: 108000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006600, Sample Num: 105600, Cur Loss: 0.41309932, Cur Avg Loss: 0.07575075, Log Avg loss: 0.06774222, Global Avg Loss: 0.10386532, Time: 0.1885 Steps: 108200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006800, Sample Num: 108800, Cur Loss: 0.03331136, Cur Avg Loss: 0.07591922, Log Avg loss: 0.08147851, Global Avg Loss: 0.10382402, Time: 0.0948 Steps: 108400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00010882, Cur Avg Loss: 0.07535244, Log Avg loss: 0.05608189, Global Avg Loss: 0.10373609, Time: 0.0727 Steps: 108600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007200, Sample Num: 115200, Cur Loss: 0.05172043, Cur Avg Loss: 0.07558930, Log Avg loss: 0.08387963, Global Avg Loss: 0.10369959, Time: 0.0554 Steps: 108800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00119972, Cur Avg Loss: 0.07547785, Log Avg loss: 0.07146563, Global Avg Loss: 0.10364045, Time: 0.2974 Steps: 109000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00116142, Cur Avg Loss: 0.07509595, Log Avg loss: 0.06096577, Global Avg Loss: 0.10356229, Time: 0.2199 Steps: 109200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 007800, Sample Num: 124800, Cur Loss: 0.00152670, Cur Avg Loss: 0.07530714, Log Avg loss: 0.08333232, Global Avg Loss: 0.10352530, Time: 0.1865 Steps: 109400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008000, Sample Num: 128000, Cur Loss: 0.74363941, Cur Avg Loss: 0.07539619, Log Avg loss: 0.07886887, Global Avg Loss: 0.10348031, Time: 0.3709 Steps: 109600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00345260, Cur Avg Loss: 0.07539329, Log Avg loss: 0.07527729, Global Avg Loss: 0.10342894, Time: 0.3112 Steps: 109800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008400, Sample Num: 134400, Cur Loss: 0.00450029, Cur Avg Loss: 0.07528588, Log Avg loss: 0.07088211, Global Avg Loss: 0.10336976, Time: 0.1887 Steps: 110000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008600, Sample Num: 137600, Cur Loss: 0.01136134, Cur Avg Loss: 0.07543991, Log Avg loss: 0.08190939, Global Avg Loss: 0.10333082, Time: 0.2206 Steps: 110200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00406164, Cur Avg Loss: 0.07548476, Log Avg loss: 0.07741334, Global Avg Loss: 0.10328386, Time: 0.1864 Steps: 110400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00108360, Cur Avg Loss: 0.07541158, Log Avg loss: 0.07219147, Global Avg Loss: 0.10322764, Time: 0.0519 Steps: 110600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009200, Sample Num: 147200, Cur Loss: 0.19513348, Cur Avg Loss: 0.07555097, Log Avg loss: 0.08182363, Global Avg Loss: 0.10318900, Time: 0.0857 Steps: 110800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009400, Sample Num: 150400, Cur Loss: 0.11957604, Cur Avg Loss: 0.07530827, Log Avg loss: 0.06414394, Global Avg Loss: 0.10311865, Time: 0.3128 Steps: 111000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00595240, Cur Avg Loss: 0.07546091, Log Avg loss: 0.08263524, Global Avg Loss: 0.10308181, Time: 0.0830 Steps: 111200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 009800, Sample Num: 156800, Cur Loss: 0.36747316, Cur Avg Loss: 0.07528277, Log Avg loss: 0.06673206, Global Avg Loss: 0.10301655, Time: 0.1881 Steps: 111400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010000, Sample Num: 160000, Cur Loss: 0.01222452, Cur Avg Loss: 0.07518782, Log Avg loss: 0.07053510, Global Avg Loss: 0.10295834, Time: 0.1606 Steps: 111600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010200, Sample Num: 163200, Cur Loss: 0.09003446, Cur Avg Loss: 0.07491682, Log Avg loss: 0.06136653, Global Avg Loss: 0.10288394, Time: 0.1882 Steps: 111800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010400, Sample Num: 166400, Cur Loss: 0.45876485, Cur Avg Loss: 0.07474760, Log Avg loss: 0.06611781, Global Avg Loss: 0.10281828, Time: 0.1420 Steps: 112000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00340999, Cur Avg Loss: 0.07511577, Log Avg loss: 0.09426035, Global Avg Loss: 0.10280303, Time: 0.1602 Steps: 112200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00007772, Cur Avg Loss: 0.07461302, Log Avg loss: 0.04796737, Global Avg Loss: 0.10270546, Time: 0.1878 Steps: 112400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00233041, Cur Avg Loss: 0.07446789, Log Avg loss: 0.06663072, Global Avg Loss: 0.10264138, Time: 0.3057 Steps: 112600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011200, Sample Num: 179200, Cur Loss: 0.38011560, Cur Avg Loss: 0.07446412, Log Avg loss: 0.07425693, Global Avg Loss: 0.10259105, Time: 0.2978 Steps: 112800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00144148, Cur Avg Loss: 0.07446739, Log Avg loss: 0.07465020, Global Avg Loss: 0.10254160, Time: 0.0482 Steps: 113000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00144755, Cur Avg Loss: 0.07441703, Log Avg loss: 0.07154705, Global Avg Loss: 0.10248684, Time: 0.0536 Steps: 113200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 011800, Sample Num: 188800, Cur Loss: 0.01138078, Cur Avg Loss: 0.07434021, Log Avg loss: 0.06988457, Global Avg Loss: 0.10242934, Time: 0.0542 Steps: 113400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012000, Sample Num: 192000, Cur Loss: 0.00121729, Cur Avg Loss: 0.07429346, Log Avg loss: 0.07153501, Global Avg Loss: 0.10237495, Time: 0.0600 Steps: 113600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012200, Sample Num: 195200, Cur Loss: 0.00034851, Cur Avg Loss: 0.07403966, Log Avg loss: 0.05881151, Global Avg Loss: 0.10229839, Time: 0.0912 Steps: 113800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012400, Sample Num: 198400, Cur Loss: 0.00074713, Cur Avg Loss: 0.07399294, Log Avg loss: 0.07114309, Global Avg Loss: 0.10224373, Time: 0.0579 Steps: 114000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012600, Sample Num: 201600, Cur Loss: 0.01454851, Cur Avg Loss: 0.07378541, Log Avg loss: 0.06091837, Global Avg Loss: 0.10217135, Time: 0.0735 Steps: 114200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012800, Sample Num: 204800, Cur Loss: 0.00068294, Cur Avg Loss: 0.07377628, Log Avg loss: 0.07320119, Global Avg Loss: 0.10212071, Time: 0.0552 Steps: 114400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013000, Sample Num: 208000, Cur Loss: 0.00023182, Cur Avg Loss: 0.07365517, Log Avg loss: 0.06590409, Global Avg Loss: 0.10205750, Time: 0.0926 Steps: 114600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013200, Sample Num: 211200, Cur Loss: 0.00026744, Cur Avg Loss: 0.07386579, Log Avg loss: 0.08755630, Global Avg Loss: 0.10203224, Time: 0.0513 Steps: 114800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013400, Sample Num: 214400, Cur Loss: 0.02202464, Cur Avg Loss: 0.07399572, Log Avg loss: 0.08257111, Global Avg Loss: 0.10199839, Time: 0.1901 Steps: 115000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 013600, Sample Num: 217600, Cur Loss: 0.00017115, Cur Avg Loss: 0.07389493, Log Avg loss: 0.06714186, Global Avg Loss: 0.10193788, Time: 0.2026 Steps: 115200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 013800, Sample Num: 220800, Cur Loss: 0.00230160, Cur Avg Loss: 0.07375463, Log Avg loss: 0.06421451, Global Avg Loss: 0.10187250, Time: 0.1135 Steps: 115400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014000, Sample Num: 224000, Cur Loss: 0.00296774, Cur Avg Loss: 0.07365033, Log Avg loss: 0.06645379, Global Avg Loss: 0.10181122, Time: 0.1900 Steps: 115600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014200, Sample Num: 227200, Cur Loss: 0.02651858, Cur Avg Loss: 0.07358903, Log Avg loss: 0.06929803, Global Avg Loss: 0.10175507, Time: 0.0473 Steps: 115800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014400, Sample Num: 230400, Cur Loss: 0.06523409, Cur Avg Loss: 0.07359607, Log Avg loss: 0.07409531, Global Avg Loss: 0.10170738, Time: 0.0391 Steps: 116000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014600, Sample Num: 233600, Cur Loss: 0.00186061, Cur Avg Loss: 0.07344590, Log Avg loss: 0.06263423, Global Avg Loss: 0.10164013, Time: 0.3136 Steps: 116200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014800, Sample Num: 236800, Cur Loss: 0.00013749, Cur Avg Loss: 0.07343268, Log Avg loss: 0.07246727, Global Avg Loss: 0.10159000, Time: 0.0450 Steps: 116400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015000, Sample Num: 240000, Cur Loss: 0.00084624, Cur Avg Loss: 0.07351954, Log Avg loss: 0.07994760, Global Avg Loss: 0.10155288, Time: 0.3137 Steps: 116600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015200, Sample Num: 243200, Cur Loss: 0.00016847, Cur Avg Loss: 0.07354443, Log Avg loss: 0.07541067, Global Avg Loss: 0.10150812, Time: 0.2971 Steps: 116800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015400, Sample Num: 246400, Cur Loss: 0.00061297, Cur Avg Loss: 0.07344361, Log Avg loss: 0.06578177, Global Avg Loss: 0.10144704, Time: 0.1864 Steps: 117000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 015600, Sample Num: 249600, Cur Loss: 0.00062933, Cur Avg Loss: 0.07332435, Log Avg loss: 0.06414139, Global Avg Loss: 0.10138338, Time: 0.2984 Steps: 117200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 015800, Sample Num: 252800, Cur Loss: 0.00372166, Cur Avg Loss: 0.07314018, Log Avg loss: 0.05877458, Global Avg Loss: 0.10131080, Time: 0.0426 Steps: 117400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016000, Sample Num: 256000, Cur Loss: 0.00281447, Cur Avg Loss: 0.07340396, Log Avg loss: 0.09424232, Global Avg Loss: 0.10129877, Time: 0.0951 Steps: 117600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016200, Sample Num: 259200, Cur Loss: 0.00037708, Cur Avg Loss: 0.07326753, Log Avg loss: 0.06235318, Global Avg Loss: 0.10123265, Time: 0.0395 Steps: 117800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016400, Sample Num: 262400, Cur Loss: 0.00129768, Cur Avg Loss: 0.07314924, Log Avg loss: 0.06356831, Global Avg Loss: 0.10116882, Time: 0.2772 Steps: 118000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016600, Sample Num: 265600, Cur Loss: 0.03510735, Cur Avg Loss: 0.07300939, Log Avg loss: 0.06154145, Global Avg Loss: 0.10110176, Time: 0.0528 Steps: 118200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016800, Sample Num: 268800, Cur Loss: 0.37277177, Cur Avg Loss: 0.07316124, Log Avg loss: 0.08576472, Global Avg Loss: 0.10107586, Time: 0.2051 Steps: 118400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017000, Sample Num: 272000, Cur Loss: 0.58503652, Cur Avg Loss: 0.07325472, Log Avg loss: 0.08110663, Global Avg Loss: 0.10104218, Time: 0.0420 Steps: 118600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017200, Sample Num: 275200, Cur Loss: 0.26782823, Cur Avg Loss: 0.07334743, Log Avg loss: 0.08122833, Global Avg Loss: 0.10100883, Time: 0.0338 Steps: 118800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017400, Sample Num: 278400, Cur Loss: 0.00020297, Cur Avg Loss: 0.07332448, Log Avg loss: 0.07135022, Global Avg Loss: 0.10095898, Time: 0.0609 Steps: 119000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 017600, Sample Num: 281600, Cur Loss: 0.00193393, Cur Avg Loss: 0.07348448, Log Avg loss: 0.08740510, Global Avg Loss: 0.10093624, Time: 0.0985 Steps: 119200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 017800, Sample Num: 284800, Cur Loss: 0.00145679, Cur Avg Loss: 0.07337210, Log Avg loss: 0.06348254, Global Avg Loss: 0.10087350, Time: 0.3129 Steps: 119400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018000, Sample Num: 288000, Cur Loss: 0.00050447, Cur Avg Loss: 0.07334805, Log Avg loss: 0.07120762, Global Avg Loss: 0.10082389, Time: 0.1958 Steps: 119600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018200, Sample Num: 291200, Cur Loss: 0.03870056, Cur Avg Loss: 0.07333374, Log Avg loss: 0.07204560, Global Avg Loss: 0.10077585, Time: 0.2030 Steps: 119800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018400, Sample Num: 294400, Cur Loss: 0.13853553, Cur Avg Loss: 0.07317553, Log Avg loss: 0.05877833, Global Avg Loss: 0.10070585, Time: 0.2148 Steps: 120000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018600, Sample Num: 297600, Cur Loss: 0.00061692, Cur Avg Loss: 0.07328923, Log Avg loss: 0.08374932, Global Avg Loss: 0.10067764, Time: 0.0884 Steps: 120200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018800, Sample Num: 300800, Cur Loss: 0.00085072, Cur Avg Loss: 0.07328670, Log Avg loss: 0.07305218, Global Avg Loss: 0.10063175, Time: 0.0741 Steps: 120400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019000, Sample Num: 304000, Cur Loss: 0.00328819, Cur Avg Loss: 0.07331199, Log Avg loss: 0.07568874, Global Avg Loss: 0.10059038, Time: 0.0777 Steps: 120600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019200, Sample Num: 307200, Cur Loss: 0.01085723, Cur Avg Loss: 0.07327338, Log Avg loss: 0.06960600, Global Avg Loss: 0.10053909, Time: 0.1805 Steps: 120800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019400, Sample Num: 310400, Cur Loss: 0.00130593, Cur Avg Loss: 0.07320287, Log Avg loss: 0.06643373, Global Avg Loss: 0.10048271, Time: 0.0437 Steps: 121000, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 019600, Sample Num: 313600, Cur Loss: 0.00066528, Cur Avg Loss: 0.07316745, Log Avg loss: 0.06973184, Global Avg Loss: 0.10043197, Time: 0.1013 Steps: 121200, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 019800, Sample Num: 316800, Cur Loss: 0.46985409, Cur Avg Loss: 0.07315807, Log Avg loss: 0.07223846, Global Avg Loss: 0.10038552, Time: 0.1006 Steps: 121400, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 020000, Sample Num: 320000, Cur Loss: 0.01354317, Cur Avg Loss: 0.07313798, Log Avg loss: 0.07114869, Global Avg Loss: 0.10033744, Time: 0.0947 Steps: 121600, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 020200, Sample Num: 323200, Cur Loss: 0.00069932, Cur Avg Loss: 0.07296782, Log Avg loss: 0.05595182, Global Avg Loss: 0.10026455, Time: 0.0784 Steps: 121800, Updated lr: 0.000040 ***** Running evaluation checkpoint-121920 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-121920 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2769.527488, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.065815, "eval_total_loss": 178.357449, "eval_acc": 0.982099, "eval_prec": 0.97726, "eval_recall": 0.987248, "eval_f1": 0.982229, "eval_roc_auc": 0.997927, "eval_pr_auc": 0.997675, "eval_confusion_matrix": {"tn": 21128, "fp": 499, "fn": 277, "tp": 21445}, "eval_mcc2": 0.964247, "eval_mcc": 0.964247, "eval_sn": 0.987248, "eval_sp": 0.976927, "update_flag": true, "test_avg_loss": 0.06675, "test_total_loss": 271.273065, "test_acc": 0.981699, "test_prec": 0.976477, "test_recall": 0.987171, "test_f1": 0.981795, "test_roc_auc": 0.997819, "test_pr_auc": 0.997336, "test_confusion_matrix": {"tn": 31744, "fp": 773, "fn": 417, "tp": 32088}, "test_mcc2": 0.963455, "test_mcc": 0.963455, "test_sn": 0.987171, "test_sp": 0.976228, "lr": 4.003940886699508e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.10023986332842279, "train_cur_epoch_loss": 1482.9715382756822, "train_cur_epoch_avg_loss": 0.07298088278915758, "train_cur_epoch_time": 2769.527488231659, "train_cur_epoch_avg_time": 0.13629564410588874, "epoch": 6, "step": 121920} ################################################## Training, Epoch: 0007, Batch: 000080, Sample Num: 1280, Cur Loss: 0.45435444, Cur Avg Loss: 0.04843900, Log Avg loss: 0.06448389, Global Avg Loss: 0.10020590, Time: 0.2153 Steps: 122000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000280, Sample Num: 4480, Cur Loss: 0.00732539, Cur Avg Loss: 0.06946538, Log Avg loss: 0.07787594, Global Avg Loss: 0.10016935, Time: 0.0822 Steps: 122200, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000480, Sample Num: 7680, Cur Loss: 0.00308870, Cur Avg Loss: 0.07762133, Log Avg loss: 0.08903966, Global Avg Loss: 0.10015116, Time: 0.0367 Steps: 122400, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000680, Sample Num: 10880, Cur Loss: 0.00072169, Cur Avg Loss: 0.07864481, Log Avg loss: 0.08110116, Global Avg Loss: 0.10012009, Time: 0.0755 Steps: 122600, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000880, Sample Num: 14080, Cur Loss: 0.00070977, Cur Avg Loss: 0.07827540, Log Avg loss: 0.07701940, Global Avg Loss: 0.10008246, Time: 0.3138 Steps: 122800, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 001080, Sample Num: 17280, Cur Loss: 0.00671970, Cur Avg Loss: 0.07532349, Log Avg loss: 0.06233508, Global Avg Loss: 0.10002109, Time: 0.0499 Steps: 123000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 001280, Sample Num: 20480, Cur Loss: 0.00086333, Cur Avg Loss: 0.07530191, Log Avg loss: 0.07518541, Global Avg Loss: 0.09998077, Time: 0.1875 Steps: 123200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001480, Sample Num: 23680, Cur Loss: 0.00021837, Cur Avg Loss: 0.07565793, Log Avg loss: 0.07793646, Global Avg Loss: 0.09994504, Time: 0.0685 Steps: 123400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001680, Sample Num: 26880, Cur Loss: 0.00915196, Cur Avg Loss: 0.07517272, Log Avg loss: 0.07158211, Global Avg Loss: 0.09989914, Time: 0.3210 Steps: 123600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001880, Sample Num: 30080, Cur Loss: 0.03835330, Cur Avg Loss: 0.07447655, Log Avg loss: 0.06862874, Global Avg Loss: 0.09984863, Time: 0.1862 Steps: 123800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002080, Sample Num: 33280, Cur Loss: 0.01302053, Cur Avg Loss: 0.07266005, Log Avg loss: 0.05558495, Global Avg Loss: 0.09977723, Time: 0.3120 Steps: 124000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002280, Sample Num: 36480, Cur Loss: 0.00054070, Cur Avg Loss: 0.07239149, Log Avg loss: 0.06959847, Global Avg Loss: 0.09972864, Time: 0.1072 Steps: 124200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002480, Sample Num: 39680, Cur Loss: 0.00034115, Cur Avg Loss: 0.07157407, Log Avg loss: 0.06225549, Global Avg Loss: 0.09966839, Time: 0.0485 Steps: 124400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002680, Sample Num: 42880, Cur Loss: 0.00791319, Cur Avg Loss: 0.07284607, Log Avg loss: 0.08861890, Global Avg Loss: 0.09965065, Time: 0.1815 Steps: 124600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002880, Sample Num: 46080, Cur Loss: 0.17559028, Cur Avg Loss: 0.07184727, Log Avg loss: 0.05846328, Global Avg Loss: 0.09958465, Time: 0.0801 Steps: 124800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 003080, Sample Num: 49280, Cur Loss: 0.01848503, Cur Avg Loss: 0.07126254, Log Avg loss: 0.06284241, Global Avg Loss: 0.09952586, Time: 0.0581 Steps: 125000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 003280, Sample Num: 52480, Cur Loss: 0.00035685, Cur Avg Loss: 0.07052874, Log Avg loss: 0.05922826, Global Avg Loss: 0.09946149, Time: 0.1591 Steps: 125200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003480, Sample Num: 55680, Cur Loss: 0.00585235, Cur Avg Loss: 0.07083445, Log Avg loss: 0.07584814, Global Avg Loss: 0.09942383, Time: 0.0810 Steps: 125400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003680, Sample Num: 58880, Cur Loss: 0.00465708, Cur Avg Loss: 0.06995880, Log Avg loss: 0.05472250, Global Avg Loss: 0.09935265, Time: 0.0530 Steps: 125600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003880, Sample Num: 62080, Cur Loss: 0.00188271, Cur Avg Loss: 0.06987973, Log Avg loss: 0.06842479, Global Avg Loss: 0.09930348, Time: 0.2028 Steps: 125800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004080, Sample Num: 65280, Cur Loss: 0.00165711, Cur Avg Loss: 0.06963816, Log Avg loss: 0.06495171, Global Avg Loss: 0.09924895, Time: 0.0575 Steps: 126000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004280, Sample Num: 68480, Cur Loss: 0.00116416, Cur Avg Loss: 0.07027696, Log Avg loss: 0.08330858, Global Avg Loss: 0.09922369, Time: 0.1881 Steps: 126200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004480, Sample Num: 71680, Cur Loss: 0.01370034, Cur Avg Loss: 0.07035175, Log Avg loss: 0.07195206, Global Avg Loss: 0.09918054, Time: 0.0622 Steps: 126400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004680, Sample Num: 74880, Cur Loss: 0.00469070, Cur Avg Loss: 0.07039079, Log Avg loss: 0.07126549, Global Avg Loss: 0.09913644, Time: 0.1892 Steps: 126600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004880, Sample Num: 78080, Cur Loss: 0.41754389, Cur Avg Loss: 0.06997292, Log Avg loss: 0.06019467, Global Avg Loss: 0.09907502, Time: 0.0466 Steps: 126800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 005080, Sample Num: 81280, Cur Loss: 0.00033408, Cur Avg Loss: 0.06961288, Log Avg loss: 0.06082780, Global Avg Loss: 0.09901478, Time: 0.1876 Steps: 127000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 005280, Sample Num: 84480, Cur Loss: 0.00079321, Cur Avg Loss: 0.06927400, Log Avg loss: 0.06066657, Global Avg Loss: 0.09895449, Time: 0.1874 Steps: 127200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005480, Sample Num: 87680, Cur Loss: 0.00048552, Cur Avg Loss: 0.06882931, Log Avg loss: 0.05708943, Global Avg Loss: 0.09888877, Time: 0.1392 Steps: 127400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005680, Sample Num: 90880, Cur Loss: 0.28735411, Cur Avg Loss: 0.06862650, Log Avg loss: 0.06306952, Global Avg Loss: 0.09883262, Time: 0.0549 Steps: 127600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005880, Sample Num: 94080, Cur Loss: 0.20151643, Cur Avg Loss: 0.06868919, Log Avg loss: 0.07046945, Global Avg Loss: 0.09878824, Time: 0.0399 Steps: 127800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006080, Sample Num: 97280, Cur Loss: 0.00022151, Cur Avg Loss: 0.06873109, Log Avg loss: 0.06996301, Global Avg Loss: 0.09874320, Time: 0.1857 Steps: 128000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006280, Sample Num: 100480, Cur Loss: 0.22023441, Cur Avg Loss: 0.06889640, Log Avg loss: 0.07392176, Global Avg Loss: 0.09870447, Time: 0.0434 Steps: 128200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006480, Sample Num: 103680, Cur Loss: 0.26543838, Cur Avg Loss: 0.06871423, Log Avg loss: 0.06299418, Global Avg Loss: 0.09864885, Time: 0.0736 Steps: 128400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006680, Sample Num: 106880, Cur Loss: 0.00025626, Cur Avg Loss: 0.06839149, Log Avg loss: 0.05793471, Global Avg Loss: 0.09858553, Time: 0.0718 Steps: 128600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006880, Sample Num: 110080, Cur Loss: 0.04152703, Cur Avg Loss: 0.06830569, Log Avg loss: 0.06544001, Global Avg Loss: 0.09853406, Time: 0.0820 Steps: 128800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 007080, Sample Num: 113280, Cur Loss: 0.00068468, Cur Avg Loss: 0.06808482, Log Avg loss: 0.06048687, Global Avg Loss: 0.09847507, Time: 0.1887 Steps: 129000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 007280, Sample Num: 116480, Cur Loss: 0.72230446, Cur Avg Loss: 0.06823944, Log Avg loss: 0.07371303, Global Avg Loss: 0.09843674, Time: 0.2966 Steps: 129200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007480, Sample Num: 119680, Cur Loss: 0.13544887, Cur Avg Loss: 0.06823692, Log Avg loss: 0.06814518, Global Avg Loss: 0.09838992, Time: 0.0900 Steps: 129400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007680, Sample Num: 122880, Cur Loss: 0.23999761, Cur Avg Loss: 0.06832011, Log Avg loss: 0.07143156, Global Avg Loss: 0.09834832, Time: 0.1861 Steps: 129600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007880, Sample Num: 126080, Cur Loss: 0.00313673, Cur Avg Loss: 0.06845266, Log Avg loss: 0.07354235, Global Avg Loss: 0.09831010, Time: 0.1625 Steps: 129800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008080, Sample Num: 129280, Cur Loss: 0.00560351, Cur Avg Loss: 0.06863864, Log Avg loss: 0.07596627, Global Avg Loss: 0.09827573, Time: 0.3418 Steps: 130000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008280, Sample Num: 132480, Cur Loss: 0.00390933, Cur Avg Loss: 0.06859875, Log Avg loss: 0.06698721, Global Avg Loss: 0.09822766, Time: 0.1888 Steps: 130200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008480, Sample Num: 135680, Cur Loss: 0.00588720, Cur Avg Loss: 0.06843909, Log Avg loss: 0.06182946, Global Avg Loss: 0.09817184, Time: 0.2311 Steps: 130400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008680, Sample Num: 138880, Cur Loss: 0.00530633, Cur Avg Loss: 0.06846367, Log Avg loss: 0.06950586, Global Avg Loss: 0.09812794, Time: 0.0977 Steps: 130600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008880, Sample Num: 142080, Cur Loss: 0.00033343, Cur Avg Loss: 0.06811744, Log Avg loss: 0.05309077, Global Avg Loss: 0.09805907, Time: 0.0553 Steps: 130800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 009080, Sample Num: 145280, Cur Loss: 0.00054503, Cur Avg Loss: 0.06820558, Log Avg loss: 0.07211912, Global Avg Loss: 0.09801947, Time: 0.0564 Steps: 131000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 009280, Sample Num: 148480, Cur Loss: 0.00151394, Cur Avg Loss: 0.06822029, Log Avg loss: 0.06888831, Global Avg Loss: 0.09797506, Time: 0.0819 Steps: 131200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009480, Sample Num: 151680, Cur Loss: 0.01423765, Cur Avg Loss: 0.06835185, Log Avg loss: 0.07445587, Global Avg Loss: 0.09793927, Time: 0.0551 Steps: 131400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009680, Sample Num: 154880, Cur Loss: 0.11066471, Cur Avg Loss: 0.06840926, Log Avg loss: 0.07113080, Global Avg Loss: 0.09789852, Time: 0.0540 Steps: 131600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009880, Sample Num: 158080, Cur Loss: 0.20807640, Cur Avg Loss: 0.06840217, Log Avg loss: 0.06805864, Global Avg Loss: 0.09785324, Time: 0.0398 Steps: 131800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010080, Sample Num: 161280, Cur Loss: 0.00440642, Cur Avg Loss: 0.06812332, Log Avg loss: 0.05434818, Global Avg Loss: 0.09778733, Time: 0.0623 Steps: 132000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010280, Sample Num: 164480, Cur Loss: 0.17871571, Cur Avg Loss: 0.06791212, Log Avg loss: 0.05726758, Global Avg Loss: 0.09772603, Time: 0.1884 Steps: 132200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010480, Sample Num: 167680, Cur Loss: 0.00026573, Cur Avg Loss: 0.06779631, Log Avg loss: 0.06184385, Global Avg Loss: 0.09767182, Time: 0.2976 Steps: 132400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010680, Sample Num: 170880, Cur Loss: 0.00254002, Cur Avg Loss: 0.06771685, Log Avg loss: 0.06355342, Global Avg Loss: 0.09762036, Time: 0.3419 Steps: 132600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010880, Sample Num: 174080, Cur Loss: 0.00021674, Cur Avg Loss: 0.06732939, Log Avg loss: 0.04663883, Global Avg Loss: 0.09754358, Time: 0.0920 Steps: 132800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 011080, Sample Num: 177280, Cur Loss: 0.00112021, Cur Avg Loss: 0.06756382, Log Avg loss: 0.08031654, Global Avg Loss: 0.09751768, Time: 0.0808 Steps: 133000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 011280, Sample Num: 180480, Cur Loss: 0.00110404, Cur Avg Loss: 0.06744781, Log Avg loss: 0.06102083, Global Avg Loss: 0.09746288, Time: 0.0445 Steps: 133200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011480, Sample Num: 183680, Cur Loss: 0.00015319, Cur Avg Loss: 0.06744989, Log Avg loss: 0.06756741, Global Avg Loss: 0.09741806, Time: 0.1865 Steps: 133400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011680, Sample Num: 186880, Cur Loss: 0.00061859, Cur Avg Loss: 0.06726394, Log Avg loss: 0.05659065, Global Avg Loss: 0.09735694, Time: 0.1877 Steps: 133600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011880, Sample Num: 190080, Cur Loss: 0.00120555, Cur Avg Loss: 0.06728548, Log Avg loss: 0.06854314, Global Avg Loss: 0.09731387, Time: 0.0842 Steps: 133800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012080, Sample Num: 193280, Cur Loss: 0.00037424, Cur Avg Loss: 0.06727136, Log Avg loss: 0.06643263, Global Avg Loss: 0.09726778, Time: 0.0538 Steps: 134000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012280, Sample Num: 196480, Cur Loss: 0.04546465, Cur Avg Loss: 0.06693349, Log Avg loss: 0.04652599, Global Avg Loss: 0.09719216, Time: 0.0882 Steps: 134200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012480, Sample Num: 199680, Cur Loss: 0.00003397, Cur Avg Loss: 0.06704401, Log Avg loss: 0.07383019, Global Avg Loss: 0.09715739, Time: 0.0946 Steps: 134400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012680, Sample Num: 202880, Cur Loss: 0.00010832, Cur Avg Loss: 0.06684746, Log Avg loss: 0.05458256, Global Avg Loss: 0.09709413, Time: 0.0876 Steps: 134600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012880, Sample Num: 206080, Cur Loss: 0.00124605, Cur Avg Loss: 0.06673285, Log Avg loss: 0.05946655, Global Avg Loss: 0.09703830, Time: 0.2731 Steps: 134800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 013080, Sample Num: 209280, Cur Loss: 0.00008177, Cur Avg Loss: 0.06672367, Log Avg loss: 0.06613297, Global Avg Loss: 0.09699252, Time: 0.0361 Steps: 135000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 013280, Sample Num: 212480, Cur Loss: 0.00241524, Cur Avg Loss: 0.06691978, Log Avg loss: 0.07974491, Global Avg Loss: 0.09696700, Time: 0.0739 Steps: 135200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013480, Sample Num: 215680, Cur Loss: 0.02188325, Cur Avg Loss: 0.06697083, Log Avg loss: 0.07036090, Global Avg Loss: 0.09692770, Time: 0.2981 Steps: 135400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013680, Sample Num: 218880, Cur Loss: 0.00154243, Cur Avg Loss: 0.06695015, Log Avg loss: 0.06555600, Global Avg Loss: 0.09688143, Time: 0.1868 Steps: 135600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013880, Sample Num: 222080, Cur Loss: 0.00317979, Cur Avg Loss: 0.06678416, Log Avg loss: 0.05543043, Global Avg Loss: 0.09682038, Time: 0.2128 Steps: 135800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014080, Sample Num: 225280, Cur Loss: 0.00020288, Cur Avg Loss: 0.06675875, Log Avg loss: 0.06499559, Global Avg Loss: 0.09677358, Time: 0.0554 Steps: 136000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014280, Sample Num: 228480, Cur Loss: 0.36660716, Cur Avg Loss: 0.06670550, Log Avg loss: 0.06295684, Global Avg Loss: 0.09672393, Time: 0.1333 Steps: 136200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014480, Sample Num: 231680, Cur Loss: 0.05969268, Cur Avg Loss: 0.06656314, Log Avg loss: 0.05639839, Global Avg Loss: 0.09666480, Time: 0.2163 Steps: 136400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014680, Sample Num: 234880, Cur Loss: 0.00011343, Cur Avg Loss: 0.06651378, Log Avg loss: 0.06294033, Global Avg Loss: 0.09661542, Time: 0.1887 Steps: 136600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014880, Sample Num: 238080, Cur Loss: 0.00119492, Cur Avg Loss: 0.06642133, Log Avg loss: 0.05963518, Global Avg Loss: 0.09656136, Time: 0.2046 Steps: 136800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015080, Sample Num: 241280, Cur Loss: 0.00091185, Cur Avg Loss: 0.06625633, Log Avg loss: 0.05398021, Global Avg Loss: 0.09649919, Time: 0.2577 Steps: 137000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015280, Sample Num: 244480, Cur Loss: 0.00136709, Cur Avg Loss: 0.06631155, Log Avg loss: 0.07047530, Global Avg Loss: 0.09646126, Time: 0.0851 Steps: 137200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015480, Sample Num: 247680, Cur Loss: 0.00124314, Cur Avg Loss: 0.06632077, Log Avg loss: 0.06702487, Global Avg Loss: 0.09641841, Time: 0.3105 Steps: 137400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 015680, Sample Num: 250880, Cur Loss: 0.36175865, Cur Avg Loss: 0.06620914, Log Avg loss: 0.05756927, Global Avg Loss: 0.09636194, Time: 0.1570 Steps: 137600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 015880, Sample Num: 254080, Cur Loss: 0.01039142, Cur Avg Loss: 0.06609811, Log Avg loss: 0.05739316, Global Avg Loss: 0.09630539, Time: 0.1870 Steps: 137800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016080, Sample Num: 257280, Cur Loss: 0.01933229, Cur Avg Loss: 0.06606378, Log Avg loss: 0.06333799, Global Avg Loss: 0.09625761, Time: 0.0752 Steps: 138000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016280, Sample Num: 260480, Cur Loss: 0.10212038, Cur Avg Loss: 0.06594129, Log Avg loss: 0.05609336, Global Avg Loss: 0.09619948, Time: 0.0772 Steps: 138200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016480, Sample Num: 263680, Cur Loss: 0.00058817, Cur Avg Loss: 0.06588295, Log Avg loss: 0.06113389, Global Avg Loss: 0.09614881, Time: 0.0511 Steps: 138400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016680, Sample Num: 266880, Cur Loss: 0.00220133, Cur Avg Loss: 0.06592793, Log Avg loss: 0.06963454, Global Avg Loss: 0.09611055, Time: 0.0954 Steps: 138600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016880, Sample Num: 270080, Cur Loss: 0.00066089, Cur Avg Loss: 0.06586926, Log Avg loss: 0.06097635, Global Avg Loss: 0.09605992, Time: 0.0957 Steps: 138800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017080, Sample Num: 273280, Cur Loss: 0.00425574, Cur Avg Loss: 0.06611423, Log Avg loss: 0.08678941, Global Avg Loss: 0.09604658, Time: 0.0732 Steps: 139000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017280, Sample Num: 276480, Cur Loss: 0.00406568, Cur Avg Loss: 0.06611801, Log Avg loss: 0.06644074, Global Avg Loss: 0.09600405, Time: 0.2165 Steps: 139200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017480, Sample Num: 279680, Cur Loss: 0.00060116, Cur Avg Loss: 0.06594562, Log Avg loss: 0.05105088, Global Avg Loss: 0.09593955, Time: 0.1601 Steps: 139400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 017680, Sample Num: 282880, Cur Loss: 0.00145610, Cur Avg Loss: 0.06622000, Log Avg loss: 0.09020119, Global Avg Loss: 0.09593133, Time: 0.0480 Steps: 139600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 017880, Sample Num: 286080, Cur Loss: 0.00483835, Cur Avg Loss: 0.06591812, Log Avg loss: 0.03923224, Global Avg Loss: 0.09585022, Time: 0.0655 Steps: 139800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018080, Sample Num: 289280, Cur Loss: 0.00448441, Cur Avg Loss: 0.06600829, Log Avg loss: 0.07406949, Global Avg Loss: 0.09581910, Time: 0.0539 Steps: 140000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018280, Sample Num: 292480, Cur Loss: 0.00014634, Cur Avg Loss: 0.06595606, Log Avg loss: 0.06123392, Global Avg Loss: 0.09576976, Time: 0.3425 Steps: 140200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018480, Sample Num: 295680, Cur Loss: 0.00425171, Cur Avg Loss: 0.06593920, Log Avg loss: 0.06439813, Global Avg Loss: 0.09572507, Time: 0.0649 Steps: 140400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018680, Sample Num: 298880, Cur Loss: 0.00249832, Cur Avg Loss: 0.06595490, Log Avg loss: 0.06740603, Global Avg Loss: 0.09568479, Time: 0.3072 Steps: 140600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018880, Sample Num: 302080, Cur Loss: 0.00418754, Cur Avg Loss: 0.06598969, Log Avg loss: 0.06923928, Global Avg Loss: 0.09564723, Time: 0.1069 Steps: 140800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019080, Sample Num: 305280, Cur Loss: 0.18879229, Cur Avg Loss: 0.06598845, Log Avg loss: 0.06587096, Global Avg Loss: 0.09560499, Time: 0.3103 Steps: 141000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019280, Sample Num: 308480, Cur Loss: 0.00716191, Cur Avg Loss: 0.06597919, Log Avg loss: 0.06509549, Global Avg Loss: 0.09556178, Time: 0.0958 Steps: 141200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019480, Sample Num: 311680, Cur Loss: 0.00859388, Cur Avg Loss: 0.06580112, Log Avg loss: 0.04863504, Global Avg Loss: 0.09549540, Time: 0.1886 Steps: 141400, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 019680, Sample Num: 314880, Cur Loss: 0.00059685, Cur Avg Loss: 0.06574548, Log Avg loss: 0.06032703, Global Avg Loss: 0.09544573, Time: 0.0542 Steps: 141600, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 019880, Sample Num: 318080, Cur Loss: 0.00020094, Cur Avg Loss: 0.06561773, Log Avg loss: 0.05304664, Global Avg Loss: 0.09538593, Time: 0.1880 Steps: 141800, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 020080, Sample Num: 321280, Cur Loss: 0.00113102, Cur Avg Loss: 0.06551893, Log Avg loss: 0.05569836, Global Avg Loss: 0.09533003, Time: 0.0866 Steps: 142000, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 020280, Sample Num: 324480, Cur Loss: 0.02935378, Cur Avg Loss: 0.06552929, Log Avg loss: 0.06656953, Global Avg Loss: 0.09528958, Time: 0.0880 Steps: 142200, Updated lr: 0.000030 ***** Running evaluation checkpoint-142240 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-142240 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2757.056243, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.074982, "eval_total_loss": 203.201584, "eval_acc": 0.980715, "eval_prec": 0.970999, "eval_recall": 0.991115, "eval_f1": 0.980954, "eval_roc_auc": 0.998325, "eval_pr_auc": 0.998112, "eval_confusion_matrix": {"tn": 20984, "fp": 643, "fn": 193, "tp": 21529}, "eval_mcc2": 0.961635, "eval_mcc": 0.961635, "eval_sn": 0.991115, "eval_sp": 0.970269, "update_flag": false, "test_avg_loss": 0.076055, "test_total_loss": 309.08632, "test_acc": 0.980545, "test_prec": 0.969718, "test_recall": 0.992063, "test_f1": 0.980763, "test_roc_auc": 0.998309, "test_pr_auc": 0.998123, "test_confusion_matrix": {"tn": 31510, "fp": 1007, "fn": 258, "tp": 32247}, "test_mcc2": 0.961345, "test_mcc": 0.961345, "test_sn": 0.992063, "test_sp": 0.969032, "lr": 3.0029556650246303e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.09527503140525875, "train_cur_epoch_loss": 1330.6763300826997, "train_cur_epoch_avg_loss": 0.06548603986627459, "train_cur_epoch_time": 2757.0562431812286, "train_cur_epoch_avg_time": 0.1356819017313597, "epoch": 7, "step": 142240} ################################################## Training, Epoch: 0008, Batch: 000160, Sample Num: 2560, Cur Loss: 0.23626254, Cur Avg Loss: 0.05183146, Log Avg loss: 0.05017663, Global Avg Loss: 0.09522622, Time: 0.1653 Steps: 142400, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000360, Sample Num: 5760, Cur Loss: 0.00172053, Cur Avg Loss: 0.05804512, Log Avg loss: 0.06301605, Global Avg Loss: 0.09518104, Time: 0.0512 Steps: 142600, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00088024, Cur Avg Loss: 0.05943766, Log Avg loss: 0.06194423, Global Avg Loss: 0.09513449, Time: 0.0438 Steps: 142800, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000760, Sample Num: 12160, Cur Loss: 0.02815492, Cur Avg Loss: 0.06479875, Log Avg loss: 0.07980982, Global Avg Loss: 0.09511306, Time: 0.0427 Steps: 143000, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000960, Sample Num: 15360, Cur Loss: 0.00267031, Cur Avg Loss: 0.06338954, Log Avg loss: 0.05803453, Global Avg Loss: 0.09506127, Time: 0.3112 Steps: 143200, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 001160, Sample Num: 18560, Cur Loss: 0.08017201, Cur Avg Loss: 0.06270594, Log Avg loss: 0.05942463, Global Avg Loss: 0.09501157, Time: 0.0947 Steps: 143400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00183563, Cur Avg Loss: 0.06257834, Log Avg loss: 0.06183827, Global Avg Loss: 0.09496537, Time: 0.1941 Steps: 143600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001560, Sample Num: 24960, Cur Loss: 0.78097653, Cur Avg Loss: 0.06244028, Log Avg loss: 0.06150144, Global Avg Loss: 0.09491883, Time: 0.0944 Steps: 143800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001760, Sample Num: 28160, Cur Loss: 0.02085252, Cur Avg Loss: 0.06465676, Log Avg loss: 0.08194537, Global Avg Loss: 0.09490081, Time: 0.0550 Steps: 144000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001960, Sample Num: 31360, Cur Loss: 0.00663999, Cur Avg Loss: 0.06370237, Log Avg loss: 0.05530369, Global Avg Loss: 0.09484589, Time: 0.0835 Steps: 144200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002160, Sample Num: 34560, Cur Loss: 0.00028203, Cur Avg Loss: 0.06387661, Log Avg loss: 0.06558421, Global Avg Loss: 0.09480536, Time: 0.0991 Steps: 144400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00479513, Cur Avg Loss: 0.06281509, Log Avg loss: 0.05135069, Global Avg Loss: 0.09474526, Time: 0.0597 Steps: 144600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002560, Sample Num: 40960, Cur Loss: 0.00068172, Cur Avg Loss: 0.06417145, Log Avg loss: 0.08017639, Global Avg Loss: 0.09472513, Time: 0.0306 Steps: 144800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002760, Sample Num: 44160, Cur Loss: 0.08904586, Cur Avg Loss: 0.06422219, Log Avg loss: 0.06487167, Global Avg Loss: 0.09468396, Time: 0.0512 Steps: 145000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002960, Sample Num: 47360, Cur Loss: 0.00119334, Cur Avg Loss: 0.06355597, Log Avg loss: 0.05436213, Global Avg Loss: 0.09462842, Time: 0.1862 Steps: 145200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00692473, Cur Avg Loss: 0.06290001, Log Avg loss: 0.05319191, Global Avg Loss: 0.09457142, Time: 0.0592 Steps: 145400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003360, Sample Num: 53760, Cur Loss: 0.00127095, Cur Avg Loss: 0.06263952, Log Avg loss: 0.05852380, Global Avg Loss: 0.09452190, Time: 0.3121 Steps: 145600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003560, Sample Num: 56960, Cur Loss: 0.38047394, Cur Avg Loss: 0.06220842, Log Avg loss: 0.05496588, Global Avg Loss: 0.09446764, Time: 0.1005 Steps: 145800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003760, Sample Num: 60160, Cur Loss: 0.50959319, Cur Avg Loss: 0.06213597, Log Avg loss: 0.06084640, Global Avg Loss: 0.09442159, Time: 0.1630 Steps: 146000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003960, Sample Num: 63360, Cur Loss: 0.00213642, Cur Avg Loss: 0.06193989, Log Avg loss: 0.05825344, Global Avg Loss: 0.09437211, Time: 0.0945 Steps: 146200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00027704, Cur Avg Loss: 0.06209071, Log Avg loss: 0.06507696, Global Avg Loss: 0.09433209, Time: 0.0661 Steps: 146400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004360, Sample Num: 69760, Cur Loss: 0.33715895, Cur Avg Loss: 0.06270447, Log Avg loss: 0.07547070, Global Avg Loss: 0.09430636, Time: 0.1882 Steps: 146600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004560, Sample Num: 72960, Cur Loss: 0.00047244, Cur Avg Loss: 0.06258224, Log Avg loss: 0.05991759, Global Avg Loss: 0.09425951, Time: 0.0540 Steps: 146800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004760, Sample Num: 76160, Cur Loss: 0.15558082, Cur Avg Loss: 0.06204419, Log Avg loss: 0.04977672, Global Avg Loss: 0.09419899, Time: 0.0878 Steps: 147000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004960, Sample Num: 79360, Cur Loss: 0.00325650, Cur Avg Loss: 0.06175761, Log Avg loss: 0.05493701, Global Avg Loss: 0.09414564, Time: 0.0808 Steps: 147200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 005160, Sample Num: 82560, Cur Loss: 0.01404934, Cur Avg Loss: 0.06167711, Log Avg loss: 0.05968066, Global Avg Loss: 0.09409888, Time: 0.0633 Steps: 147400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005360, Sample Num: 85760, Cur Loss: 0.00480892, Cur Avg Loss: 0.06176638, Log Avg loss: 0.06406963, Global Avg Loss: 0.09405819, Time: 0.1862 Steps: 147600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00087908, Cur Avg Loss: 0.06118027, Log Avg loss: 0.04547260, Global Avg Loss: 0.09399244, Time: 0.2850 Steps: 147800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005760, Sample Num: 92160, Cur Loss: 0.00023678, Cur Avg Loss: 0.06076049, Log Avg loss: 0.04909063, Global Avg Loss: 0.09393176, Time: 0.3107 Steps: 148000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00025507, Cur Avg Loss: 0.06088155, Log Avg loss: 0.06436796, Global Avg Loss: 0.09389187, Time: 0.0591 Steps: 148200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006160, Sample Num: 98560, Cur Loss: 0.04557967, Cur Avg Loss: 0.06077090, Log Avg loss: 0.05747339, Global Avg Loss: 0.09384278, Time: 0.0948 Steps: 148400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006360, Sample Num: 101760, Cur Loss: 0.05226130, Cur Avg Loss: 0.06115595, Log Avg loss: 0.07301553, Global Avg Loss: 0.09381475, Time: 0.0801 Steps: 148600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006560, Sample Num: 104960, Cur Loss: 0.05640870, Cur Avg Loss: 0.06062617, Log Avg loss: 0.04377930, Global Avg Loss: 0.09374750, Time: 0.0541 Steps: 148800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006760, Sample Num: 108160, Cur Loss: 0.62901932, Cur Avg Loss: 0.06084401, Log Avg loss: 0.06798926, Global Avg Loss: 0.09371293, Time: 0.0619 Steps: 149000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006960, Sample Num: 111360, Cur Loss: 0.00153171, Cur Avg Loss: 0.06042528, Log Avg loss: 0.04627193, Global Avg Loss: 0.09364933, Time: 0.1883 Steps: 149200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 007160, Sample Num: 114560, Cur Loss: 0.02153851, Cur Avg Loss: 0.06082752, Log Avg loss: 0.07482553, Global Avg Loss: 0.09362413, Time: 0.2258 Steps: 149400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 007360, Sample Num: 117760, Cur Loss: 0.01697145, Cur Avg Loss: 0.06085276, Log Avg loss: 0.06175662, Global Avg Loss: 0.09358153, Time: 0.0310 Steps: 149600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007560, Sample Num: 120960, Cur Loss: 0.00196969, Cur Avg Loss: 0.06046491, Log Avg loss: 0.04619205, Global Avg Loss: 0.09351826, Time: 0.1861 Steps: 149800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00057584, Cur Avg Loss: 0.06065892, Log Avg loss: 0.06799231, Global Avg Loss: 0.09348422, Time: 0.1882 Steps: 150000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007960, Sample Num: 127360, Cur Loss: 0.00130189, Cur Avg Loss: 0.06091640, Log Avg loss: 0.07090667, Global Avg Loss: 0.09345416, Time: 0.1860 Steps: 150200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00403441, Cur Avg Loss: 0.06108426, Log Avg loss: 0.06776518, Global Avg Loss: 0.09342000, Time: 0.0310 Steps: 150400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00632715, Cur Avg Loss: 0.06087428, Log Avg loss: 0.05230711, Global Avg Loss: 0.09336540, Time: 0.0791 Steps: 150600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008560, Sample Num: 136960, Cur Loss: 0.05827837, Cur Avg Loss: 0.06078425, Log Avg loss: 0.05702074, Global Avg Loss: 0.09331720, Time: 0.1306 Steps: 150800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008760, Sample Num: 140160, Cur Loss: 0.26926410, Cur Avg Loss: 0.06093148, Log Avg loss: 0.06723297, Global Avg Loss: 0.09328265, Time: 0.1077 Steps: 151000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008960, Sample Num: 143360, Cur Loss: 0.00076784, Cur Avg Loss: 0.06074585, Log Avg loss: 0.05261539, Global Avg Loss: 0.09322886, Time: 0.0834 Steps: 151200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 009160, Sample Num: 146560, Cur Loss: 0.08667751, Cur Avg Loss: 0.06102859, Log Avg loss: 0.07369509, Global Avg Loss: 0.09320305, Time: 0.1005 Steps: 151400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00253950, Cur Avg Loss: 0.06063336, Log Avg loss: 0.04253182, Global Avg Loss: 0.09313621, Time: 0.1865 Steps: 151600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009560, Sample Num: 152960, Cur Loss: 0.18479413, Cur Avg Loss: 0.06088068, Log Avg loss: 0.07245530, Global Avg Loss: 0.09310896, Time: 0.0565 Steps: 151800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009760, Sample Num: 156160, Cur Loss: 0.29524374, Cur Avg Loss: 0.06074959, Log Avg loss: 0.05448368, Global Avg Loss: 0.09305813, Time: 0.0946 Steps: 152000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009960, Sample Num: 159360, Cur Loss: 0.17834327, Cur Avg Loss: 0.06078614, Log Avg loss: 0.06256982, Global Avg Loss: 0.09301807, Time: 0.0872 Steps: 152200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010160, Sample Num: 162560, Cur Loss: 0.00064445, Cur Avg Loss: 0.06053080, Log Avg loss: 0.04781498, Global Avg Loss: 0.09295875, Time: 0.0524 Steps: 152400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00204476, Cur Avg Loss: 0.06028633, Log Avg loss: 0.04786713, Global Avg Loss: 0.09289965, Time: 0.1394 Steps: 152600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010560, Sample Num: 168960, Cur Loss: 0.05406406, Cur Avg Loss: 0.06026538, Log Avg loss: 0.05917994, Global Avg Loss: 0.09285552, Time: 0.2028 Steps: 152800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010760, Sample Num: 172160, Cur Loss: 0.00048916, Cur Avg Loss: 0.06001208, Log Avg loss: 0.04663797, Global Avg Loss: 0.09279510, Time: 0.1880 Steps: 153000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010960, Sample Num: 175360, Cur Loss: 0.06527133, Cur Avg Loss: 0.05990038, Log Avg loss: 0.05389067, Global Avg Loss: 0.09274431, Time: 0.0337 Steps: 153200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00157531, Cur Avg Loss: 0.05977469, Log Avg loss: 0.05288687, Global Avg Loss: 0.09269235, Time: 0.1952 Steps: 153400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 011360, Sample Num: 181760, Cur Loss: 0.00928166, Cur Avg Loss: 0.05985692, Log Avg loss: 0.06444589, Global Avg Loss: 0.09265557, Time: 0.0778 Steps: 153600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00038889, Cur Avg Loss: 0.05973003, Log Avg loss: 0.05252248, Global Avg Loss: 0.09260338, Time: 0.1825 Steps: 153800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00059297, Cur Avg Loss: 0.05941607, Log Avg loss: 0.04126898, Global Avg Loss: 0.09253671, Time: 0.3137 Steps: 154000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011960, Sample Num: 191360, Cur Loss: 0.00130727, Cur Avg Loss: 0.05937455, Log Avg loss: 0.05693315, Global Avg Loss: 0.09249053, Time: 0.0514 Steps: 154200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012160, Sample Num: 194560, Cur Loss: 0.00890106, Cur Avg Loss: 0.05921223, Log Avg loss: 0.04950542, Global Avg Loss: 0.09243485, Time: 0.1564 Steps: 154400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012360, Sample Num: 197760, Cur Loss: 0.01098420, Cur Avg Loss: 0.05892541, Log Avg loss: 0.04148709, Global Avg Loss: 0.09236894, Time: 0.0944 Steps: 154600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012560, Sample Num: 200960, Cur Loss: 0.00513682, Cur Avg Loss: 0.05919281, Log Avg loss: 0.07571829, Global Avg Loss: 0.09234743, Time: 0.1094 Steps: 154800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012760, Sample Num: 204160, Cur Loss: 0.00036178, Cur Avg Loss: 0.05894955, Log Avg loss: 0.04367262, Global Avg Loss: 0.09228462, Time: 0.1005 Steps: 155000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012960, Sample Num: 207360, Cur Loss: 0.10431403, Cur Avg Loss: 0.05882269, Log Avg loss: 0.05072903, Global Avg Loss: 0.09223107, Time: 0.3712 Steps: 155200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 013160, Sample Num: 210560, Cur Loss: 0.00061597, Cur Avg Loss: 0.05881785, Log Avg loss: 0.05850423, Global Avg Loss: 0.09218767, Time: 0.0505 Steps: 155400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 013360, Sample Num: 213760, Cur Loss: 0.00108350, Cur Avg Loss: 0.05910730, Log Avg loss: 0.07815284, Global Avg Loss: 0.09216963, Time: 0.1806 Steps: 155600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013560, Sample Num: 216960, Cur Loss: 0.00038902, Cur Avg Loss: 0.05908940, Log Avg loss: 0.05789404, Global Avg Loss: 0.09212563, Time: 0.0611 Steps: 155800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013760, Sample Num: 220160, Cur Loss: 0.00162648, Cur Avg Loss: 0.05913482, Log Avg loss: 0.06221439, Global Avg Loss: 0.09208728, Time: 0.0543 Steps: 156000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013960, Sample Num: 223360, Cur Loss: 0.00021820, Cur Avg Loss: 0.05886395, Log Avg loss: 0.04022756, Global Avg Loss: 0.09202088, Time: 0.2024 Steps: 156200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014160, Sample Num: 226560, Cur Loss: 0.01123940, Cur Avg Loss: 0.05881966, Log Avg loss: 0.05572834, Global Avg Loss: 0.09197447, Time: 0.0501 Steps: 156400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014360, Sample Num: 229760, Cur Loss: 0.08967134, Cur Avg Loss: 0.05889896, Log Avg loss: 0.06451356, Global Avg Loss: 0.09193940, Time: 0.3150 Steps: 156600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014560, Sample Num: 232960, Cur Loss: 0.16755408, Cur Avg Loss: 0.05882744, Log Avg loss: 0.05369227, Global Avg Loss: 0.09189061, Time: 0.0873 Steps: 156800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014760, Sample Num: 236160, Cur Loss: 0.00006103, Cur Avg Loss: 0.05872051, Log Avg loss: 0.05093571, Global Avg Loss: 0.09183844, Time: 0.0866 Steps: 157000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014960, Sample Num: 239360, Cur Loss: 0.00606918, Cur Avg Loss: 0.05874797, Log Avg loss: 0.06077491, Global Avg Loss: 0.09179892, Time: 0.0600 Steps: 157200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 015160, Sample Num: 242560, Cur Loss: 0.25676599, Cur Avg Loss: 0.05883984, Log Avg loss: 0.06571162, Global Avg Loss: 0.09176577, Time: 0.0997 Steps: 157400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 015360, Sample Num: 245760, Cur Loss: 0.00005282, Cur Avg Loss: 0.05888040, Log Avg loss: 0.06195521, Global Avg Loss: 0.09172794, Time: 0.0931 Steps: 157600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015560, Sample Num: 248960, Cur Loss: 0.00024831, Cur Avg Loss: 0.05883438, Log Avg loss: 0.05529970, Global Avg Loss: 0.09168177, Time: 0.0884 Steps: 157800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015760, Sample Num: 252160, Cur Loss: 0.00042565, Cur Avg Loss: 0.05870000, Log Avg loss: 0.04824515, Global Avg Loss: 0.09162679, Time: 0.1061 Steps: 158000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015960, Sample Num: 255360, Cur Loss: 0.00070204, Cur Avg Loss: 0.05870059, Log Avg loss: 0.05874722, Global Avg Loss: 0.09158522, Time: 0.3217 Steps: 158200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016160, Sample Num: 258560, Cur Loss: 0.00140932, Cur Avg Loss: 0.05865272, Log Avg loss: 0.05483278, Global Avg Loss: 0.09153882, Time: 0.2212 Steps: 158400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016360, Sample Num: 261760, Cur Loss: 0.00021277, Cur Avg Loss: 0.05870124, Log Avg loss: 0.06262157, Global Avg Loss: 0.09150235, Time: 0.0999 Steps: 158600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016560, Sample Num: 264960, Cur Loss: 0.00010829, Cur Avg Loss: 0.05851504, Log Avg loss: 0.04328374, Global Avg Loss: 0.09144162, Time: 0.0893 Steps: 158800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016760, Sample Num: 268160, Cur Loss: 0.00156027, Cur Avg Loss: 0.05856711, Log Avg loss: 0.06287833, Global Avg Loss: 0.09140569, Time: 0.1798 Steps: 159000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016960, Sample Num: 271360, Cur Loss: 0.41092217, Cur Avg Loss: 0.05856139, Log Avg loss: 0.05808219, Global Avg Loss: 0.09136383, Time: 0.0720 Steps: 159200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 017160, Sample Num: 274560, Cur Loss: 0.00016163, Cur Avg Loss: 0.05872716, Log Avg loss: 0.07278472, Global Avg Loss: 0.09134052, Time: 0.0401 Steps: 159400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 017360, Sample Num: 277760, Cur Loss: 0.01962217, Cur Avg Loss: 0.05878253, Log Avg loss: 0.06353333, Global Avg Loss: 0.09130567, Time: 0.3040 Steps: 159600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017560, Sample Num: 280960, Cur Loss: 0.00062458, Cur Avg Loss: 0.05869418, Log Avg loss: 0.05102538, Global Avg Loss: 0.09125526, Time: 0.0945 Steps: 159800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017760, Sample Num: 284160, Cur Loss: 0.00021929, Cur Avg Loss: 0.05875585, Log Avg loss: 0.06416987, Global Avg Loss: 0.09122140, Time: 0.0510 Steps: 160000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017960, Sample Num: 287360, Cur Loss: 0.23635355, Cur Avg Loss: 0.05859298, Log Avg loss: 0.04413038, Global Avg Loss: 0.09116261, Time: 0.0767 Steps: 160200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018160, Sample Num: 290560, Cur Loss: 0.00113511, Cur Avg Loss: 0.05860661, Log Avg loss: 0.05983072, Global Avg Loss: 0.09112354, Time: 0.0988 Steps: 160400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018360, Sample Num: 293760, Cur Loss: 0.00193361, Cur Avg Loss: 0.05848513, Log Avg loss: 0.04745476, Global Avg Loss: 0.09106916, Time: 0.0543 Steps: 160600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018560, Sample Num: 296960, Cur Loss: 0.11246192, Cur Avg Loss: 0.05864377, Log Avg loss: 0.07320696, Global Avg Loss: 0.09104695, Time: 0.1263 Steps: 160800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018760, Sample Num: 300160, Cur Loss: 0.00104695, Cur Avg Loss: 0.05862477, Log Avg loss: 0.05686134, Global Avg Loss: 0.09100448, Time: 0.0513 Steps: 161000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018960, Sample Num: 303360, Cur Loss: 0.00049781, Cur Avg Loss: 0.05867150, Log Avg loss: 0.06305523, Global Avg Loss: 0.09096980, Time: 0.0744 Steps: 161200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 019160, Sample Num: 306560, Cur Loss: 0.00086572, Cur Avg Loss: 0.05878243, Log Avg loss: 0.06929858, Global Avg Loss: 0.09094295, Time: 0.1261 Steps: 161400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 019360, Sample Num: 309760, Cur Loss: 0.00034402, Cur Avg Loss: 0.05877120, Log Avg loss: 0.05769528, Global Avg Loss: 0.09090180, Time: 0.3109 Steps: 161600, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019560, Sample Num: 312960, Cur Loss: 0.00038822, Cur Avg Loss: 0.05862796, Log Avg loss: 0.04476205, Global Avg Loss: 0.09084477, Time: 0.0564 Steps: 161800, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019760, Sample Num: 316160, Cur Loss: 0.00035139, Cur Avg Loss: 0.05862863, Log Avg loss: 0.05869433, Global Avg Loss: 0.09080508, Time: 0.0817 Steps: 162000, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019960, Sample Num: 319360, Cur Loss: 0.00133643, Cur Avg Loss: 0.05869521, Log Avg loss: 0.06527378, Global Avg Loss: 0.09077359, Time: 0.0948 Steps: 162200, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 020160, Sample Num: 322560, Cur Loss: 0.00034528, Cur Avg Loss: 0.05861787, Log Avg loss: 0.05089885, Global Avg Loss: 0.09072449, Time: 0.0365 Steps: 162400, Updated lr: 0.000020 ***** Running evaluation checkpoint-162560 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-162560 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2766.101441, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.06565, "eval_total_loss": 177.911686, "eval_acc": 0.982929, "eval_prec": 0.975955, "eval_recall": 0.990332, "eval_f1": 0.983091, "eval_roc_auc": 0.998524, "eval_pr_auc": 0.998364, "eval_confusion_matrix": {"tn": 21097, "fp": 530, "fn": 210, "tp": 21512}, "eval_mcc2": 0.965963, "eval_mcc": 0.965963, "eval_sn": 0.990332, "eval_sp": 0.975494, "update_flag": true, "test_avg_loss": 0.066562, "test_total_loss": 270.507199, "test_acc": 0.982806, "test_prec": 0.974482, "test_recall": 0.991571, "test_f1": 0.982952, "test_roc_auc": 0.998512, "test_pr_auc": 0.998359, "test_confusion_matrix": {"tn": 31673, "fp": 844, "fn": 274, "tp": 32231}, "test_mcc2": 0.96576, "test_mcc": 0.96576, "test_sn": 0.991571, "test_sp": 0.974044, "lr": 2.001970443349754e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.09069668911716106, "train_cur_epoch_loss": 1191.733315801699, "train_cur_epoch_avg_loss": 0.05864829310047732, "train_cur_epoch_time": 2766.1014409065247, "train_cur_epoch_avg_time": 0.13612703941469118, "epoch": 8, "step": 162560} ################################################## Training, Epoch: 0009, Batch: 000040, Sample Num: 640, Cur Loss: 0.00164748, Cur Avg Loss: 0.04198266, Log Avg loss: 0.05838181, Global Avg Loss: 0.09068471, Time: 0.0946 Steps: 162600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000240, Sample Num: 3840, Cur Loss: 0.00303005, Cur Avg Loss: 0.04597921, Log Avg loss: 0.04677852, Global Avg Loss: 0.09063077, Time: 0.0370 Steps: 162800, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000440, Sample Num: 7040, Cur Loss: 0.00052556, Cur Avg Loss: 0.05233218, Log Avg loss: 0.05995574, Global Avg Loss: 0.09059313, Time: 0.0515 Steps: 163000, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000640, Sample Num: 10240, Cur Loss: 0.55564296, Cur Avg Loss: 0.05802651, Log Avg loss: 0.07055404, Global Avg Loss: 0.09056857, Time: 0.0303 Steps: 163200, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000840, Sample Num: 13440, Cur Loss: 0.00025679, Cur Avg Loss: 0.05793547, Log Avg loss: 0.05764415, Global Avg Loss: 0.09052827, Time: 0.0909 Steps: 163400, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00212618, Cur Avg Loss: 0.05848394, Log Avg loss: 0.06078751, Global Avg Loss: 0.09049191, Time: 0.0837 Steps: 163600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 001240, Sample Num: 19840, Cur Loss: 0.00223883, Cur Avg Loss: 0.05718741, Log Avg loss: 0.05044542, Global Avg Loss: 0.09044302, Time: 0.0579 Steps: 163800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001440, Sample Num: 23040, Cur Loss: 0.00183363, Cur Avg Loss: 0.05638774, Log Avg loss: 0.05142979, Global Avg Loss: 0.09039544, Time: 0.0526 Steps: 164000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001640, Sample Num: 26240, Cur Loss: 0.00034727, Cur Avg Loss: 0.05750915, Log Avg loss: 0.06558336, Global Avg Loss: 0.09036522, Time: 0.0770 Steps: 164200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001840, Sample Num: 29440, Cur Loss: 0.00029102, Cur Avg Loss: 0.05760524, Log Avg loss: 0.05839312, Global Avg Loss: 0.09032632, Time: 0.0539 Steps: 164400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00143284, Cur Avg Loss: 0.05771221, Log Avg loss: 0.05869635, Global Avg Loss: 0.09028789, Time: 0.0840 Steps: 164600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002240, Sample Num: 35840, Cur Loss: 0.00730797, Cur Avg Loss: 0.05784016, Log Avg loss: 0.05914520, Global Avg Loss: 0.09025010, Time: 0.0617 Steps: 164800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002440, Sample Num: 39040, Cur Loss: 0.00177902, Cur Avg Loss: 0.05715470, Log Avg loss: 0.04947759, Global Avg Loss: 0.09020067, Time: 0.1278 Steps: 165000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002640, Sample Num: 42240, Cur Loss: 0.00040417, Cur Avg Loss: 0.05803175, Log Avg loss: 0.06873175, Global Avg Loss: 0.09017468, Time: 0.0535 Steps: 165200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00167112, Cur Avg Loss: 0.05789455, Log Avg loss: 0.05608346, Global Avg Loss: 0.09013346, Time: 0.0366 Steps: 165400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 003040, Sample Num: 48640, Cur Loss: 0.14565068, Cur Avg Loss: 0.05733436, Log Avg loss: 0.04937980, Global Avg Loss: 0.09008424, Time: 0.1893 Steps: 165600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 003240, Sample Num: 51840, Cur Loss: 0.00026765, Cur Avg Loss: 0.05629818, Log Avg loss: 0.04054820, Global Avg Loss: 0.09002449, Time: 0.2039 Steps: 165800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003440, Sample Num: 55040, Cur Loss: 0.00607582, Cur Avg Loss: 0.05605444, Log Avg loss: 0.05210579, Global Avg Loss: 0.08997880, Time: 0.0952 Steps: 166000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003640, Sample Num: 58240, Cur Loss: 0.15271774, Cur Avg Loss: 0.05559467, Log Avg loss: 0.04768671, Global Avg Loss: 0.08992791, Time: 0.0796 Steps: 166200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003840, Sample Num: 61440, Cur Loss: 0.00209309, Cur Avg Loss: 0.05574581, Log Avg loss: 0.05849651, Global Avg Loss: 0.08989013, Time: 0.0540 Steps: 166400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00134386, Cur Avg Loss: 0.05613449, Log Avg loss: 0.06359709, Global Avg Loss: 0.08985857, Time: 0.0944 Steps: 166600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004240, Sample Num: 67840, Cur Loss: 0.00041303, Cur Avg Loss: 0.05639765, Log Avg loss: 0.06171362, Global Avg Loss: 0.08982482, Time: 0.0594 Steps: 166800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004440, Sample Num: 71040, Cur Loss: 0.22352590, Cur Avg Loss: 0.05633428, Log Avg loss: 0.05499081, Global Avg Loss: 0.08978310, Time: 0.0305 Steps: 167000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00203681, Cur Avg Loss: 0.05610796, Log Avg loss: 0.05108351, Global Avg Loss: 0.08973681, Time: 0.1627 Steps: 167200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00118668, Cur Avg Loss: 0.05566613, Log Avg loss: 0.04541571, Global Avg Loss: 0.08968386, Time: 0.1901 Steps: 167400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 005040, Sample Num: 80640, Cur Loss: 0.00105631, Cur Avg Loss: 0.05589390, Log Avg loss: 0.06140600, Global Avg Loss: 0.08965011, Time: 0.0923 Steps: 167600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 005240, Sample Num: 83840, Cur Loss: 0.00592188, Cur Avg Loss: 0.05610258, Log Avg loss: 0.06136138, Global Avg Loss: 0.08961640, Time: 0.0368 Steps: 167800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005440, Sample Num: 87040, Cur Loss: 0.00047716, Cur Avg Loss: 0.05587395, Log Avg loss: 0.04988362, Global Avg Loss: 0.08956910, Time: 0.1875 Steps: 168000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005640, Sample Num: 90240, Cur Loss: 0.15791552, Cur Avg Loss: 0.05546400, Log Avg loss: 0.04431352, Global Avg Loss: 0.08951528, Time: 0.0425 Steps: 168200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005840, Sample Num: 93440, Cur Loss: 0.00372760, Cur Avg Loss: 0.05547963, Log Avg loss: 0.05592041, Global Avg Loss: 0.08947538, Time: 0.1882 Steps: 168400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006040, Sample Num: 96640, Cur Loss: 0.02161592, Cur Avg Loss: 0.05598855, Log Avg loss: 0.07084885, Global Avg Loss: 0.08945329, Time: 0.1939 Steps: 168600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006240, Sample Num: 99840, Cur Loss: 0.00758047, Cur Avg Loss: 0.05566178, Log Avg loss: 0.04579343, Global Avg Loss: 0.08940156, Time: 0.0356 Steps: 168800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006440, Sample Num: 103040, Cur Loss: 0.00010281, Cur Avg Loss: 0.05573129, Log Avg loss: 0.05789992, Global Avg Loss: 0.08936428, Time: 0.0514 Steps: 169000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006640, Sample Num: 106240, Cur Loss: 0.00711400, Cur Avg Loss: 0.05538263, Log Avg loss: 0.04415604, Global Avg Loss: 0.08931084, Time: 0.1530 Steps: 169200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006840, Sample Num: 109440, Cur Loss: 0.00015375, Cur Avg Loss: 0.05607156, Log Avg loss: 0.07894385, Global Avg Loss: 0.08929860, Time: 0.1840 Steps: 169400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 007040, Sample Num: 112640, Cur Loss: 0.36358804, Cur Avg Loss: 0.05562730, Log Avg loss: 0.04043380, Global Avg Loss: 0.08924098, Time: 0.3024 Steps: 169600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 007240, Sample Num: 115840, Cur Loss: 0.00083491, Cur Avg Loss: 0.05561967, Log Avg loss: 0.05535099, Global Avg Loss: 0.08920106, Time: 0.3133 Steps: 169800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007440, Sample Num: 119040, Cur Loss: 0.01445890, Cur Avg Loss: 0.05560145, Log Avg loss: 0.05494198, Global Avg Loss: 0.08916076, Time: 0.1023 Steps: 170000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007640, Sample Num: 122240, Cur Loss: 0.00011400, Cur Avg Loss: 0.05546067, Log Avg loss: 0.05022341, Global Avg Loss: 0.08911500, Time: 0.0940 Steps: 170200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007840, Sample Num: 125440, Cur Loss: 0.00171028, Cur Avg Loss: 0.05565169, Log Avg loss: 0.06294859, Global Avg Loss: 0.08908429, Time: 0.0495 Steps: 170400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008040, Sample Num: 128640, Cur Loss: 0.00046633, Cur Avg Loss: 0.05574732, Log Avg loss: 0.05949616, Global Avg Loss: 0.08904960, Time: 0.0303 Steps: 170600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008240, Sample Num: 131840, Cur Loss: 0.00038838, Cur Avg Loss: 0.05563515, Log Avg loss: 0.05112591, Global Avg Loss: 0.08900520, Time: 0.0835 Steps: 170800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008440, Sample Num: 135040, Cur Loss: 0.46320400, Cur Avg Loss: 0.05540442, Log Avg loss: 0.04589825, Global Avg Loss: 0.08895478, Time: 0.1918 Steps: 171000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008640, Sample Num: 138240, Cur Loss: 0.00007587, Cur Avg Loss: 0.05515646, Log Avg loss: 0.04469253, Global Avg Loss: 0.08890307, Time: 0.0518 Steps: 171200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008840, Sample Num: 141440, Cur Loss: 0.11306109, Cur Avg Loss: 0.05534827, Log Avg loss: 0.06363449, Global Avg Loss: 0.08887358, Time: 0.0678 Steps: 171400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 009040, Sample Num: 144640, Cur Loss: 0.00682379, Cur Avg Loss: 0.05521375, Log Avg loss: 0.04926827, Global Avg Loss: 0.08882742, Time: 0.1877 Steps: 171600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 009240, Sample Num: 147840, Cur Loss: 0.18140492, Cur Avg Loss: 0.05527205, Log Avg loss: 0.05790687, Global Avg Loss: 0.08879143, Time: 0.0362 Steps: 171800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009440, Sample Num: 151040, Cur Loss: 0.21893977, Cur Avg Loss: 0.05513636, Log Avg loss: 0.04886752, Global Avg Loss: 0.08874501, Time: 0.1884 Steps: 172000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009640, Sample Num: 154240, Cur Loss: 0.00430690, Cur Avg Loss: 0.05529689, Log Avg loss: 0.06287422, Global Avg Loss: 0.08871496, Time: 0.1893 Steps: 172200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009840, Sample Num: 157440, Cur Loss: 0.41094643, Cur Avg Loss: 0.05524933, Log Avg loss: 0.05295680, Global Avg Loss: 0.08867348, Time: 0.0831 Steps: 172400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010040, Sample Num: 160640, Cur Loss: 0.00117262, Cur Avg Loss: 0.05524164, Log Avg loss: 0.05486328, Global Avg Loss: 0.08863430, Time: 0.4076 Steps: 172600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010240, Sample Num: 163840, Cur Loss: 0.02574619, Cur Avg Loss: 0.05517818, Log Avg loss: 0.05199233, Global Avg Loss: 0.08859189, Time: 0.0616 Steps: 172800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010440, Sample Num: 167040, Cur Loss: 0.00057789, Cur Avg Loss: 0.05497932, Log Avg loss: 0.04479755, Global Avg Loss: 0.08854126, Time: 0.1866 Steps: 173000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010640, Sample Num: 170240, Cur Loss: 0.00447721, Cur Avg Loss: 0.05486930, Log Avg loss: 0.04912641, Global Avg Loss: 0.08849575, Time: 0.0658 Steps: 173200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010840, Sample Num: 173440, Cur Loss: 0.37859240, Cur Avg Loss: 0.05436858, Log Avg loss: 0.02773015, Global Avg Loss: 0.08842566, Time: 0.0646 Steps: 173400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 011040, Sample Num: 176640, Cur Loss: 0.00303777, Cur Avg Loss: 0.05444067, Log Avg loss: 0.05834807, Global Avg Loss: 0.08839101, Time: 0.1868 Steps: 173600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 011240, Sample Num: 179840, Cur Loss: 0.00005751, Cur Avg Loss: 0.05431125, Log Avg loss: 0.04716738, Global Avg Loss: 0.08834357, Time: 0.0920 Steps: 173800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011440, Sample Num: 183040, Cur Loss: 0.00036785, Cur Avg Loss: 0.05425215, Log Avg loss: 0.05093091, Global Avg Loss: 0.08830057, Time: 0.0943 Steps: 174000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011640, Sample Num: 186240, Cur Loss: 0.00043534, Cur Avg Loss: 0.05422255, Log Avg loss: 0.05252936, Global Avg Loss: 0.08825950, Time: 0.2836 Steps: 174200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011840, Sample Num: 189440, Cur Loss: 0.02159186, Cur Avg Loss: 0.05411161, Log Avg loss: 0.04765452, Global Avg Loss: 0.08821293, Time: 0.2033 Steps: 174400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012040, Sample Num: 192640, Cur Loss: 0.15144265, Cur Avg Loss: 0.05414671, Log Avg loss: 0.05622467, Global Avg Loss: 0.08817629, Time: 0.0711 Steps: 174600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012240, Sample Num: 195840, Cur Loss: 0.01887464, Cur Avg Loss: 0.05395555, Log Avg loss: 0.04244796, Global Avg Loss: 0.08812397, Time: 0.2985 Steps: 174800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012440, Sample Num: 199040, Cur Loss: 0.01717126, Cur Avg Loss: 0.05394251, Log Avg loss: 0.05314471, Global Avg Loss: 0.08808399, Time: 0.3413 Steps: 175000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012640, Sample Num: 202240, Cur Loss: 0.00008812, Cur Avg Loss: 0.05365584, Log Avg loss: 0.03582487, Global Avg Loss: 0.08802434, Time: 0.0782 Steps: 175200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012840, Sample Num: 205440, Cur Loss: 0.00707493, Cur Avg Loss: 0.05362439, Log Avg loss: 0.05163657, Global Avg Loss: 0.08798284, Time: 0.2773 Steps: 175400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 013040, Sample Num: 208640, Cur Loss: 0.00076097, Cur Avg Loss: 0.05354008, Log Avg loss: 0.04812722, Global Avg Loss: 0.08793745, Time: 0.2164 Steps: 175600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 013240, Sample Num: 211840, Cur Loss: 0.00061600, Cur Avg Loss: 0.05372984, Log Avg loss: 0.06610217, Global Avg Loss: 0.08791261, Time: 0.3039 Steps: 175800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013440, Sample Num: 215040, Cur Loss: 0.00014756, Cur Avg Loss: 0.05379639, Log Avg loss: 0.05820245, Global Avg Loss: 0.08787885, Time: 0.1466 Steps: 176000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013640, Sample Num: 218240, Cur Loss: 0.03299882, Cur Avg Loss: 0.05388178, Log Avg loss: 0.05961998, Global Avg Loss: 0.08784677, Time: 0.1566 Steps: 176200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013840, Sample Num: 221440, Cur Loss: 0.00187336, Cur Avg Loss: 0.05372255, Log Avg loss: 0.04286291, Global Avg Loss: 0.08779577, Time: 0.2994 Steps: 176400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014040, Sample Num: 224640, Cur Loss: 0.01551840, Cur Avg Loss: 0.05357281, Log Avg loss: 0.04321087, Global Avg Loss: 0.08774528, Time: 0.2033 Steps: 176600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014240, Sample Num: 227840, Cur Loss: 0.00133107, Cur Avg Loss: 0.05369362, Log Avg loss: 0.06217416, Global Avg Loss: 0.08771635, Time: 0.1875 Steps: 176800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014440, Sample Num: 231040, Cur Loss: 0.00966099, Cur Avg Loss: 0.05377833, Log Avg loss: 0.05980969, Global Avg Loss: 0.08768482, Time: 0.0882 Steps: 177000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014640, Sample Num: 234240, Cur Loss: 0.00125144, Cur Avg Loss: 0.05358066, Log Avg loss: 0.03930898, Global Avg Loss: 0.08763022, Time: 0.2569 Steps: 177200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014840, Sample Num: 237440, Cur Loss: 0.00031449, Cur Avg Loss: 0.05368285, Log Avg loss: 0.06116358, Global Avg Loss: 0.08760038, Time: 0.0370 Steps: 177400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015040, Sample Num: 240640, Cur Loss: 0.00041153, Cur Avg Loss: 0.05362248, Log Avg loss: 0.04914285, Global Avg Loss: 0.08755707, Time: 0.3096 Steps: 177600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015240, Sample Num: 243840, Cur Loss: 0.00009030, Cur Avg Loss: 0.05356538, Log Avg loss: 0.04927089, Global Avg Loss: 0.08751401, Time: 0.0532 Steps: 177800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015440, Sample Num: 247040, Cur Loss: 0.21961780, Cur Avg Loss: 0.05369461, Log Avg loss: 0.06354230, Global Avg Loss: 0.08748707, Time: 0.2983 Steps: 178000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 015640, Sample Num: 250240, Cur Loss: 0.02865617, Cur Avg Loss: 0.05344240, Log Avg loss: 0.03397202, Global Avg Loss: 0.08742701, Time: 0.1821 Steps: 178200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 015840, Sample Num: 253440, Cur Loss: 0.00023740, Cur Avg Loss: 0.05333798, Log Avg loss: 0.04517191, Global Avg Loss: 0.08737964, Time: 0.0945 Steps: 178400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016040, Sample Num: 256640, Cur Loss: 0.01337545, Cur Avg Loss: 0.05336862, Log Avg loss: 0.05579550, Global Avg Loss: 0.08734427, Time: 0.3140 Steps: 178600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016240, Sample Num: 259840, Cur Loss: 0.00097610, Cur Avg Loss: 0.05328480, Log Avg loss: 0.04656217, Global Avg Loss: 0.08729865, Time: 0.1059 Steps: 178800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016440, Sample Num: 263040, Cur Loss: 0.03194081, Cur Avg Loss: 0.05328865, Log Avg loss: 0.05360169, Global Avg Loss: 0.08726100, Time: 0.1863 Steps: 179000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016640, Sample Num: 266240, Cur Loss: 0.20651264, Cur Avg Loss: 0.05327268, Log Avg loss: 0.05195975, Global Avg Loss: 0.08722160, Time: 0.0598 Steps: 179200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016840, Sample Num: 269440, Cur Loss: 0.00520507, Cur Avg Loss: 0.05312361, Log Avg loss: 0.04072064, Global Avg Loss: 0.08716976, Time: 0.0532 Steps: 179400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017040, Sample Num: 272640, Cur Loss: 0.00865464, Cur Avg Loss: 0.05320292, Log Avg loss: 0.05988157, Global Avg Loss: 0.08713937, Time: 0.0805 Steps: 179600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017240, Sample Num: 275840, Cur Loss: 0.06996812, Cur Avg Loss: 0.05325577, Log Avg loss: 0.05775820, Global Avg Loss: 0.08710669, Time: 0.3040 Steps: 179800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017440, Sample Num: 279040, Cur Loss: 0.00048997, Cur Avg Loss: 0.05318747, Log Avg loss: 0.04730043, Global Avg Loss: 0.08706246, Time: 0.3334 Steps: 180000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 017640, Sample Num: 282240, Cur Loss: 0.00377909, Cur Avg Loss: 0.05329391, Log Avg loss: 0.06257549, Global Avg Loss: 0.08703529, Time: 0.1871 Steps: 180200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 017840, Sample Num: 285440, Cur Loss: 0.00027063, Cur Avg Loss: 0.05316265, Log Avg loss: 0.04158539, Global Avg Loss: 0.08698490, Time: 0.0949 Steps: 180400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018040, Sample Num: 288640, Cur Loss: 0.00077327, Cur Avg Loss: 0.05308508, Log Avg loss: 0.04616520, Global Avg Loss: 0.08693969, Time: 0.0591 Steps: 180600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018240, Sample Num: 291840, Cur Loss: 0.02724951, Cur Avg Loss: 0.05311994, Log Avg loss: 0.05626479, Global Avg Loss: 0.08690576, Time: 0.1063 Steps: 180800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018440, Sample Num: 295040, Cur Loss: 0.51063454, Cur Avg Loss: 0.05303828, Log Avg loss: 0.04559094, Global Avg Loss: 0.08686011, Time: 0.0765 Steps: 181000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018640, Sample Num: 298240, Cur Loss: 0.00041181, Cur Avg Loss: 0.05305225, Log Avg loss: 0.05433987, Global Avg Loss: 0.08682421, Time: 0.0364 Steps: 181200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018840, Sample Num: 301440, Cur Loss: 0.12564024, Cur Avg Loss: 0.05301206, Log Avg loss: 0.04926655, Global Avg Loss: 0.08678281, Time: 0.2558 Steps: 181400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019040, Sample Num: 304640, Cur Loss: 0.09153710, Cur Avg Loss: 0.05293244, Log Avg loss: 0.04543261, Global Avg Loss: 0.08673727, Time: 0.0540 Steps: 181600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019240, Sample Num: 307840, Cur Loss: 0.02642197, Cur Avg Loss: 0.05293849, Log Avg loss: 0.05351431, Global Avg Loss: 0.08670072, Time: 0.0805 Steps: 181800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019440, Sample Num: 311040, Cur Loss: 0.00468092, Cur Avg Loss: 0.05286044, Log Avg loss: 0.04535233, Global Avg Loss: 0.08665528, Time: 0.3137 Steps: 182000, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 019640, Sample Num: 314240, Cur Loss: 0.00018574, Cur Avg Loss: 0.05280066, Log Avg loss: 0.04698937, Global Avg Loss: 0.08661174, Time: 0.0585 Steps: 182200, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 019840, Sample Num: 317440, Cur Loss: 0.00012405, Cur Avg Loss: 0.05280141, Log Avg loss: 0.05287528, Global Avg Loss: 0.08657475, Time: 0.0929 Steps: 182400, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 020040, Sample Num: 320640, Cur Loss: 0.00101630, Cur Avg Loss: 0.05266139, Log Avg loss: 0.03877095, Global Avg Loss: 0.08652239, Time: 0.1878 Steps: 182600, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 020240, Sample Num: 323840, Cur Loss: 0.00332884, Cur Avg Loss: 0.05267172, Log Avg loss: 0.05370710, Global Avg Loss: 0.08648648, Time: 0.0782 Steps: 182800, Updated lr: 0.000010 ***** Running evaluation checkpoint-182880 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-182880 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2759.364817, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.060408, "eval_total_loss": 163.70448, "eval_acc": 0.985928, "eval_prec": 0.980824, "eval_recall": 0.991299, "eval_f1": 0.986034, "eval_roc_auc": 0.998721, "eval_pr_auc": 0.998716, "eval_confusion_matrix": {"tn": 21206, "fp": 421, "fn": 189, "tp": 21533}, "eval_mcc2": 0.971911, "eval_mcc": 0.971911, "eval_sn": 0.991299, "eval_sp": 0.980534, "update_flag": true, "test_avg_loss": 0.061509, "test_total_loss": 249.971024, "test_acc": 0.985559, "test_prec": 0.979057, "test_recall": 0.99234, "test_f1": 0.985653, "test_roc_auc": 0.998684, "test_pr_auc": 0.998605, "test_confusion_matrix": {"tn": 31827, "fp": 690, "fn": 249, "tp": 32256}, "test_mcc2": 0.971207, "test_mcc": 0.971207, "test_sn": 0.99234, "test_sp": 0.97878, "lr": 1.000985221674877e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.08646650628355367, "train_cur_epoch_loss": 1069.3408862505894, "train_cur_epoch_avg_loss": 0.05262504361469436, "train_cur_epoch_time": 2759.364817380905, "train_cur_epoch_avg_time": 0.13579551266638312, "epoch": 9, "step": 182880} ################################################## Training, Epoch: 0010, Batch: 000120, Sample Num: 1920, Cur Loss: 0.08094902, Cur Avg Loss: 0.02634397, Log Avg loss: 0.03213287, Global Avg Loss: 0.08642708, Time: 0.1877 Steps: 183000, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000320, Sample Num: 5120, Cur Loss: 0.24178278, Cur Avg Loss: 0.04703242, Log Avg loss: 0.05944550, Global Avg Loss: 0.08639763, Time: 0.0681 Steps: 183200, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00044164, Cur Avg Loss: 0.05149868, Log Avg loss: 0.05864470, Global Avg Loss: 0.08636736, Time: 0.3351 Steps: 183400, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000720, Sample Num: 11520, Cur Loss: 0.00111035, Cur Avg Loss: 0.05476509, Log Avg loss: 0.06325774, Global Avg Loss: 0.08634219, Time: 0.0590 Steps: 183600, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000920, Sample Num: 14720, Cur Loss: 0.24680798, Cur Avg Loss: 0.05177695, Log Avg loss: 0.04101966, Global Avg Loss: 0.08629287, Time: 0.0591 Steps: 183800, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 001120, Sample Num: 17920, Cur Loss: 0.00067925, Cur Avg Loss: 0.05106747, Log Avg loss: 0.04780388, Global Avg Loss: 0.08625103, Time: 0.0759 Steps: 184000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001320, Sample Num: 21120, Cur Loss: 0.00362165, Cur Avg Loss: 0.05143538, Log Avg loss: 0.05349566, Global Avg Loss: 0.08621547, Time: 0.4116 Steps: 184200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00044981, Cur Avg Loss: 0.05038149, Log Avg loss: 0.04342581, Global Avg Loss: 0.08616906, Time: 0.2027 Steps: 184400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001720, Sample Num: 27520, Cur Loss: 0.01141866, Cur Avg Loss: 0.05204470, Log Avg loss: 0.06468506, Global Avg Loss: 0.08614578, Time: 0.3049 Steps: 184600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00003585, Cur Avg Loss: 0.05189273, Log Avg loss: 0.05058584, Global Avg Loss: 0.08610730, Time: 0.0724 Steps: 184800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002120, Sample Num: 33920, Cur Loss: 0.00190825, Cur Avg Loss: 0.05163330, Log Avg loss: 0.04914277, Global Avg Loss: 0.08606734, Time: 0.0603 Steps: 185000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00042696, Cur Avg Loss: 0.05147485, Log Avg loss: 0.04979527, Global Avg Loss: 0.08602817, Time: 0.0561 Steps: 185200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002520, Sample Num: 40320, Cur Loss: 0.00003657, Cur Avg Loss: 0.05107221, Log Avg loss: 0.04640159, Global Avg Loss: 0.08598542, Time: 0.2025 Steps: 185400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00056723, Cur Avg Loss: 0.05152403, Log Avg loss: 0.05721701, Global Avg Loss: 0.08595442, Time: 0.1871 Steps: 185600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002920, Sample Num: 46720, Cur Loss: 0.23842920, Cur Avg Loss: 0.05145192, Log Avg loss: 0.05047111, Global Avg Loss: 0.08591622, Time: 0.0535 Steps: 185800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 003120, Sample Num: 49920, Cur Loss: 0.00314557, Cur Avg Loss: 0.05094051, Log Avg loss: 0.04347403, Global Avg Loss: 0.08587059, Time: 0.2984 Steps: 186000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003320, Sample Num: 53120, Cur Loss: 0.07067732, Cur Avg Loss: 0.05087083, Log Avg loss: 0.04978386, Global Avg Loss: 0.08583183, Time: 0.2980 Steps: 186200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00033755, Cur Avg Loss: 0.05029051, Log Avg loss: 0.04065718, Global Avg Loss: 0.08578335, Time: 0.0605 Steps: 186400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003720, Sample Num: 59520, Cur Loss: 0.01269807, Cur Avg Loss: 0.04963986, Log Avg loss: 0.03818832, Global Avg Loss: 0.08573234, Time: 0.0564 Steps: 186600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003920, Sample Num: 62720, Cur Loss: 0.00010398, Cur Avg Loss: 0.04914877, Log Avg loss: 0.04001450, Global Avg Loss: 0.08568339, Time: 0.0627 Steps: 186800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00010209, Cur Avg Loss: 0.04954013, Log Avg loss: 0.05721080, Global Avg Loss: 0.08565294, Time: 0.3032 Steps: 187000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004320, Sample Num: 69120, Cur Loss: 0.06773841, Cur Avg Loss: 0.05011559, Log Avg loss: 0.06197004, Global Avg Loss: 0.08562764, Time: 0.0927 Steps: 187200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00333585, Cur Avg Loss: 0.04997821, Log Avg loss: 0.04701075, Global Avg Loss: 0.08558643, Time: 0.1073 Steps: 187400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004720, Sample Num: 75520, Cur Loss: 0.00051793, Cur Avg Loss: 0.05006117, Log Avg loss: 0.05193621, Global Avg Loss: 0.08555055, Time: 0.1890 Steps: 187600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004920, Sample Num: 78720, Cur Loss: 0.00405829, Cur Avg Loss: 0.04955371, Log Avg loss: 0.03757768, Global Avg Loss: 0.08549946, Time: 0.0520 Steps: 187800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 005120, Sample Num: 81920, Cur Loss: 0.00040800, Cur Avg Loss: 0.04991457, Log Avg loss: 0.05879160, Global Avg Loss: 0.08547105, Time: 0.2772 Steps: 188000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005320, Sample Num: 85120, Cur Loss: 0.01216129, Cur Avg Loss: 0.04987710, Log Avg loss: 0.04891795, Global Avg Loss: 0.08543220, Time: 0.0589 Steps: 188200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005520, Sample Num: 88320, Cur Loss: 0.10603079, Cur Avg Loss: 0.04946909, Log Avg loss: 0.03861587, Global Avg Loss: 0.08538251, Time: 0.3719 Steps: 188400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005720, Sample Num: 91520, Cur Loss: 0.00223591, Cur Avg Loss: 0.04920993, Log Avg loss: 0.04205717, Global Avg Loss: 0.08533656, Time: 0.2050 Steps: 188600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005920, Sample Num: 94720, Cur Loss: 0.01242067, Cur Avg Loss: 0.04959799, Log Avg loss: 0.06069643, Global Avg Loss: 0.08531046, Time: 0.0450 Steps: 188800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006120, Sample Num: 97920, Cur Loss: 0.00179302, Cur Avg Loss: 0.05006966, Log Avg loss: 0.06403113, Global Avg Loss: 0.08528794, Time: 0.1664 Steps: 189000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006320, Sample Num: 101120, Cur Loss: 0.00097449, Cur Avg Loss: 0.05015409, Log Avg loss: 0.05273773, Global Avg Loss: 0.08525353, Time: 0.0595 Steps: 189200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006520, Sample Num: 104320, Cur Loss: 0.00101882, Cur Avg Loss: 0.04984812, Log Avg loss: 0.04017963, Global Avg Loss: 0.08520594, Time: 0.0554 Steps: 189400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006720, Sample Num: 107520, Cur Loss: 0.00229683, Cur Avg Loss: 0.04985496, Log Avg loss: 0.05007769, Global Avg Loss: 0.08516888, Time: 0.3140 Steps: 189600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006920, Sample Num: 110720, Cur Loss: 0.00146948, Cur Avg Loss: 0.04972268, Log Avg loss: 0.04527814, Global Avg Loss: 0.08512685, Time: 0.0408 Steps: 189800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 007120, Sample Num: 113920, Cur Loss: 0.32296365, Cur Avg Loss: 0.04993772, Log Avg loss: 0.05737813, Global Avg Loss: 0.08509764, Time: 0.2244 Steps: 190000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 007320, Sample Num: 117120, Cur Loss: 0.27307975, Cur Avg Loss: 0.05007023, Log Avg loss: 0.05478745, Global Avg Loss: 0.08506577, Time: 0.0957 Steps: 190200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007520, Sample Num: 120320, Cur Loss: 0.47446191, Cur Avg Loss: 0.04981806, Log Avg loss: 0.04058861, Global Avg Loss: 0.08501905, Time: 0.0516 Steps: 190400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007720, Sample Num: 123520, Cur Loss: 0.43840766, Cur Avg Loss: 0.04984133, Log Avg loss: 0.05071663, Global Avg Loss: 0.08498305, Time: 0.0480 Steps: 190600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007920, Sample Num: 126720, Cur Loss: 0.00025367, Cur Avg Loss: 0.05013274, Log Avg loss: 0.06138089, Global Avg Loss: 0.08495831, Time: 0.3142 Steps: 190800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008120, Sample Num: 129920, Cur Loss: 0.00024800, Cur Avg Loss: 0.05001911, Log Avg loss: 0.04551941, Global Avg Loss: 0.08491701, Time: 0.0362 Steps: 191000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008320, Sample Num: 133120, Cur Loss: 0.00040057, Cur Avg Loss: 0.04972856, Log Avg loss: 0.03793219, Global Avg Loss: 0.08486787, Time: 0.1193 Steps: 191200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008520, Sample Num: 136320, Cur Loss: 0.00049908, Cur Avg Loss: 0.04964344, Log Avg loss: 0.04610255, Global Avg Loss: 0.08482736, Time: 0.0522 Steps: 191400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008720, Sample Num: 139520, Cur Loss: 0.11731236, Cur Avg Loss: 0.04950888, Log Avg loss: 0.04377644, Global Avg Loss: 0.08478451, Time: 0.3138 Steps: 191600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008920, Sample Num: 142720, Cur Loss: 0.00007259, Cur Avg Loss: 0.04959309, Log Avg loss: 0.05326502, Global Avg Loss: 0.08475164, Time: 0.0736 Steps: 191800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 009120, Sample Num: 145920, Cur Loss: 0.00167240, Cur Avg Loss: 0.04967467, Log Avg loss: 0.05331276, Global Avg Loss: 0.08471889, Time: 0.0444 Steps: 192000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 009320, Sample Num: 149120, Cur Loss: 0.00030757, Cur Avg Loss: 0.04952119, Log Avg loss: 0.04252258, Global Avg Loss: 0.08467499, Time: 0.3214 Steps: 192200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009520, Sample Num: 152320, Cur Loss: 0.04181709, Cur Avg Loss: 0.04971173, Log Avg loss: 0.05859114, Global Avg Loss: 0.08464787, Time: 0.0908 Steps: 192400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009720, Sample Num: 155520, Cur Loss: 0.01083725, Cur Avg Loss: 0.04976786, Log Avg loss: 0.05243945, Global Avg Loss: 0.08461443, Time: 0.0844 Steps: 192600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009920, Sample Num: 158720, Cur Loss: 0.12812097, Cur Avg Loss: 0.04977149, Log Avg loss: 0.04994782, Global Avg Loss: 0.08457846, Time: 0.1526 Steps: 192800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010120, Sample Num: 161920, Cur Loss: 0.48019856, Cur Avg Loss: 0.04967488, Log Avg loss: 0.04488339, Global Avg Loss: 0.08453733, Time: 0.3139 Steps: 193000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010320, Sample Num: 165120, Cur Loss: 0.00275849, Cur Avg Loss: 0.04933502, Log Avg loss: 0.03213787, Global Avg Loss: 0.08448309, Time: 0.1869 Steps: 193200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010520, Sample Num: 168320, Cur Loss: 0.00020992, Cur Avg Loss: 0.04929231, Log Avg loss: 0.04708831, Global Avg Loss: 0.08444441, Time: 0.1877 Steps: 193400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010720, Sample Num: 171520, Cur Loss: 0.00469644, Cur Avg Loss: 0.04897631, Log Avg loss: 0.03235481, Global Avg Loss: 0.08439060, Time: 0.0925 Steps: 193600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010920, Sample Num: 174720, Cur Loss: 0.00030085, Cur Avg Loss: 0.04865373, Log Avg loss: 0.03136328, Global Avg Loss: 0.08433588, Time: 0.2032 Steps: 193800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 011120, Sample Num: 177920, Cur Loss: 0.00253523, Cur Avg Loss: 0.04878618, Log Avg loss: 0.05601839, Global Avg Loss: 0.08430669, Time: 0.0441 Steps: 194000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 011320, Sample Num: 181120, Cur Loss: 0.00008538, Cur Avg Loss: 0.04891150, Log Avg loss: 0.05587925, Global Avg Loss: 0.08427741, Time: 0.1234 Steps: 194200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011520, Sample Num: 184320, Cur Loss: 0.01242729, Cur Avg Loss: 0.04885198, Log Avg loss: 0.04548280, Global Avg Loss: 0.08423750, Time: 0.1865 Steps: 194400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011720, Sample Num: 187520, Cur Loss: 0.00023661, Cur Avg Loss: 0.04872679, Log Avg loss: 0.04151596, Global Avg Loss: 0.08419359, Time: 0.1609 Steps: 194600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011920, Sample Num: 190720, Cur Loss: 0.00044482, Cur Avg Loss: 0.04880269, Log Avg loss: 0.05325038, Global Avg Loss: 0.08416182, Time: 0.1889 Steps: 194800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012120, Sample Num: 193920, Cur Loss: 0.00044333, Cur Avg Loss: 0.04892701, Log Avg loss: 0.05633668, Global Avg Loss: 0.08413328, Time: 0.3093 Steps: 195000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012320, Sample Num: 197120, Cur Loss: 0.00050893, Cur Avg Loss: 0.04863100, Log Avg loss: 0.03069256, Global Avg Loss: 0.08407853, Time: 0.0824 Steps: 195200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012520, Sample Num: 200320, Cur Loss: 0.00021163, Cur Avg Loss: 0.04869690, Log Avg loss: 0.05275641, Global Avg Loss: 0.08404647, Time: 0.3634 Steps: 195400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012720, Sample Num: 203520, Cur Loss: 0.00029199, Cur Avg Loss: 0.04835050, Log Avg loss: 0.02666565, Global Avg Loss: 0.08398780, Time: 0.2216 Steps: 195600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012920, Sample Num: 206720, Cur Loss: 0.00027150, Cur Avg Loss: 0.04803394, Log Avg loss: 0.02790072, Global Avg Loss: 0.08393051, Time: 0.1635 Steps: 195800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 013120, Sample Num: 209920, Cur Loss: 0.00014154, Cur Avg Loss: 0.04816265, Log Avg loss: 0.05647745, Global Avg Loss: 0.08390249, Time: 0.2210 Steps: 196000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 013320, Sample Num: 213120, Cur Loss: 0.20484217, Cur Avg Loss: 0.04823955, Log Avg loss: 0.05328443, Global Avg Loss: 0.08387128, Time: 0.1749 Steps: 196200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013520, Sample Num: 216320, Cur Loss: 0.00019819, Cur Avg Loss: 0.04818479, Log Avg loss: 0.04453781, Global Avg Loss: 0.08383123, Time: 0.0588 Steps: 196400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013720, Sample Num: 219520, Cur Loss: 0.02309648, Cur Avg Loss: 0.04824963, Log Avg loss: 0.05263250, Global Avg Loss: 0.08379949, Time: 0.1879 Steps: 196600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013920, Sample Num: 222720, Cur Loss: 0.00139545, Cur Avg Loss: 0.04800247, Log Avg loss: 0.03104777, Global Avg Loss: 0.08374588, Time: 0.0734 Steps: 196800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014120, Sample Num: 225920, Cur Loss: 0.02113074, Cur Avg Loss: 0.04795055, Log Avg loss: 0.04433659, Global Avg Loss: 0.08370587, Time: 0.0597 Steps: 197000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014320, Sample Num: 229120, Cur Loss: 0.00046720, Cur Avg Loss: 0.04810230, Log Avg loss: 0.05881569, Global Avg Loss: 0.08368063, Time: 0.0799 Steps: 197200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014520, Sample Num: 232320, Cur Loss: 0.14333937, Cur Avg Loss: 0.04790556, Log Avg loss: 0.03381914, Global Avg Loss: 0.08363011, Time: 0.0307 Steps: 197400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014720, Sample Num: 235520, Cur Loss: 0.00069863, Cur Avg Loss: 0.04794551, Log Avg loss: 0.05084602, Global Avg Loss: 0.08359693, Time: 0.3420 Steps: 197600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014920, Sample Num: 238720, Cur Loss: 0.00046542, Cur Avg Loss: 0.04795875, Log Avg loss: 0.04893282, Global Avg Loss: 0.08356188, Time: 0.1284 Steps: 197800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 015120, Sample Num: 241920, Cur Loss: 0.00056129, Cur Avg Loss: 0.04790733, Log Avg loss: 0.04407148, Global Avg Loss: 0.08352199, Time: 0.0786 Steps: 198000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 015320, Sample Num: 245120, Cur Loss: 0.01674728, Cur Avg Loss: 0.04776745, Log Avg loss: 0.03719297, Global Avg Loss: 0.08347524, Time: 0.0737 Steps: 198200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015520, Sample Num: 248320, Cur Loss: 0.00036671, Cur Avg Loss: 0.04787043, Log Avg loss: 0.05575836, Global Avg Loss: 0.08344730, Time: 0.2031 Steps: 198400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015720, Sample Num: 251520, Cur Loss: 0.00036529, Cur Avg Loss: 0.04765565, Log Avg loss: 0.03098924, Global Avg Loss: 0.08339447, Time: 0.2738 Steps: 198600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015920, Sample Num: 254720, Cur Loss: 0.00538216, Cur Avg Loss: 0.04776196, Log Avg loss: 0.05611719, Global Avg Loss: 0.08336703, Time: 0.0386 Steps: 198800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016120, Sample Num: 257920, Cur Loss: 0.00023878, Cur Avg Loss: 0.04780768, Log Avg loss: 0.05144701, Global Avg Loss: 0.08333495, Time: 0.0553 Steps: 199000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016320, Sample Num: 261120, Cur Loss: 0.00219622, Cur Avg Loss: 0.04776094, Log Avg loss: 0.04399421, Global Avg Loss: 0.08329545, Time: 0.0476 Steps: 199200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016520, Sample Num: 264320, Cur Loss: 0.38813806, Cur Avg Loss: 0.04772613, Log Avg loss: 0.04488573, Global Avg Loss: 0.08325692, Time: 0.0959 Steps: 199400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016720, Sample Num: 267520, Cur Loss: 0.00088868, Cur Avg Loss: 0.04764956, Log Avg loss: 0.04132466, Global Avg Loss: 0.08321491, Time: 0.0791 Steps: 199600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016920, Sample Num: 270720, Cur Loss: 0.21107012, Cur Avg Loss: 0.04760821, Log Avg loss: 0.04415096, Global Avg Loss: 0.08317580, Time: 0.0729 Steps: 199800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 017120, Sample Num: 273920, Cur Loss: 0.00051780, Cur Avg Loss: 0.04770693, Log Avg loss: 0.05605929, Global Avg Loss: 0.08314869, Time: 0.0720 Steps: 200000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 017320, Sample Num: 277120, Cur Loss: 0.00016217, Cur Avg Loss: 0.04767926, Log Avg loss: 0.04531054, Global Avg Loss: 0.08311089, Time: 0.3135 Steps: 200200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017520, Sample Num: 280320, Cur Loss: 0.05017463, Cur Avg Loss: 0.04769834, Log Avg loss: 0.04935020, Global Avg Loss: 0.08307719, Time: 0.3950 Steps: 200400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017720, Sample Num: 283520, Cur Loss: 0.00099283, Cur Avg Loss: 0.04781953, Log Avg loss: 0.05843586, Global Avg Loss: 0.08305263, Time: 0.0591 Steps: 200600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017920, Sample Num: 286720, Cur Loss: 0.41938424, Cur Avg Loss: 0.04767103, Log Avg loss: 0.03451402, Global Avg Loss: 0.08300428, Time: 0.3134 Steps: 200800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018120, Sample Num: 289920, Cur Loss: 0.46115777, Cur Avg Loss: 0.04768944, Log Avg loss: 0.04933925, Global Avg Loss: 0.08297078, Time: 0.0878 Steps: 201000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018320, Sample Num: 293120, Cur Loss: 0.07470149, Cur Avg Loss: 0.04764787, Log Avg loss: 0.04388176, Global Avg Loss: 0.08293193, Time: 0.2515 Steps: 201200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018520, Sample Num: 296320, Cur Loss: 0.00096547, Cur Avg Loss: 0.04768859, Log Avg loss: 0.05141832, Global Avg Loss: 0.08290063, Time: 0.2986 Steps: 201400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018720, Sample Num: 299520, Cur Loss: 0.00017579, Cur Avg Loss: 0.04774334, Log Avg loss: 0.05281287, Global Avg Loss: 0.08287078, Time: 0.0844 Steps: 201600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018920, Sample Num: 302720, Cur Loss: 0.44103014, Cur Avg Loss: 0.04769003, Log Avg loss: 0.04270059, Global Avg Loss: 0.08283097, Time: 0.1848 Steps: 201800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 019120, Sample Num: 305920, Cur Loss: 0.00169320, Cur Avg Loss: 0.04774910, Log Avg loss: 0.05333741, Global Avg Loss: 0.08280177, Time: 0.0427 Steps: 202000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 019320, Sample Num: 309120, Cur Loss: 0.00008256, Cur Avg Loss: 0.04774868, Log Avg loss: 0.04770832, Global Avg Loss: 0.08276706, Time: 0.0944 Steps: 202200, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019520, Sample Num: 312320, Cur Loss: 0.00016368, Cur Avg Loss: 0.04767904, Log Avg loss: 0.04095115, Global Avg Loss: 0.08272574, Time: 0.1870 Steps: 202400, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019720, Sample Num: 315520, Cur Loss: 0.00062599, Cur Avg Loss: 0.04767242, Log Avg loss: 0.04702717, Global Avg Loss: 0.08269050, Time: 0.1879 Steps: 202600, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019920, Sample Num: 318720, Cur Loss: 0.16015759, Cur Avg Loss: 0.04767092, Log Avg loss: 0.04752309, Global Avg Loss: 0.08265582, Time: 0.2984 Steps: 202800, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 020120, Sample Num: 321920, Cur Loss: 0.08075560, Cur Avg Loss: 0.04761341, Log Avg loss: 0.04188539, Global Avg Loss: 0.08261565, Time: 0.0542 Steps: 203000, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 020320, Sample Num: 325113, Cur Loss: 0.00019185, Cur Avg Loss: 0.04766777, Log Avg loss: 0.05313599, Global Avg Loss: 0.08258663, Time: 0.0348 Steps: 203200, Updated lr: 0.000000 ***** Running evaluation checkpoint-203200 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-203200 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2753.649088, Avg time per batch (s): 0.140000 {"eval_avg_loss": 0.050129, "eval_total_loss": 135.849993, "eval_acc": 0.988027, "eval_prec": 0.986553, "eval_recall": 0.989596, "eval_f1": 0.988072, "eval_roc_auc": 0.998877, "eval_pr_auc": 0.998865, "eval_confusion_matrix": {"tn": 21334, "fp": 293, "fn": 226, "tp": 21496}, "eval_mcc2": 0.976059, "eval_mcc": 0.976059, "eval_sn": 0.989596, "eval_sp": 0.986452, "update_flag": true, "test_avg_loss": 0.049796, "test_total_loss": 202.369206, "test_acc": 0.988342, "test_prec": 0.985829, "test_recall": 0.990924, "test_f1": 0.98837, "test_roc_auc": 0.998875, "test_pr_auc": 0.998815, "test_confusion_matrix": {"tn": 32054, "fp": 463, "fn": 295, "tp": 32210}, "test_mcc2": 0.976698, "test_mcc": 0.976698, "test_sn": 0.990924, "test_sp": 0.985761, "lr": 0.0, "cur_epoch_step": 20320, "train_global_avg_loss": 0.0825866326662456, "train_cur_epoch_loss": 968.6090886447953, "train_cur_epoch_avg_loss": 0.047667770110472206, "train_cur_epoch_time": 2753.649087905884, "train_cur_epoch_avg_time": 0.13551422676702185, "epoch": 10, "step": 203200} ################################################## #########################Best Metric######################### {"epoch": 10, "global_step": 203200, "eval_avg_loss": 0.050129, "eval_total_loss": 135.849993, "eval_acc": 0.988027, "eval_prec": 0.986553, "eval_recall": 0.989596, "eval_f1": 0.988072, "eval_roc_auc": 0.998877, "eval_pr_auc": 0.998865, "eval_confusion_matrix": {"tn": 21334, "fp": 293, "fn": 226, "tp": 21496}, "eval_mcc2": 0.976059, "eval_mcc": 0.976059, "eval_sn": 0.989596, "eval_sp": 0.986452, "update_flag": true, "test_avg_loss": 0.049796, "test_total_loss": 202.369206, "test_acc": 0.988342, "test_prec": 0.985829, "test_recall": 0.990924, "test_f1": 0.98837, "test_roc_auc": 0.998875, "test_pr_auc": 0.998815, "test_confusion_matrix": {"tn": 32054, "fp": 463, "fn": 295, "tp": 32210}, "test_mcc2": 0.976698, "test_mcc": 0.976698, "test_sn": 0.990924, "test_sp": 0.985761} ################################################## Total Time: 45029.106822, Avg time per epoch(10 epochs): 4502.910000 ++++++++++++Validation+++++++++++++ best acc global step: 203200 checkpoint path: ../models/ViralCapsid/protein/binary_class/luca_base/seq/20250103143044/checkpoint-203200 ***** Running evaluation checkpoint-203200 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## {"evaluation_avg_loss_203200": 0.050129, "evaluation_total_loss_203200": 135.849993, "evaluation_acc_203200": 0.988027, "evaluation_prec_203200": 0.986553, "evaluation_recall_203200": 0.989596, "evaluation_f1_203200": 0.988072, "evaluation_roc_auc_203200": 0.998877, "evaluation_pr_auc_203200": 0.998865, "evaluation_confusion_matrix_203200": {"tn": 21334, "fp": 293, "fn": 226, "tp": 21496}, "evaluation_mcc2_203200": 0.976059, "evaluation_mcc_203200": 0.976059, "evaluation_sn_203200": 0.989596, "evaluation_sp_203200": 0.986452} ++++++++++++Testing+++++++++++++ best acc global step: 203200 checkpoint path: ../models/ViralCapsid/protein/binary_class/luca_base/seq/20250103143044/checkpoint-203200 ***** Running testing checkpoint-203200 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## {"evaluation_avg_loss_203200": 0.049796, "evaluation_total_loss_203200": 202.369206, "evaluation_acc_203200": 0.988342, "evaluation_prec_203200": 0.985829, "evaluation_recall_203200": 0.990924, "evaluation_f1_203200": 0.98837, "evaluation_roc_auc_203200": 0.998875, "evaluation_pr_auc_203200": 0.998815, "evaluation_confusion_matrix_203200": {"tn": 32054, "fp": 463, "fn": 295, "tp": 32210}, "evaluation_mcc2_203200": 0.976698, "evaluation_mcc_203200": 0.976698, "evaluation_sn_203200": 0.990924, "evaluation_sp_203200": 0.985761}