{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "f1", "beta1": 0.9, "beta2": 0.98, "buffer_size": 1024, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "RdRP", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/RdRP/protein/binary_class/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 1152, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "matrix", "intermediate_size": 4096, "label_filepath": "../dataset/RdRP/protein/binary_class/label.txt", "label_size": 2, "label_type": "RdRP", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": null, "llm_step": null, "llm_task_level": "token_level,span_level,seq_level,structure_level", "llm_time_str": null, "llm_type": "esmc", "llm_version": "600M", "lmdb_path": null, "local_rank": -1, "log_dir": "../logs/RdRP/protein/binary_class/luca_base/matrix/20250409154233", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": true, "matrix_dirpath": "../matrices/RdRP/protein/binary_class/luca_base/600M/esmc//", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "non_ignore": false, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 0, "num_hidden_layers": 0, "num_train_epochs": 10, "output_dir": "../models/RdRP/protein/binary_class/luca_base/matrix/20250409154233", "output_mode": "binary_class", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 40.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "null", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "binary_class", "tb_log_dir": "../tb-logs/RdRP/protein/binary_class/luca_base/matrix/20250409154233", "test_data_dir": "../dataset/RdRP/protein/binary_class/test/", "time_str": "20250409154236", "train_data_dir": "../dataset/RdRP/protein/binary_class/train/", "trunc_type": "right", "vector_dirpath": "../vectors/RdRP/protein/binary_class/luca_base/600M/esmc//", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 200, "weight": null, "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,embedding_matrix ################################################## Encoder Config: {'llm_type': 'esmc', 'llm_version': '600M', 'llm_step': None, 'llm_dirpath': None, 'input_type': 'matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/RdRP/protein/binary_class/luca_base/600M/esmc//', 'matrix_dirpath': '../matrices/RdRP/protein/binary_class/luca_base/600M/esmc//', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': True, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "_attn_implementation_autoset": true, "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 1152, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4098, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "num_attention_heads": 8, "num_hidden_layers": 4, "pad_token_id": 0, "pos_weight": 40.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": null, "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.46.3", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39 } ################################################## Mode Architecture: LucaBase( (matrix_pooler): GlobalMaskValueAttentionPooling1D (1152 -> 1152) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=1152, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=128, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=1, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 4145537 ################################################## {"total_num": "3.950000M", "total_size": "15.810000MB", "param_sum": "3.950000M", "param_size": "15.810000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "3.953492M", "trainable_size": "15.813969MB"} ################################################## Train dataset len: 190846, batch size: 16, batch num: 11928 Train dataset t_total: 119280, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 190846 Train Dataset Num Epochs = 10 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. parallel, distributed & accumulation) = 16 Train Dataset Gradient Accumulation steps = 1 Train Dataset Total optimization steps = 119280 ################################################## Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.68335551, Cur Avg Loss: 1.38284016, Log Avg loss: 1.38284016, Global Avg Loss: 1.38284016, Time: 0.0233 Steps: 200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.25140870, Cur Avg Loss: 1.22763985, Log Avg loss: 1.07243955, Global Avg Loss: 1.22763985, Time: 0.0296 Steps: 400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.91543722, Cur Avg Loss: 0.98772381, Log Avg loss: 0.50789171, Global Avg Loss: 0.98772381, Time: 0.0632 Steps: 600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.16269445, Cur Avg Loss: 0.78719301, Log Avg loss: 0.18560062, Global Avg Loss: 0.78719301, Time: 0.0630 Steps: 800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.00037014, Cur Avg Loss: 0.66005805, Log Avg loss: 0.15151820, Global Avg Loss: 0.66005805, Time: 0.0431 Steps: 1000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.04240015, Cur Avg Loss: 0.56398370, Log Avg loss: 0.08361198, Global Avg Loss: 0.56398370, Time: 0.0301 Steps: 1200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.02941719, Cur Avg Loss: 0.48886830, Log Avg loss: 0.03817591, Global Avg Loss: 0.48886830, Time: 0.0201 Steps: 1400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00008029, Cur Avg Loss: 0.43884455, Log Avg loss: 0.08867832, Global Avg Loss: 0.43884455, Time: 0.0434 Steps: 1600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.00002649, Cur Avg Loss: 0.39344154, Log Avg loss: 0.03021746, Global Avg Loss: 0.39344154, Time: 0.0230 Steps: 1800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00218008, Cur Avg Loss: 0.35594203, Log Avg loss: 0.01844637, Global Avg Loss: 0.35594203, Time: 0.0319 Steps: 2000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.05694085, Cur Avg Loss: 0.32450754, Log Avg loss: 0.01016268, Global Avg Loss: 0.32450754, Time: 0.0371 Steps: 2200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.00032458, Cur Avg Loss: 0.29873133, Log Avg loss: 0.01519298, Global Avg Loss: 0.29873133, Time: 0.0160 Steps: 2400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00000000, Cur Avg Loss: 0.27638450, Log Avg loss: 0.00822264, Global Avg Loss: 0.27638450, Time: 0.0355 Steps: 2600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00002080, Cur Avg Loss: 0.25838712, Log Avg loss: 0.02442119, Global Avg Loss: 0.25838712, Time: 0.0352 Steps: 2800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00000000, Cur Avg Loss: 0.24281026, Log Avg loss: 0.02473412, Global Avg Loss: 0.24281026, Time: 0.0346 Steps: 3000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00050509, Cur Avg Loss: 0.22799801, Log Avg loss: 0.00581423, Global Avg Loss: 0.22799801, Time: 0.1137 Steps: 3200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur Loss: 0.01587437, Cur Avg Loss: 0.21556920, Log Avg loss: 0.01670827, Global Avg Loss: 0.21556920, Time: 0.0478 Steps: 3400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.00000000, Cur Avg Loss: 0.20407235, Log Avg loss: 0.00862588, Global Avg Loss: 0.20407235, Time: 0.0342 Steps: 3600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.00027016, Cur Avg Loss: 0.19358591, Log Avg loss: 0.00483008, Global Avg Loss: 0.19358591, Time: 0.0240 Steps: 3800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00000191, Cur Avg Loss: 0.18531210, Log Avg loss: 0.02810967, Global Avg Loss: 0.18531210, Time: 0.0228 Steps: 4000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.00000000, Cur Avg Loss: 0.17675300, Log Avg loss: 0.00557109, Global Avg Loss: 0.17675300, Time: 0.0312 Steps: 4200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00175841, Cur Avg Loss: 0.16952860, Log Avg loss: 0.01781618, Global Avg Loss: 0.16952860, Time: 0.0618 Steps: 4400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00000000, Cur Avg Loss: 0.16225743, Log Avg loss: 0.00229164, Global Avg Loss: 0.16225743, Time: 0.0361 Steps: 4600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.00000000, Cur Avg Loss: 0.15556446, Log Avg loss: 0.00162616, Global Avg Loss: 0.15556446, Time: 0.0250 Steps: 4800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005000, Sample Num: 80000, Cur Loss: 0.00000000, Cur Avg Loss: 0.15578357, Log Avg loss: 0.16104212, Global Avg Loss: 0.15578357, Time: 0.0701 Steps: 5000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00000015, Cur Avg Loss: 0.15003085, Log Avg loss: 0.00621293, Global Avg Loss: 0.15003085, Time: 0.0390 Steps: 5200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00000000, Cur Avg Loss: 0.14940999, Log Avg loss: 0.13326759, Global Avg Loss: 0.14940999, Time: 0.0280 Steps: 5400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005600, Sample Num: 89600, Cur Loss: 0.00000000, Cur Avg Loss: 0.14409119, Log Avg loss: 0.00048366, Global Avg Loss: 0.14409119, Time: 0.0342 Steps: 5600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 005800, Sample Num: 92800, Cur Loss: 0.00000006, Cur Avg Loss: 0.13912640, Log Avg loss: 0.00011232, Global Avg Loss: 0.13912640, Time: 0.0608 Steps: 5800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006000, Sample Num: 96000, Cur Loss: 0.00038020, Cur Avg Loss: 0.13543934, Log Avg loss: 0.02851456, Global Avg Loss: 0.13543934, Time: 0.0533 Steps: 6000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006200, Sample Num: 99200, Cur Loss: 0.00000000, Cur Avg Loss: 0.13313304, Log Avg loss: 0.06394393, Global Avg Loss: 0.13313304, Time: 0.0315 Steps: 6200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00000000, Cur Avg Loss: 0.12900155, Log Avg loss: 0.00092541, Global Avg Loss: 0.12900155, Time: 0.0323 Steps: 6400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006600, Sample Num: 105600, Cur Loss: 0.00000000, Cur Avg Loss: 0.12534579, Log Avg loss: 0.00836151, Global Avg Loss: 0.12534579, Time: 0.1002 Steps: 6600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006800, Sample Num: 108800, Cur Loss: 0.00000000, Cur Avg Loss: 0.12177991, Log Avg loss: 0.00410566, Global Avg Loss: 0.12177991, Time: 0.0574 Steps: 6800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00000000, Cur Avg Loss: 0.11907357, Log Avg loss: 0.02705834, Global Avg Loss: 0.11907357, Time: 0.0281 Steps: 7000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007200, Sample Num: 115200, Cur Loss: 0.00000000, Cur Avg Loss: 0.11675873, Log Avg loss: 0.03573900, Global Avg Loss: 0.11675873, Time: 0.0345 Steps: 7200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00000000, Cur Avg Loss: 0.11360854, Log Avg loss: 0.00020199, Global Avg Loss: 0.11360854, Time: 0.0297 Steps: 7400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00000010, Cur Avg Loss: 0.11062610, Log Avg loss: 0.00027581, Global Avg Loss: 0.11062610, Time: 0.0609 Steps: 7600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007800, Sample Num: 124800, Cur Loss: 0.00000016, Cur Avg Loss: 0.10779271, Log Avg loss: 0.00012383, Global Avg Loss: 0.10779271, Time: 0.0258 Steps: 7800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00000842, Cur Avg Loss: 0.10515079, Log Avg loss: 0.00211568, Global Avg Loss: 0.10515079, Time: 0.0640 Steps: 8000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00000000, Cur Avg Loss: 0.10263258, Log Avg loss: 0.00190442, Global Avg Loss: 0.10263258, Time: 0.0651 Steps: 8200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008400, Sample Num: 134400, Cur Loss: 0.00000360, Cur Avg Loss: 0.10206900, Log Avg loss: 0.07896210, Global Avg Loss: 0.10206900, Time: 0.0626 Steps: 8400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008600, Sample Num: 137600, Cur Loss: 0.00001610, Cur Avg Loss: 0.10230274, Log Avg loss: 0.11212004, Global Avg Loss: 0.10230274, Time: 0.0852 Steps: 8600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00000000, Cur Avg Loss: 0.09999512, Log Avg loss: 0.00076725, Global Avg Loss: 0.09999512, Time: 0.0257 Steps: 8800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00000000, Cur Avg Loss: 0.09779343, Log Avg loss: 0.00091902, Global Avg Loss: 0.09779343, Time: 0.0268 Steps: 9000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00000005, Cur Avg Loss: 0.09658042, Log Avg loss: 0.04199492, Global Avg Loss: 0.09658042, Time: 0.0421 Steps: 9200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00000000, Cur Avg Loss: 0.09829622, Log Avg loss: 0.17722313, Global Avg Loss: 0.09829622, Time: 0.0343 Steps: 9400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00000050, Cur Avg Loss: 0.09627774, Log Avg loss: 0.00140915, Global Avg Loss: 0.09627774, Time: 0.0257 Steps: 9600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009800, Sample Num: 156800, Cur Loss: 0.00000293, Cur Avg Loss: 0.09482456, Log Avg loss: 0.02507220, Global Avg Loss: 0.09482456, Time: 0.0561 Steps: 9800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00000000, Cur Avg Loss: 0.09300730, Log Avg loss: 0.00396152, Global Avg Loss: 0.09300730, Time: 0.0295 Steps: 10000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010200, Sample Num: 163200, Cur Loss: 0.00000000, Cur Avg Loss: 0.09124742, Log Avg loss: 0.00325303, Global Avg Loss: 0.09124742, Time: 0.0304 Steps: 10200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010400, Sample Num: 166400, Cur Loss: 0.00000492, Cur Avg Loss: 0.08949730, Log Avg loss: 0.00024162, Global Avg Loss: 0.08949730, Time: 0.0614 Steps: 10400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00001106, Cur Avg Loss: 0.08785939, Log Avg loss: 0.00268790, Global Avg Loss: 0.08785939, Time: 0.0199 Steps: 10600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00000019, Cur Avg Loss: 0.08625403, Log Avg loss: 0.00116994, Global Avg Loss: 0.08625403, Time: 0.0471 Steps: 10800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00000000, Cur Avg Loss: 0.08468774, Log Avg loss: 0.00010816, Global Avg Loss: 0.08468774, Time: 0.0266 Steps: 11000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00000024, Cur Avg Loss: 0.08328886, Log Avg loss: 0.00635050, Global Avg Loss: 0.08328886, Time: 0.0311 Steps: 11200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00000000, Cur Avg Loss: 0.08185663, Log Avg loss: 0.00165180, Global Avg Loss: 0.08185663, Time: 0.0562 Steps: 11400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00000092, Cur Avg Loss: 0.08050647, Log Avg loss: 0.00354697, Global Avg Loss: 0.08050647, Time: 0.0302 Steps: 11600, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 011800, Sample Num: 188800, Cur Loss: 0.00000000, Cur Avg Loss: 0.07924921, Log Avg loss: 0.00632814, Global Avg Loss: 0.07924921, Time: 0.0841 Steps: 11800, Updated lr: 0.000090 ***** Running evaluation checkpoint-11928 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-11928 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 544.704283, Avg time per batch (s): 0.050000 {"eval_avg_loss": 0.002444, "eval_total_loss": 3.404538, "eval_acc": 0.999776, "eval_prec": 0.990826, "eval_recall": 1.0, "eval_f1": 0.995392, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999914, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 0, "tp": 540}, "eval_mcc2": 0.995288, "eval_mcc": 0.995288, "eval_sn": 1.0, "eval_sp": 0.99977, "update_flag": true, "test_avg_loss": 0.001745, "test_total_loss": 2.430984, "test_acc": 0.999776, "test_prec": 0.990826, "test_recall": 1.0, "test_f1": 0.995392, "test_roc_auc": 0.999999, "test_pr_auc": 0.999973, "test_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 0, "tp": 540}, "test_mcc2": 0.995288, "test_mcc": 0.995288, "test_sn": 1.0, "test_sp": 0.99977, "lr": 9.015115888478334e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.07842134071632137, "train_cur_epoch_loss": 935.4097520642813, "train_cur_epoch_avg_loss": 0.07842134071632137, "train_cur_epoch_time": 544.7042827606201, "train_cur_epoch_avg_time": 0.04566601968147385, "epoch": 1, "step": 11928} ################################################## Training, Epoch: 0002, Batch: 000072, Sample Num: 1152, Cur Loss: 0.00000002, Cur Avg Loss: 0.00103948, Log Avg loss: 0.00171971, Global Avg Loss: 0.07795705, Time: 0.0268 Steps: 12000, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000272, Sample Num: 4352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01359473, Log Avg loss: 0.01811462, Global Avg Loss: 0.07697603, Time: 0.0585 Steps: 12200, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000472, Sample Num: 7552, Cur Loss: 0.00000471, Cur Avg Loss: 0.01383006, Log Avg loss: 0.01415010, Global Avg Loss: 0.07596270, Time: 0.0576 Steps: 12400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000672, Sample Num: 10752, Cur Loss: 0.00000001, Cur Avg Loss: 0.00981386, Log Avg loss: 0.00033564, Global Avg Loss: 0.07476228, Time: 0.0593 Steps: 12600, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000872, Sample Num: 13952, Cur Loss: 0.00000000, Cur Avg Loss: 0.00777442, Log Avg loss: 0.00092189, Global Avg Loss: 0.07360852, Time: 0.0356 Steps: 12800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001072, Sample Num: 17152, Cur Loss: 0.03162842, Cur Avg Loss: 0.02946156, Log Avg loss: 0.12401750, Global Avg Loss: 0.07438404, Time: 0.0312 Steps: 13000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001272, Sample Num: 20352, Cur Loss: 0.00000000, Cur Avg Loss: 0.02485883, Log Avg loss: 0.00018821, Global Avg Loss: 0.07325986, Time: 0.0565 Steps: 13200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001472, Sample Num: 23552, Cur Loss: 0.00000000, Cur Avg Loss: 0.02481380, Log Avg loss: 0.02452742, Global Avg Loss: 0.07253251, Time: 0.0333 Steps: 13400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001672, Sample Num: 26752, Cur Loss: 0.00000000, Cur Avg Loss: 0.02185105, Log Avg loss: 0.00004515, Global Avg Loss: 0.07146652, Time: 0.0428 Steps: 13600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001872, Sample Num: 29952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01978697, Log Avg loss: 0.00253126, Global Avg Loss: 0.07046746, Time: 0.0502 Steps: 13800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002072, Sample Num: 33152, Cur Loss: 0.00087471, Cur Avg Loss: 0.01840110, Log Avg loss: 0.00542936, Global Avg Loss: 0.06953834, Time: 0.0237 Steps: 14000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002272, Sample Num: 36352, Cur Loss: 0.00000006, Cur Avg Loss: 0.01713382, Log Avg loss: 0.00400487, Global Avg Loss: 0.06861534, Time: 0.0588 Steps: 14200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002472, Sample Num: 39552, Cur Loss: 0.00416109, Cur Avg Loss: 0.01973089, Log Avg loss: 0.04923355, Global Avg Loss: 0.06834615, Time: 0.0328 Steps: 14400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002672, Sample Num: 42752, Cur Loss: 0.00000000, Cur Avg Loss: 0.02321700, Log Avg loss: 0.06630536, Global Avg Loss: 0.06831819, Time: 0.0292 Steps: 14600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002872, Sample Num: 45952, Cur Loss: 0.00000000, Cur Avg Loss: 0.02161377, Log Avg loss: 0.00019454, Global Avg Loss: 0.06739760, Time: 0.0295 Steps: 14800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003072, Sample Num: 49152, Cur Loss: 0.00000000, Cur Avg Loss: 0.02029751, Log Avg loss: 0.00139609, Global Avg Loss: 0.06651758, Time: 0.0263 Steps: 15000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003272, Sample Num: 52352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01905734, Log Avg loss: 0.00000826, Global Avg Loss: 0.06564246, Time: 0.0505 Steps: 15200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003472, Sample Num: 55552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01820657, Log Avg loss: 0.00428796, Global Avg Loss: 0.06484565, Time: 0.0235 Steps: 15400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003672, Sample Num: 58752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01745594, Log Avg loss: 0.00442502, Global Avg Loss: 0.06407102, Time: 0.0360 Steps: 15600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003872, Sample Num: 61952, Cur Loss: 0.00000001, Cur Avg Loss: 0.01718240, Log Avg loss: 0.01216031, Global Avg Loss: 0.06341393, Time: 0.0739 Steps: 15800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004072, Sample Num: 65152, Cur Loss: 0.00000007, Cur Avg Loss: 0.01640428, Log Avg loss: 0.00133971, Global Avg Loss: 0.06263800, Time: 0.0296 Steps: 16000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004272, Sample Num: 68352, Cur Loss: 0.00000093, Cur Avg Loss: 0.01584210, Log Avg loss: 0.00439628, Global Avg Loss: 0.06191896, Time: 0.0848 Steps: 16200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004472, Sample Num: 71552, Cur Loss: 0.00000007, Cur Avg Loss: 0.01536950, Log Avg loss: 0.00527476, Global Avg Loss: 0.06122818, Time: 0.0635 Steps: 16400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 004672, Sample Num: 74752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01471199, Log Avg loss: 0.00001003, Global Avg Loss: 0.06049061, Time: 0.0231 Steps: 16600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 004872, Sample Num: 77952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01597434, Log Avg loss: 0.04546271, Global Avg Loss: 0.06031171, Time: 0.0332 Steps: 16800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005072, Sample Num: 81152, Cur Loss: 0.00000185, Cur Avg Loss: 0.01540623, Log Avg loss: 0.00156713, Global Avg Loss: 0.05962060, Time: 0.0313 Steps: 17000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005272, Sample Num: 84352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01681384, Log Avg loss: 0.05251077, Global Avg Loss: 0.05953792, Time: 0.0365 Steps: 17200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005472, Sample Num: 87552, Cur Loss: 0.00000005, Cur Avg Loss: 0.01684807, Log Avg loss: 0.01775056, Global Avg Loss: 0.05905761, Time: 0.0496 Steps: 17400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005672, Sample Num: 90752, Cur Loss: 0.00000018, Cur Avg Loss: 0.01625435, Log Avg loss: 0.00000994, Global Avg Loss: 0.05838661, Time: 0.0296 Steps: 17600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 005872, Sample Num: 93952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01570468, Log Avg loss: 0.00011622, Global Avg Loss: 0.05773189, Time: 0.1106 Steps: 17800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006072, Sample Num: 97152, Cur Loss: 0.00001572, Cur Avg Loss: 0.01518744, Log Avg loss: 0.00000133, Global Avg Loss: 0.05709044, Time: 0.1279 Steps: 18000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006272, Sample Num: 100352, Cur Loss: 0.00000048, Cur Avg Loss: 0.01729707, Log Avg loss: 0.08134520, Global Avg Loss: 0.05735698, Time: 0.0674 Steps: 18200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006472, Sample Num: 103552, Cur Loss: 0.00000245, Cur Avg Loss: 0.01679548, Log Avg loss: 0.00106576, Global Avg Loss: 0.05674511, Time: 0.0582 Steps: 18400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006672, Sample Num: 106752, Cur Loss: 0.00000008, Cur Avg Loss: 0.01636922, Log Avg loss: 0.00257529, Global Avg Loss: 0.05616264, Time: 0.0330 Steps: 18600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006872, Sample Num: 109952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01591160, Log Avg loss: 0.00064561, Global Avg Loss: 0.05557204, Time: 0.0351 Steps: 18800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007072, Sample Num: 113152, Cur Loss: 0.00000052, Cur Avg Loss: 0.01615484, Log Avg loss: 0.02451244, Global Avg Loss: 0.05524509, Time: 0.0497 Steps: 19000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007272, Sample Num: 116352, Cur Loss: 0.00000381, Cur Avg Loss: 0.01574498, Log Avg loss: 0.00125225, Global Avg Loss: 0.05468267, Time: 0.0366 Steps: 19200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007472, Sample Num: 119552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01532363, Log Avg loss: 0.00000333, Global Avg Loss: 0.05411896, Time: 0.0154 Steps: 19400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007672, Sample Num: 122752, Cur Loss: 0.00000011, Cur Avg Loss: 0.01492891, Log Avg loss: 0.00018243, Global Avg Loss: 0.05356859, Time: 0.0644 Steps: 19600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007872, Sample Num: 125952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01455061, Log Avg loss: 0.00003873, Global Avg Loss: 0.05302788, Time: 0.0388 Steps: 19800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 008072, Sample Num: 129152, Cur Loss: 0.00000000, Cur Avg Loss: 0.01427613, Log Avg loss: 0.00347265, Global Avg Loss: 0.05253233, Time: 0.0165 Steps: 20000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008272, Sample Num: 132352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01404988, Log Avg loss: 0.00491854, Global Avg Loss: 0.05206091, Time: 0.0249 Steps: 20200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008472, Sample Num: 135552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01724481, Log Avg loss: 0.14938710, Global Avg Loss: 0.05301509, Time: 0.0515 Steps: 20400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008672, Sample Num: 138752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01686594, Log Avg loss: 0.00081723, Global Avg Loss: 0.05250831, Time: 0.0266 Steps: 20600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008872, Sample Num: 141952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01648627, Log Avg loss: 0.00002366, Global Avg Loss: 0.05200365, Time: 0.0260 Steps: 20800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 009072, Sample Num: 145152, Cur Loss: 0.00000000, Cur Avg Loss: 0.01612299, Log Avg loss: 0.00000763, Global Avg Loss: 0.05150845, Time: 0.0397 Steps: 21000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 009272, Sample Num: 148352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01642394, Log Avg loss: 0.03007521, Global Avg Loss: 0.05130625, Time: 0.0361 Steps: 21200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009472, Sample Num: 151552, Cur Loss: 0.00000436, Cur Avg Loss: 0.02008351, Log Avg loss: 0.18974107, Global Avg Loss: 0.05260003, Time: 0.0741 Steps: 21400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009672, Sample Num: 154752, Cur Loss: 0.00000025, Cur Avg Loss: 0.01970312, Log Avg loss: 0.00168806, Global Avg Loss: 0.05212863, Time: 0.0430 Steps: 21600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009872, Sample Num: 157952, Cur Loss: 0.00000034, Cur Avg Loss: 0.01967853, Log Avg loss: 0.01848935, Global Avg Loss: 0.05182001, Time: 0.0637 Steps: 21800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010072, Sample Num: 161152, Cur Loss: 0.00000000, Cur Avg Loss: 0.01947331, Log Avg loss: 0.00934380, Global Avg Loss: 0.05143386, Time: 0.0405 Steps: 22000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010272, Sample Num: 164352, Cur Loss: 0.00003916, Cur Avg Loss: 0.01911543, Log Avg loss: 0.00109255, Global Avg Loss: 0.05098034, Time: 0.1102 Steps: 22200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010472, Sample Num: 167552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01875263, Log Avg loss: 0.00011896, Global Avg Loss: 0.05052622, Time: 0.0277 Steps: 22400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 010672, Sample Num: 170752, Cur Loss: 0.00005103, Cur Avg Loss: 0.01843472, Log Avg loss: 0.00178887, Global Avg Loss: 0.05009491, Time: 0.0579 Steps: 22600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 010872, Sample Num: 173952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01809583, Log Avg loss: 0.00001274, Global Avg Loss: 0.04965560, Time: 0.0307 Steps: 22800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011072, Sample Num: 177152, Cur Loss: 0.00000001, Cur Avg Loss: 0.01776973, Log Avg loss: 0.00004284, Global Avg Loss: 0.04922418, Time: 0.0499 Steps: 23000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011272, Sample Num: 180352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01750129, Log Avg loss: 0.00264042, Global Avg Loss: 0.04882260, Time: 0.0714 Steps: 23200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011472, Sample Num: 183552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01720250, Log Avg loss: 0.00036294, Global Avg Loss: 0.04840841, Time: 0.0638 Steps: 23400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011672, Sample Num: 186752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01691680, Log Avg loss: 0.00052887, Global Avg Loss: 0.04800265, Time: 0.0301 Steps: 23600, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 011872, Sample Num: 189952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01675443, Log Avg loss: 0.00727859, Global Avg Loss: 0.04766043, Time: 0.0758 Steps: 23800, Updated lr: 0.000080 ***** Running evaluation checkpoint-23856 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-23856 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 541.709071, Avg time per batch (s): 0.050000 {"eval_avg_loss": 0.004344, "eval_total_loss": 6.051439, "eval_acc": 0.999776, "eval_prec": 0.990826, "eval_recall": 1.0, "eval_f1": 0.995392, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999911, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 0, "tp": 540}, "eval_mcc2": 0.995288, "eval_mcc": 0.995288, "eval_sn": 1.0, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.002287, "test_total_loss": 3.186016, "test_acc": 0.99982, "test_prec": 0.994465, "test_recall": 0.998148, "test_f1": 0.996303, "test_roc_auc": 0.999999, "test_pr_auc": 0.999979, "test_confusion_matrix": {"tn": 21741, "fp": 3, "fn": 1, "tp": 539}, "test_mcc2": 0.996213, "test_mcc": 0.996213, "test_sn": 0.998148, "test_sp": 0.999862, "lr": 8.013436345314075e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.04754855590140065, "train_cur_epoch_loss": 198.90859751953093, "train_cur_epoch_avg_loss": 0.01667577108647979, "train_cur_epoch_time": 541.7090709209442, "train_cur_epoch_avg_time": 0.04541491204903959, "epoch": 2, "step": 23856} ################################################## Training, Epoch: 0003, Batch: 000144, Sample Num: 2304, Cur Loss: 0.00000000, Cur Avg Loss: 0.00208531, Log Avg loss: 0.00150152, Global Avg Loss: 0.04727578, Time: 0.0219 Steps: 24000, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000344, Sample Num: 5504, Cur Loss: 0.00000000, Cur Avg Loss: 0.00358999, Log Avg loss: 0.00467337, Global Avg Loss: 0.04692369, Time: 0.0654 Steps: 24200, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000544, Sample Num: 8704, Cur Loss: 0.00000000, Cur Avg Loss: 0.00227181, Log Avg loss: 0.00000454, Global Avg Loss: 0.04653911, Time: 0.0287 Steps: 24400, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000744, Sample Num: 11904, Cur Loss: 0.00000000, Cur Avg Loss: 0.00194437, Log Avg loss: 0.00105372, Global Avg Loss: 0.04616931, Time: 0.0271 Steps: 24600, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000944, Sample Num: 15104, Cur Loss: 0.00000000, Cur Avg Loss: 0.02817215, Log Avg loss: 0.12573948, Global Avg Loss: 0.04681100, Time: 0.0412 Steps: 24800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001144, Sample Num: 18304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02368576, Log Avg loss: 0.00251002, Global Avg Loss: 0.04645659, Time: 0.0287 Steps: 25000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001344, Sample Num: 21504, Cur Loss: 0.00000000, Cur Avg Loss: 0.02036938, Log Avg loss: 0.00139970, Global Avg Loss: 0.04609900, Time: 0.0538 Steps: 25200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001544, Sample Num: 24704, Cur Loss: 0.00000393, Cur Avg Loss: 0.01794844, Log Avg loss: 0.00167968, Global Avg Loss: 0.04574924, Time: 0.0308 Steps: 25400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001744, Sample Num: 27904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01621199, Log Avg loss: 0.00280660, Global Avg Loss: 0.04541375, Time: 0.0337 Steps: 25600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001944, Sample Num: 31104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01454463, Log Avg loss: 0.00000526, Global Avg Loss: 0.04506175, Time: 0.0303 Steps: 25800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002144, Sample Num: 34304, Cur Loss: 0.00000012, Cur Avg Loss: 0.01417092, Log Avg loss: 0.01053847, Global Avg Loss: 0.04479618, Time: 0.0194 Steps: 26000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002344, Sample Num: 37504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01582600, Log Avg loss: 0.03356847, Global Avg Loss: 0.04471048, Time: 0.0312 Steps: 26200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002544, Sample Num: 40704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01466171, Log Avg loss: 0.00101629, Global Avg Loss: 0.04437946, Time: 0.0299 Steps: 26400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002744, Sample Num: 43904, Cur Loss: 0.00000014, Cur Avg Loss: 0.02048821, Log Avg loss: 0.09460129, Global Avg Loss: 0.04475707, Time: 0.0605 Steps: 26600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002944, Sample Num: 47104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01916341, Log Avg loss: 0.00098716, Global Avg Loss: 0.04443043, Time: 0.0294 Steps: 26800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003144, Sample Num: 50304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01794478, Log Avg loss: 0.00000645, Global Avg Loss: 0.04410136, Time: 0.0581 Steps: 27000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003344, Sample Num: 53504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01698897, Log Avg loss: 0.00196367, Global Avg Loss: 0.04379152, Time: 0.0441 Steps: 27200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003544, Sample Num: 56704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01613290, Log Avg loss: 0.00181948, Global Avg Loss: 0.04348516, Time: 0.0481 Steps: 27400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003744, Sample Num: 59904, Cur Loss: 0.00000400, Cur Avg Loss: 0.01548209, Log Avg loss: 0.00394965, Global Avg Loss: 0.04319867, Time: 0.0347 Steps: 27600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003944, Sample Num: 63104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01469814, Log Avg loss: 0.00002255, Global Avg Loss: 0.04288805, Time: 0.0308 Steps: 27800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 004144, Sample Num: 66304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01402752, Log Avg loss: 0.00080299, Global Avg Loss: 0.04258744, Time: 0.0260 Steps: 28000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 004344, Sample Num: 69504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01371517, Log Avg loss: 0.00724322, Global Avg Loss: 0.04233677, Time: 0.0648 Steps: 28200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004544, Sample Num: 72704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01311167, Log Avg loss: 0.00000377, Global Avg Loss: 0.04203865, Time: 0.0345 Steps: 28400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004744, Sample Num: 75904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01255896, Log Avg loss: 0.00000131, Global Avg Loss: 0.04174469, Time: 0.0467 Steps: 28600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004944, Sample Num: 79104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01240368, Log Avg loss: 0.00872034, Global Avg Loss: 0.04151535, Time: 0.0551 Steps: 28800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005144, Sample Num: 82304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01192178, Log Avg loss: 0.00000922, Global Avg Loss: 0.04122910, Time: 0.0176 Steps: 29000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005344, Sample Num: 85504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01150791, Log Avg loss: 0.00086336, Global Avg Loss: 0.04095262, Time: 0.0954 Steps: 29200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005544, Sample Num: 88704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01131705, Log Avg loss: 0.00621729, Global Avg Loss: 0.04071633, Time: 0.0604 Steps: 29400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 005744, Sample Num: 91904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01092338, Log Avg loss: 0.00001077, Global Avg Loss: 0.04044129, Time: 0.0406 Steps: 29600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 005944, Sample Num: 95104, Cur Loss: 0.00000727, Cur Avg Loss: 0.01055587, Log Avg loss: 0.00000090, Global Avg Loss: 0.04016988, Time: 0.0212 Steps: 29800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006144, Sample Num: 98304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01308847, Log Avg loss: 0.08835729, Global Avg Loss: 0.04049113, Time: 0.0351 Steps: 30000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006344, Sample Num: 101504, Cur Loss: 0.00000012, Cur Avg Loss: 0.01267594, Log Avg loss: 0.00000302, Global Avg Loss: 0.04022300, Time: 0.0211 Steps: 30200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006544, Sample Num: 104704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01232645, Log Avg loss: 0.00124065, Global Avg Loss: 0.03996653, Time: 0.0340 Steps: 30400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006744, Sample Num: 107904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01197436, Log Avg loss: 0.00045415, Global Avg Loss: 0.03970828, Time: 0.0299 Steps: 30600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 006944, Sample Num: 111104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01165318, Log Avg loss: 0.00082275, Global Avg Loss: 0.03945578, Time: 0.0317 Steps: 30800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007144, Sample Num: 114304, Cur Loss: 0.00000016, Cur Avg Loss: 0.01190939, Log Avg loss: 0.02080522, Global Avg Loss: 0.03933545, Time: 0.0675 Steps: 31000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007344, Sample Num: 117504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01161190, Log Avg loss: 0.00098545, Global Avg Loss: 0.03908962, Time: 0.0308 Steps: 31200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007544, Sample Num: 120704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01130410, Log Avg loss: 0.00000175, Global Avg Loss: 0.03884065, Time: 0.0279 Steps: 31400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007744, Sample Num: 123904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01101269, Log Avg loss: 0.00002078, Global Avg Loss: 0.03859496, Time: 0.0244 Steps: 31600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007944, Sample Num: 127104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01080010, Log Avg loss: 0.00256852, Global Avg Loss: 0.03836838, Time: 0.0356 Steps: 31800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008144, Sample Num: 130304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01056724, Log Avg loss: 0.00131792, Global Avg Loss: 0.03813681, Time: 0.0350 Steps: 32000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008344, Sample Num: 133504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01046436, Log Avg loss: 0.00627515, Global Avg Loss: 0.03793891, Time: 0.1102 Steps: 32200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008544, Sample Num: 136704, Cur Loss: 0.00000006, Cur Avg Loss: 0.01330848, Log Avg loss: 0.13196519, Global Avg Loss: 0.03851932, Time: 0.0342 Steps: 32400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008744, Sample Num: 139904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01302503, Log Avg loss: 0.00091620, Global Avg Loss: 0.03828863, Time: 0.0467 Steps: 32600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008944, Sample Num: 143104, Cur Loss: 0.00000024, Cur Avg Loss: 0.01273384, Log Avg loss: 0.00000281, Global Avg Loss: 0.03805518, Time: 0.0676 Steps: 32800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 009144, Sample Num: 146304, Cur Loss: 0.00000012, Cur Avg Loss: 0.01246315, Log Avg loss: 0.00035810, Global Avg Loss: 0.03782671, Time: 0.0233 Steps: 33000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009344, Sample Num: 149504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01660419, Log Avg loss: 0.20593234, Global Avg Loss: 0.03883939, Time: 0.0350 Steps: 33200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009544, Sample Num: 152704, Cur Loss: 0.00000003, Cur Avg Loss: 0.01627744, Log Avg loss: 0.00101166, Global Avg Loss: 0.03861288, Time: 0.0754 Steps: 33400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009744, Sample Num: 155904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01597064, Log Avg loss: 0.00133047, Global Avg Loss: 0.03839096, Time: 0.0196 Steps: 33600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009944, Sample Num: 159104, Cur Loss: 0.00000001, Cur Avg Loss: 0.01664858, Log Avg loss: 0.04967773, Global Avg Loss: 0.03845775, Time: 0.0616 Steps: 33800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 010144, Sample Num: 162304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01650102, Log Avg loss: 0.00916441, Global Avg Loss: 0.03828543, Time: 0.0684 Steps: 34000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 010344, Sample Num: 165504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01618445, Log Avg loss: 0.00012795, Global Avg Loss: 0.03806229, Time: 0.0326 Steps: 34200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010544, Sample Num: 168704, Cur Loss: 0.00000496, Cur Avg Loss: 0.01587782, Log Avg loss: 0.00001866, Global Avg Loss: 0.03784111, Time: 0.0813 Steps: 34400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010744, Sample Num: 171904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01560931, Log Avg loss: 0.00145357, Global Avg Loss: 0.03763077, Time: 0.0369 Steps: 34600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010944, Sample Num: 175104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01532406, Log Avg loss: 0.00000039, Global Avg Loss: 0.03741451, Time: 0.0409 Steps: 34800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011144, Sample Num: 178304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01508464, Log Avg loss: 0.00198339, Global Avg Loss: 0.03721204, Time: 0.0331 Steps: 35000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011344, Sample Num: 181504, Cur Loss: 0.00000025, Cur Avg Loss: 0.01481874, Log Avg loss: 0.00000297, Global Avg Loss: 0.03700063, Time: 0.0351 Steps: 35200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011544, Sample Num: 184704, Cur Loss: 0.00000107, Cur Avg Loss: 0.01456213, Log Avg loss: 0.00000708, Global Avg Loss: 0.03679163, Time: 0.0299 Steps: 35400, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 011744, Sample Num: 187904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01447422, Log Avg loss: 0.00940000, Global Avg Loss: 0.03663774, Time: 0.0252 Steps: 35600, Updated lr: 0.000070 ***** Running evaluation checkpoint-35784 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-35784 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 520.196050, Avg time per batch (s): 0.040000 {"eval_avg_loss": 0.012548, "eval_total_loss": 17.478693, "eval_acc": 0.999641, "eval_prec": 0.990775, "eval_recall": 0.994444, "eval_f1": 0.992606, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999908, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 3, "tp": 537}, "eval_mcc2": 0.992424, "eval_mcc": 0.992424, "eval_sn": 0.994444, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.006481, "test_total_loss": 9.028269, "test_acc": 0.999865, "test_prec": 0.996303, "test_recall": 0.998148, "test_f1": 0.997225, "test_roc_auc": 1.0, "test_pr_auc": 0.999983, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 1, "tp": 539}, "test_mcc2": 0.997156, "test_mcc": 0.997156, "test_sn": 0.998148, "test_sp": 0.999908, "lr": 7.011756802149816e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.03646800797600032, "train_cur_epoch_loss": 170.65284782938053, "train_cur_epoch_avg_loss": 0.014306912125199574, "train_cur_epoch_time": 520.1960504055023, "train_cur_epoch_avg_time": 0.04361133890052837, "epoch": 3, "step": 35784} ################################################## Training, Epoch: 0004, Batch: 000016, Sample Num: 256, Cur Loss: 0.00000000, Cur Avg Loss: 0.00009338, Log Avg loss: 0.00334577, Global Avg Loss: 0.03645175, Time: 0.0664 Steps: 35800, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000216, Sample Num: 3456, Cur Loss: 0.00000000, Cur Avg Loss: 0.00010574, Log Avg loss: 0.00010673, Global Avg Loss: 0.03624983, Time: 0.0183 Steps: 36000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000416, Sample Num: 6656, Cur Loss: 0.00000000, Cur Avg Loss: 0.00028552, Log Avg loss: 0.00047968, Global Avg Loss: 0.03605221, Time: 0.0375 Steps: 36200, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000616, Sample Num: 9856, Cur Loss: 0.00000006, Cur Avg Loss: 0.00019387, Log Avg loss: 0.00000322, Global Avg Loss: 0.03585414, Time: 0.0341 Steps: 36400, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000816, Sample Num: 13056, Cur Loss: 0.00000000, Cur Avg Loss: 0.00041811, Log Avg loss: 0.00110880, Global Avg Loss: 0.03566427, Time: 0.0442 Steps: 36600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001016, Sample Num: 16256, Cur Loss: 0.00000092, Cur Avg Loss: 0.02529921, Log Avg loss: 0.12681405, Global Avg Loss: 0.03615965, Time: 0.0732 Steps: 36800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001216, Sample Num: 19456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02113876, Log Avg loss: 0.00000368, Global Avg Loss: 0.03596421, Time: 0.0801 Steps: 37000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001416, Sample Num: 22656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01815472, Log Avg loss: 0.00001177, Global Avg Loss: 0.03577092, Time: 0.0250 Steps: 37200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001616, Sample Num: 25856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01602808, Log Avg loss: 0.00097147, Global Avg Loss: 0.03558483, Time: 0.0285 Steps: 37400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001816, Sample Num: 29056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01452821, Log Avg loss: 0.00240924, Global Avg Loss: 0.03540836, Time: 0.0283 Steps: 37600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002016, Sample Num: 32256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01364376, Log Avg loss: 0.00561300, Global Avg Loss: 0.03525071, Time: 0.0341 Steps: 37800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002216, Sample Num: 35456, Cur Loss: 0.00000002, Cur Avg Loss: 0.01276037, Log Avg loss: 0.00385576, Global Avg Loss: 0.03508548, Time: 0.0783 Steps: 38000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002416, Sample Num: 38656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01356618, Log Avg loss: 0.02249453, Global Avg Loss: 0.03501956, Time: 0.0386 Steps: 38200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002616, Sample Num: 41856, Cur Loss: 0.00000089, Cur Avg Loss: 0.01398639, Log Avg loss: 0.01906254, Global Avg Loss: 0.03493645, Time: 0.0355 Steps: 38400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002816, Sample Num: 45056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01888885, Log Avg loss: 0.08301303, Global Avg Loss: 0.03518555, Time: 0.0538 Steps: 38600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003016, Sample Num: 48256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01767096, Log Avg loss: 0.00052308, Global Avg Loss: 0.03500688, Time: 0.0684 Steps: 38800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003216, Sample Num: 51456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01657241, Log Avg loss: 0.00000633, Global Avg Loss: 0.03482739, Time: 0.0413 Steps: 39000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003416, Sample Num: 54656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01569375, Log Avg loss: 0.00156487, Global Avg Loss: 0.03465768, Time: 0.0359 Steps: 39200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003616, Sample Num: 57856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01526599, Log Avg loss: 0.00795985, Global Avg Loss: 0.03452216, Time: 0.0328 Steps: 39400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003816, Sample Num: 61056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01453936, Log Avg loss: 0.00140192, Global Avg Loss: 0.03435488, Time: 0.0418 Steps: 39600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004016, Sample Num: 64256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01381546, Log Avg loss: 0.00000332, Global Avg Loss: 0.03418226, Time: 0.0136 Steps: 39800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004216, Sample Num: 67456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01320167, Log Avg loss: 0.00087693, Global Avg Loss: 0.03401574, Time: 0.0279 Steps: 40000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004416, Sample Num: 70656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01264370, Log Avg loss: 0.00088159, Global Avg Loss: 0.03385089, Time: 0.1208 Steps: 40200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 004616, Sample Num: 73856, Cur Loss: 0.00000002, Cur Avg Loss: 0.01209598, Log Avg loss: 0.00000236, Global Avg Loss: 0.03368332, Time: 0.0648 Steps: 40400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 004816, Sample Num: 77056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01170168, Log Avg loss: 0.00260129, Global Avg Loss: 0.03353021, Time: 0.0510 Steps: 40600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005016, Sample Num: 80256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01134743, Log Avg loss: 0.00281707, Global Avg Loss: 0.03337965, Time: 0.0567 Steps: 40800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005216, Sample Num: 83456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01091477, Log Avg loss: 0.00006349, Global Avg Loss: 0.03321714, Time: 0.0217 Steps: 41000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005416, Sample Num: 86656, Cur Loss: 0.00000042, Cur Avg Loss: 0.01070162, Log Avg loss: 0.00514269, Global Avg Loss: 0.03308085, Time: 0.0288 Steps: 41200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005616, Sample Num: 89856, Cur Loss: 0.00000001, Cur Avg Loss: 0.01032052, Log Avg loss: 0.00000045, Global Avg Loss: 0.03292104, Time: 0.0492 Steps: 41400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 005816, Sample Num: 93056, Cur Loss: 0.00000001, Cur Avg Loss: 0.00996578, Log Avg loss: 0.00000452, Global Avg Loss: 0.03276279, Time: 0.0918 Steps: 41600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006016, Sample Num: 96256, Cur Loss: 0.00000000, Cur Avg Loss: 0.00963447, Log Avg loss: 0.00000007, Global Avg Loss: 0.03260603, Time: 0.0286 Steps: 41800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006216, Sample Num: 99456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01201895, Log Avg loss: 0.08374413, Global Avg Loss: 0.03284955, Time: 0.0250 Steps: 42000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006416, Sample Num: 102656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01165377, Log Avg loss: 0.00030384, Global Avg Loss: 0.03269530, Time: 0.0218 Steps: 42200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006616, Sample Num: 105856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01133345, Log Avg loss: 0.00105774, Global Avg Loss: 0.03254607, Time: 0.1102 Steps: 42400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006816, Sample Num: 109056, Cur Loss: 0.00000024, Cur Avg Loss: 0.01100625, Log Avg loss: 0.00018258, Global Avg Loss: 0.03239413, Time: 0.0197 Steps: 42600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007016, Sample Num: 112256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01069281, Log Avg loss: 0.00001068, Global Avg Loss: 0.03224280, Time: 0.0180 Steps: 42800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007216, Sample Num: 115456, Cur Loss: 0.00000024, Cur Avg Loss: 0.01190524, Log Avg loss: 0.05443731, Global Avg Loss: 0.03234603, Time: 0.0265 Steps: 43000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007416, Sample Num: 118656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01158475, Log Avg loss: 0.00002137, Global Avg Loss: 0.03219638, Time: 0.0557 Steps: 43200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007616, Sample Num: 121856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01128106, Log Avg loss: 0.00002028, Global Avg Loss: 0.03204810, Time: 0.0600 Steps: 43400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007816, Sample Num: 125056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01099242, Log Avg loss: 0.00000095, Global Avg Loss: 0.03190110, Time: 0.0347 Steps: 43600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 008016, Sample Num: 128256, Cur Loss: 0.00000072, Cur Avg Loss: 0.01078667, Log Avg loss: 0.00274614, Global Avg Loss: 0.03176797, Time: 0.0419 Steps: 43800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008216, Sample Num: 131456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01059277, Log Avg loss: 0.00282116, Global Avg Loss: 0.03163640, Time: 0.0343 Steps: 44000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008416, Sample Num: 134656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01237417, Log Avg loss: 0.08555414, Global Avg Loss: 0.03188037, Time: 0.0416 Steps: 44200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008616, Sample Num: 137856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01208723, Log Avg loss: 0.00001266, Global Avg Loss: 0.03173682, Time: 0.0603 Steps: 44400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008816, Sample Num: 141056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01182756, Log Avg loss: 0.00064086, Global Avg Loss: 0.03159738, Time: 0.0332 Steps: 44600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 009016, Sample Num: 144256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01156521, Log Avg loss: 0.00000095, Global Avg Loss: 0.03145632, Time: 0.0245 Steps: 44800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 009216, Sample Num: 147456, Cur Loss: 0.00000001, Cur Avg Loss: 0.01204830, Log Avg loss: 0.03382600, Global Avg Loss: 0.03146685, Time: 0.0335 Steps: 45000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009416, Sample Num: 150656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01603942, Log Avg loss: 0.19995023, Global Avg Loss: 0.03221235, Time: 0.0197 Steps: 45200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009616, Sample Num: 153856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01571219, Log Avg loss: 0.00030641, Global Avg Loss: 0.03207180, Time: 0.0361 Steps: 45400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009816, Sample Num: 157056, Cur Loss: 0.00004601, Cur Avg Loss: 0.01731069, Log Avg loss: 0.09416631, Global Avg Loss: 0.03234414, Time: 0.0363 Steps: 45600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010016, Sample Num: 160256, Cur Loss: 0.00115058, Cur Avg Loss: 0.01730147, Log Avg loss: 0.01684901, Global Avg Loss: 0.03227648, Time: 0.0497 Steps: 45800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010216, Sample Num: 163456, Cur Loss: 0.00003016, Cur Avg Loss: 0.01698701, Log Avg loss: 0.00123877, Global Avg Loss: 0.03214153, Time: 0.0201 Steps: 46000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010416, Sample Num: 166656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01666184, Log Avg loss: 0.00005221, Global Avg Loss: 0.03200262, Time: 0.0324 Steps: 46200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 010616, Sample Num: 169856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01636636, Log Avg loss: 0.00097791, Global Avg Loss: 0.03186889, Time: 0.0214 Steps: 46400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 010816, Sample Num: 173056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01606375, Log Avg loss: 0.00000134, Global Avg Loss: 0.03173212, Time: 0.0200 Steps: 46600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011016, Sample Num: 176256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01577213, Log Avg loss: 0.00000121, Global Avg Loss: 0.03159652, Time: 0.0484 Steps: 46800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011216, Sample Num: 179456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01551801, Log Avg loss: 0.00152109, Global Avg Loss: 0.03146854, Time: 0.0433 Steps: 47000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011416, Sample Num: 182656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01524616, Log Avg loss: 0.00000087, Global Avg Loss: 0.03133520, Time: 0.0197 Steps: 47200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011616, Sample Num: 185856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01498367, Log Avg loss: 0.00000043, Global Avg Loss: 0.03120298, Time: 0.0540 Steps: 47400, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 011816, Sample Num: 189056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01490257, Log Avg loss: 0.01019269, Global Avg Loss: 0.03111471, Time: 0.0328 Steps: 47600, Updated lr: 0.000060 ***** Running evaluation checkpoint-47712 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-47712 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 523.718020, Avg time per batch (s): 0.040000 {"eval_avg_loss": 0.021571, "eval_total_loss": 30.048925, "eval_acc": 0.999596, "eval_prec": 0.990758, "eval_recall": 0.992593, "eval_f1": 0.991674, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999901, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 4, "tp": 536}, "eval_mcc2": 0.991468, "eval_mcc": 0.991468, "eval_sn": 0.992593, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.011616, "test_total_loss": 16.181106, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 6.010077258985556e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.031056675100680022, "train_cur_epoch_loss": 176.80488499044466, "train_cur_epoch_avg_loss": 0.0148226764747187, "train_cur_epoch_time": 523.7180197238922, "train_cur_epoch_avg_time": 0.04390660795807279, "epoch": 4, "step": 47712} ################################################## Training, Epoch: 0005, Batch: 000088, Sample Num: 1408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000086, Log Avg loss: 0.00358069, Global Avg Loss: 0.03099950, Time: 0.0392 Steps: 47800, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000288, Sample Num: 4608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00004606, Log Avg loss: 0.00006595, Global Avg Loss: 0.03087061, Time: 0.0281 Steps: 48000, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000488, Sample Num: 7808, Cur Loss: 0.00000012, Cur Avg Loss: 0.00005021, Log Avg loss: 0.00005619, Global Avg Loss: 0.03074275, Time: 0.0273 Steps: 48200, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000688, Sample Num: 11008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00003593, Log Avg loss: 0.00000107, Global Avg Loss: 0.03061572, Time: 0.0266 Steps: 48400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000888, Sample Num: 14208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00023346, Log Avg loss: 0.00091295, Global Avg Loss: 0.03049349, Time: 0.0232 Steps: 48600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001088, Sample Num: 17408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01931479, Log Avg loss: 0.10403589, Global Avg Loss: 0.03079489, Time: 0.0192 Steps: 48800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001288, Sample Num: 20608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01631572, Log Avg loss: 0.00000081, Global Avg Loss: 0.03066920, Time: 0.0311 Steps: 49000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001488, Sample Num: 23808, Cur Loss: 0.00000002, Cur Avg Loss: 0.01417992, Log Avg loss: 0.00042533, Global Avg Loss: 0.03054626, Time: 0.0670 Steps: 49200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001688, Sample Num: 27008, Cur Loss: 0.00000274, Cur Avg Loss: 0.01271235, Log Avg loss: 0.00179368, Global Avg Loss: 0.03042985, Time: 0.0288 Steps: 49400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001888, Sample Num: 30208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01136577, Log Avg loss: 0.00000058, Global Avg Loss: 0.03030715, Time: 0.0818 Steps: 49600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002088, Sample Num: 33408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01135230, Log Avg loss: 0.01122519, Global Avg Loss: 0.03023052, Time: 0.0579 Steps: 49800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002288, Sample Num: 36608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01282224, Log Avg loss: 0.02816846, Global Avg Loss: 0.03022227, Time: 0.0593 Steps: 50000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002488, Sample Num: 39808, Cur Loss: 0.00001083, Cur Avg Loss: 0.01179178, Log Avg loss: 0.00000325, Global Avg Loss: 0.03010187, Time: 0.1119 Steps: 50200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002688, Sample Num: 43008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01870895, Log Avg loss: 0.10475849, Global Avg Loss: 0.03039813, Time: 0.0314 Steps: 50400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002888, Sample Num: 46208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01743521, Log Avg loss: 0.00031615, Global Avg Loss: 0.03027923, Time: 0.0335 Steps: 50600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003088, Sample Num: 49408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01630603, Log Avg loss: 0.00000067, Global Avg Loss: 0.03016002, Time: 0.0298 Steps: 50800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003288, Sample Num: 52608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01537328, Log Avg loss: 0.00097171, Global Avg Loss: 0.03004556, Time: 0.0406 Steps: 51000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003488, Sample Num: 55808, Cur Loss: 0.00000001, Cur Avg Loss: 0.01449291, Log Avg loss: 0.00001951, Global Avg Loss: 0.02992827, Time: 0.0557 Steps: 51200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003688, Sample Num: 59008, Cur Loss: 0.00000270, Cur Avg Loss: 0.01501604, Log Avg loss: 0.02413953, Global Avg Loss: 0.02990574, Time: 0.0596 Steps: 51400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003888, Sample Num: 62208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01426938, Log Avg loss: 0.00050093, Global Avg Loss: 0.02979177, Time: 0.0220 Steps: 51600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 004088, Sample Num: 65408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01357143, Log Avg loss: 0.00000337, Global Avg Loss: 0.02967676, Time: 0.0374 Steps: 51800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 004288, Sample Num: 68608, Cur Loss: 0.00000006, Cur Avg Loss: 0.01301702, Log Avg loss: 0.00168489, Global Avg Loss: 0.02956910, Time: 0.1259 Steps: 52000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 004488, Sample Num: 71808, Cur Loss: 0.00002718, Cur Avg Loss: 0.01246568, Log Avg loss: 0.00064495, Global Avg Loss: 0.02945828, Time: 0.0541 Steps: 52200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 004688, Sample Num: 75008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01193390, Log Avg loss: 0.00000071, Global Avg Loss: 0.02934584, Time: 0.0782 Steps: 52400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 004888, Sample Num: 78208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01155847, Log Avg loss: 0.00275831, Global Avg Loss: 0.02924475, Time: 0.0733 Steps: 52600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 005088, Sample Num: 81408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01110551, Log Avg loss: 0.00003514, Global Avg Loss: 0.02913411, Time: 0.0308 Steps: 52800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 005288, Sample Num: 84608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01068562, Log Avg loss: 0.00000376, Global Avg Loss: 0.02902418, Time: 0.0173 Steps: 53000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 005488, Sample Num: 87808, Cur Loss: 0.00000012, Cur Avg Loss: 0.01050076, Log Avg loss: 0.00561289, Global Avg Loss: 0.02893617, Time: 0.0390 Steps: 53200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 005688, Sample Num: 91008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01013154, Log Avg loss: 0.00000013, Global Avg Loss: 0.02882780, Time: 0.0472 Steps: 53400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 005888, Sample Num: 94208, Cur Loss: 0.00000012, Cur Avg Loss: 0.00978742, Log Avg loss: 0.00000081, Global Avg Loss: 0.02872023, Time: 0.0308 Steps: 53600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006088, Sample Num: 97408, Cur Loss: 0.02033927, Cur Avg Loss: 0.00946925, Log Avg loss: 0.00010232, Global Avg Loss: 0.02861385, Time: 0.1116 Steps: 53800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006288, Sample Num: 100608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01138029, Log Avg loss: 0.06955218, Global Avg Loss: 0.02876547, Time: 0.0625 Steps: 54000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006488, Sample Num: 103808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01105655, Log Avg loss: 0.00087820, Global Avg Loss: 0.02866256, Time: 0.0655 Steps: 54200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006688, Sample Num: 107008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01072762, Log Avg loss: 0.00005714, Global Avg Loss: 0.02855740, Time: 0.0288 Steps: 54400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 006888, Sample Num: 110208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01041881, Log Avg loss: 0.00009212, Global Avg Loss: 0.02845313, Time: 0.0226 Steps: 54600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007088, Sample Num: 113408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01185792, Log Avg loss: 0.06142095, Global Avg Loss: 0.02857345, Time: 0.0388 Steps: 54800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007288, Sample Num: 116608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01160072, Log Avg loss: 0.00248553, Global Avg Loss: 0.02847858, Time: 0.0285 Steps: 55000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007488, Sample Num: 119808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01129087, Log Avg loss: 0.00000018, Global Avg Loss: 0.02837540, Time: 0.0410 Steps: 55200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007688, Sample Num: 123008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01099794, Log Avg loss: 0.00003054, Global Avg Loss: 0.02827307, Time: 0.0315 Steps: 55400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007888, Sample Num: 126208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01071912, Log Avg loss: 0.00000114, Global Avg Loss: 0.02817138, Time: 0.0223 Steps: 55600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008088, Sample Num: 129408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01055730, Log Avg loss: 0.00417541, Global Avg Loss: 0.02808537, Time: 0.0198 Steps: 55800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008288, Sample Num: 132608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01030991, Log Avg loss: 0.00030511, Global Avg Loss: 0.02798615, Time: 0.0344 Steps: 56000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008488, Sample Num: 135808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01186075, Log Avg loss: 0.07612781, Global Avg Loss: 0.02815748, Time: 0.0271 Steps: 56200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008688, Sample Num: 139008, Cur Loss: 0.00000083, Cur Avg Loss: 0.01158840, Log Avg loss: 0.00002973, Global Avg Loss: 0.02805773, Time: 0.0298 Steps: 56400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008888, Sample Num: 142208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01132824, Log Avg loss: 0.00002676, Global Avg Loss: 0.02795868, Time: 0.0298 Steps: 56600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 009088, Sample Num: 145408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01107897, Log Avg loss: 0.00000162, Global Avg Loss: 0.02786024, Time: 0.0529 Steps: 56800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009288, Sample Num: 148608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01177966, Log Avg loss: 0.04361881, Global Avg Loss: 0.02791554, Time: 0.0381 Steps: 57000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009488, Sample Num: 151808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01556244, Log Avg loss: 0.19123475, Global Avg Loss: 0.02848658, Time: 0.0551 Steps: 57200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009688, Sample Num: 155008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01525348, Log Avg loss: 0.00059645, Global Avg Loss: 0.02838940, Time: 0.0336 Steps: 57400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009888, Sample Num: 158208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01607502, Log Avg loss: 0.05587044, Global Avg Loss: 0.02848482, Time: 0.0605 Steps: 57600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 010088, Sample Num: 161408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01600983, Log Avg loss: 0.01278716, Global Avg Loss: 0.02843051, Time: 0.0190 Steps: 57800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 010288, Sample Num: 164608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01570282, Log Avg loss: 0.00021725, Global Avg Loss: 0.02833322, Time: 0.0223 Steps: 58000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 010488, Sample Num: 167808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01540354, Log Avg loss: 0.00000827, Global Avg Loss: 0.02823588, Time: 0.0272 Steps: 58200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 010688, Sample Num: 171008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01512933, Log Avg loss: 0.00074999, Global Avg Loss: 0.02814175, Time: 0.0815 Steps: 58400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 010888, Sample Num: 174208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01485144, Log Avg loss: 0.00000065, Global Avg Loss: 0.02804571, Time: 0.0292 Steps: 58600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 011088, Sample Num: 177408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01460532, Log Avg loss: 0.00120703, Global Avg Loss: 0.02795442, Time: 0.0240 Steps: 58800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 011288, Sample Num: 180608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01434886, Log Avg loss: 0.00013019, Global Avg Loss: 0.02786010, Time: 0.0479 Steps: 59000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 011488, Sample Num: 183808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01409907, Log Avg loss: 0.00000127, Global Avg Loss: 0.02776598, Time: 0.0274 Steps: 59200, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 011688, Sample Num: 187008, Cur Loss: 0.00000001, Cur Avg Loss: 0.01407344, Log Avg loss: 0.01260134, Global Avg Loss: 0.02771492, Time: 0.0696 Steps: 59400, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 011888, Sample Num: 190208, Cur Loss: 0.00000003, Cur Avg Loss: 0.01395605, Log Avg loss: 0.00709570, Global Avg Loss: 0.02764573, Time: 0.0601 Steps: 59600, Updated lr: 0.000050 ***** Running evaluation checkpoint-59640 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-59640 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 523.133146, Avg time per batch (s): 0.040000 {"eval_avg_loss": 0.024515, "eval_total_loss": 34.149055, "eval_acc": 0.999596, "eval_prec": 0.990758, "eval_recall": 0.992593, "eval_f1": 0.991674, "eval_roc_auc": 0.999997, "eval_pr_auc": 0.999894, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 4, "tp": 536}, "eval_mcc2": 0.991468, "eval_mcc": 0.991468, "eval_sn": 0.992593, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.014005, "test_total_loss": 19.508987, "test_acc": 0.999865, "test_prec": 0.998145, "test_recall": 0.996296, "test_f1": 0.99722, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21743, "fp": 1, "fn": 2, "tp": 538}, "test_mcc2": 0.997151, "test_mcc": 0.997151, "test_sn": 0.996296, "test_sp": 0.999954, "lr": 5.008397715821297e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.027627190203302576, "train_cur_epoch_loss": 165.9095413213179, "train_cur_epoch_avg_loss": 0.01390925061379258, "train_cur_epoch_time": 523.1331458091736, "train_cur_epoch_avg_time": 0.04385757426300919, "epoch": 5, "step": 59640} ################################################## Training, Epoch: 0006, Batch: 000160, Sample Num: 2560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00007754, Log Avg loss: 0.00006203, Global Avg Loss: 0.02755348, Time: 0.0611 Steps: 59800, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000360, Sample Num: 5760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00003483, Log Avg loss: 0.00000066, Global Avg Loss: 0.02746164, Time: 0.1582 Steps: 60000, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00003863, Log Avg loss: 0.00004548, Global Avg Loss: 0.02737055, Time: 0.0908 Steps: 60200, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000760, Sample Num: 12160, Cur Loss: 0.00000001, Cur Avg Loss: 0.00002855, Log Avg loss: 0.00000033, Global Avg Loss: 0.02727992, Time: 0.1612 Steps: 60400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 000960, Sample Num: 15360, Cur Loss: 0.00000000, Cur Avg Loss: 0.02613660, Log Avg loss: 0.12534718, Global Avg Loss: 0.02760358, Time: 0.0217 Steps: 60600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001160, Sample Num: 18560, Cur Loss: 0.00000000, Cur Avg Loss: 0.02173439, Log Avg loss: 0.00060381, Global Avg Loss: 0.02751476, Time: 0.0674 Steps: 60800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01853871, Log Avg loss: 0.00000377, Global Avg Loss: 0.02742456, Time: 0.1359 Steps: 61000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001560, Sample Num: 24960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01616805, Log Avg loss: 0.00004753, Global Avg Loss: 0.02733509, Time: 0.0266 Steps: 61200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001760, Sample Num: 28160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01457763, Log Avg loss: 0.00217238, Global Avg Loss: 0.02725313, Time: 0.0372 Steps: 61400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001960, Sample Num: 31360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01309041, Log Avg loss: 0.00000283, Global Avg Loss: 0.02716466, Time: 0.0283 Steps: 61600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002160, Sample Num: 34560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01366219, Log Avg loss: 0.01926567, Global Avg Loss: 0.02713909, Time: 0.0810 Steps: 61800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01290364, Log Avg loss: 0.00471123, Global Avg Loss: 0.02706675, Time: 0.0316 Steps: 62000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002560, Sample Num: 40960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01708516, Log Avg loss: 0.06642711, Global Avg Loss: 0.02719331, Time: 0.0520 Steps: 62200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002760, Sample Num: 44160, Cur Loss: 0.00000000, Cur Avg Loss: 0.02160797, Log Avg loss: 0.07949994, Global Avg Loss: 0.02736096, Time: 0.0343 Steps: 62400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002960, Sample Num: 47360, Cur Loss: 0.00000000, Cur Avg Loss: 0.02015168, Log Avg loss: 0.00005486, Global Avg Loss: 0.02727372, Time: 0.0286 Steps: 62600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01887638, Log Avg loss: 0.00000192, Global Avg Loss: 0.02718686, Time: 0.0584 Steps: 62800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003360, Sample Num: 53760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01778753, Log Avg loss: 0.00058377, Global Avg Loss: 0.02710241, Time: 0.0462 Steps: 63000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003560, Sample Num: 56960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01973251, Log Avg loss: 0.05240814, Global Avg Loss: 0.02718249, Time: 0.0427 Steps: 63200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003760, Sample Num: 60160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01873586, Log Avg loss: 0.00099547, Global Avg Loss: 0.02709988, Time: 0.0431 Steps: 63400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003960, Sample Num: 63360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01778987, Log Avg loss: 0.00000540, Global Avg Loss: 0.02701468, Time: 0.1488 Steps: 63600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01693622, Log Avg loss: 0.00003387, Global Avg Loss: 0.02693010, Time: 0.0568 Steps: 63800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 004360, Sample Num: 69760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01617235, Log Avg loss: 0.00028391, Global Avg Loss: 0.02684683, Time: 0.0491 Steps: 64000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 004560, Sample Num: 72960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01546313, Log Avg loss: 0.00000197, Global Avg Loss: 0.02676320, Time: 0.0363 Steps: 64200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 004760, Sample Num: 76160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01488634, Log Avg loss: 0.00173558, Global Avg Loss: 0.02668548, Time: 0.0193 Steps: 64400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 004960, Sample Num: 79360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01429931, Log Avg loss: 0.00032797, Global Avg Loss: 0.02660387, Time: 0.0650 Steps: 64600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 005160, Sample Num: 82560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01374537, Log Avg loss: 0.00000779, Global Avg Loss: 0.02652179, Time: 0.0253 Steps: 64800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 005360, Sample Num: 85760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01323251, Log Avg loss: 0.00000072, Global Avg Loss: 0.02644018, Time: 0.1486 Steps: 65000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01288118, Log Avg loss: 0.00346535, Global Avg Loss: 0.02636971, Time: 0.0555 Steps: 65200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 005760, Sample Num: 92160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01243398, Log Avg loss: 0.00000205, Global Avg Loss: 0.02628907, Time: 0.0393 Steps: 65400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01201674, Log Avg loss: 0.00000006, Global Avg Loss: 0.02620892, Time: 0.0270 Steps: 65600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006160, Sample Num: 98560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01317641, Log Avg loss: 0.04773467, Global Avg Loss: 0.02627435, Time: 0.0515 Steps: 65800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006360, Sample Num: 101760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01276211, Log Avg loss: 0.00000151, Global Avg Loss: 0.02619474, Time: 0.0423 Steps: 66000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006560, Sample Num: 104960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01240573, Log Avg loss: 0.00107293, Global Avg Loss: 0.02611884, Time: 0.0848 Steps: 66200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006760, Sample Num: 108160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01203904, Log Avg loss: 0.00001152, Global Avg Loss: 0.02604020, Time: 0.0517 Steps: 66400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 006960, Sample Num: 111360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01169431, Log Avg loss: 0.00004261, Global Avg Loss: 0.02596213, Time: 0.0585 Steps: 66600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007160, Sample Num: 114560, Cur Loss: 0.00000060, Cur Avg Loss: 0.01342526, Log Avg loss: 0.07366241, Global Avg Loss: 0.02610495, Time: 0.0330 Steps: 66800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007360, Sample Num: 117760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01313602, Log Avg loss: 0.00278114, Global Avg Loss: 0.02603532, Time: 0.0437 Steps: 67000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007560, Sample Num: 120960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01278852, Log Avg loss: 0.00000034, Global Avg Loss: 0.02595784, Time: 0.0379 Steps: 67200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01246013, Log Avg loss: 0.00004728, Global Avg Loss: 0.02588095, Time: 0.0819 Steps: 67400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007960, Sample Num: 127360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01220782, Log Avg loss: 0.00241805, Global Avg Loss: 0.02581154, Time: 0.0216 Steps: 67600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01194688, Log Avg loss: 0.00156148, Global Avg Loss: 0.02574000, Time: 0.0283 Steps: 67800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01166399, Log Avg loss: 0.00012202, Global Avg Loss: 0.02566466, Time: 0.0405 Steps: 68000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008560, Sample Num: 136960, Cur Loss: 0.00000060, Cur Avg Loss: 0.01327682, Log Avg loss: 0.08069307, Global Avg Loss: 0.02582603, Time: 0.0765 Steps: 68200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008760, Sample Num: 140160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01297378, Log Avg loss: 0.00000358, Global Avg Loss: 0.02575052, Time: 0.1151 Steps: 68400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008960, Sample Num: 143360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01268421, Log Avg loss: 0.00000116, Global Avg Loss: 0.02567545, Time: 0.0563 Steps: 68600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 009160, Sample Num: 146560, Cur Loss: 0.00000012, Cur Avg Loss: 0.01240747, Log Avg loss: 0.00000948, Global Avg Loss: 0.02560084, Time: 0.0839 Steps: 68800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01629597, Log Avg loss: 0.19438940, Global Avg Loss: 0.02609009, Time: 0.0287 Steps: 69000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009560, Sample Num: 152960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01595696, Log Avg loss: 0.00009116, Global Avg Loss: 0.02601494, Time: 0.0459 Steps: 69200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009760, Sample Num: 156160, Cur Loss: 0.00000030, Cur Avg Loss: 0.01564020, Log Avg loss: 0.00049907, Global Avg Loss: 0.02594141, Time: 0.0735 Steps: 69400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009960, Sample Num: 159360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01642949, Log Avg loss: 0.05494684, Global Avg Loss: 0.02602476, Time: 0.0585 Steps: 69600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 010160, Sample Num: 162560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01639856, Log Avg loss: 0.01485813, Global Avg Loss: 0.02599276, Time: 0.0282 Steps: 69800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01608371, Log Avg loss: 0.00008941, Global Avg Loss: 0.02591875, Time: 0.0987 Steps: 70000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 010560, Sample Num: 168960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01577913, Log Avg loss: 0.00000215, Global Avg Loss: 0.02584492, Time: 0.0358 Steps: 70200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 010760, Sample Num: 172160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01549811, Log Avg loss: 0.00066028, Global Avg Loss: 0.02577337, Time: 0.0863 Steps: 70400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 010960, Sample Num: 175360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01521531, Log Avg loss: 0.00000038, Global Avg Loss: 0.02570036, Time: 0.1690 Steps: 70600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01496644, Log Avg loss: 0.00132870, Global Avg Loss: 0.02563151, Time: 0.0358 Steps: 70800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 011360, Sample Num: 181760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01470397, Log Avg loss: 0.00005818, Global Avg Loss: 0.02555948, Time: 0.0315 Steps: 71000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01444960, Log Avg loss: 0.00000102, Global Avg Loss: 0.02548768, Time: 0.0207 Steps: 71200, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01425091, Log Avg loss: 0.00276689, Global Avg Loss: 0.02542404, Time: 0.0666 Steps: 71400, Updated lr: 0.000040 ***** Running evaluation checkpoint-71568 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-71568 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 763.506907, Avg time per batch (s): 0.060000 {"eval_avg_loss": 0.025801, "eval_total_loss": 35.94122, "eval_acc": 0.999596, "eval_prec": 0.990758, "eval_recall": 0.992593, "eval_f1": 0.991674, "eval_roc_auc": 0.999997, "eval_pr_auc": 0.999868, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 4, "tp": 536}, "eval_mcc2": 0.991468, "eval_mcc": 0.991468, "eval_sn": 0.992593, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.015283, "test_total_loss": 21.289718, "test_acc": 0.999865, "test_prec": 0.998145, "test_recall": 0.996296, "test_f1": 0.99722, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21743, "fp": 1, "fn": 2, "tp": 538}, "test_mcc2": 0.997151, "test_mcc": 0.997151, "test_sn": 0.996296, "test_sp": 0.999954, "lr": 4.006718172657038e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.02537767537122373, "train_cur_epoch_loss": 168.54384724277284, "train_cur_epoch_avg_loss": 0.014130101210829379, "train_cur_epoch_time": 763.5069069862366, "train_cur_epoch_avg_time": 0.06400963338248127, "epoch": 6, "step": 71568} ################################################## Training, Epoch: 0007, Batch: 000032, Sample Num: 512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000319, Log Avg loss: 0.00476613, Global Avg Loss: 0.02536633, Time: 0.0986 Steps: 71600, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000232, Sample Num: 3712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000874, Log Avg loss: 0.00000963, Global Avg Loss: 0.02529570, Time: 0.0291 Steps: 71800, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000432, Sample Num: 6912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000716, Log Avg loss: 0.00000532, Global Avg Loss: 0.02522545, Time: 0.1090 Steps: 72000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000632, Sample Num: 10112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000514, Log Avg loss: 0.00000078, Global Avg Loss: 0.02515558, Time: 0.1473 Steps: 72200, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000832, Sample Num: 13312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00002542, Log Avg loss: 0.00008951, Global Avg Loss: 0.02508633, Time: 0.0300 Steps: 72400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001032, Sample Num: 16512, Cur Loss: 0.00000000, Cur Avg Loss: 0.02223848, Log Avg loss: 0.11464482, Global Avg Loss: 0.02533305, Time: 0.0704 Steps: 72600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001232, Sample Num: 19712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01862846, Log Avg loss: 0.00000077, Global Avg Loss: 0.02526346, Time: 0.0695 Steps: 72800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001432, Sample Num: 22912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01602699, Log Avg loss: 0.00000191, Global Avg Loss: 0.02519425, Time: 0.0497 Steps: 73000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001632, Sample Num: 26112, Cur Loss: 0.00000060, Cur Avg Loss: 0.01432399, Log Avg loss: 0.00213054, Global Avg Loss: 0.02513123, Time: 0.0821 Steps: 73200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001832, Sample Num: 29312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01290234, Log Avg loss: 0.00130161, Global Avg Loss: 0.02506630, Time: 0.0578 Steps: 73400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002032, Sample Num: 32512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01163259, Log Avg loss: 0.00000169, Global Avg Loss: 0.02499819, Time: 0.0850 Steps: 73600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002232, Sample Num: 35712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01220662, Log Avg loss: 0.01803882, Global Avg Loss: 0.02497933, Time: 0.0368 Steps: 73800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002432, Sample Num: 38912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01151589, Log Avg loss: 0.00380737, Global Avg Loss: 0.02492211, Time: 0.0381 Steps: 74000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002632, Sample Num: 42112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01549364, Log Avg loss: 0.06386299, Global Avg Loss: 0.02502707, Time: 0.0265 Steps: 74200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002832, Sample Num: 45312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01770809, Log Avg loss: 0.04685026, Global Avg Loss: 0.02508574, Time: 0.0474 Steps: 74400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003032, Sample Num: 48512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01654002, Log Avg loss: 0.00000021, Global Avg Loss: 0.02501848, Time: 0.0695 Steps: 74600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003232, Sample Num: 51712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01551653, Log Avg loss: 0.00000040, Global Avg Loss: 0.02495159, Time: 0.0755 Steps: 74800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 003432, Sample Num: 54912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01462770, Log Avg loss: 0.00026423, Global Avg Loss: 0.02488576, Time: 0.0393 Steps: 75000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 003632, Sample Num: 58112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01721693, Log Avg loss: 0.06164806, Global Avg Loss: 0.02498353, Time: 0.0884 Steps: 75200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 003832, Sample Num: 61312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01632318, Log Avg loss: 0.00009275, Global Avg Loss: 0.02491751, Time: 0.0434 Steps: 75400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 004032, Sample Num: 64512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01551362, Log Avg loss: 0.00000247, Global Avg Loss: 0.02485159, Time: 0.0613 Steps: 75600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 004232, Sample Num: 67712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01478100, Log Avg loss: 0.00001124, Global Avg Loss: 0.02478605, Time: 0.0346 Steps: 75800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 004432, Sample Num: 70912, Cur Loss: 0.00000003, Cur Avg Loss: 0.01411507, Log Avg loss: 0.00002412, Global Avg Loss: 0.02472089, Time: 0.0877 Steps: 76000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 004632, Sample Num: 74112, Cur Loss: 0.00000083, Cur Avg Loss: 0.01350564, Log Avg loss: 0.00000072, Global Avg Loss: 0.02465601, Time: 0.0887 Steps: 76200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 004832, Sample Num: 77312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01296746, Log Avg loss: 0.00050321, Global Avg Loss: 0.02459278, Time: 0.0662 Steps: 76400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005032, Sample Num: 80512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01247702, Log Avg loss: 0.00062787, Global Avg Loss: 0.02453021, Time: 0.0349 Steps: 76600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005232, Sample Num: 83712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01200017, Log Avg loss: 0.00000264, Global Avg Loss: 0.02446633, Time: 0.0241 Steps: 76800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005432, Sample Num: 86912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01157942, Log Avg loss: 0.00057272, Global Avg Loss: 0.02440427, Time: 0.0492 Steps: 77000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005632, Sample Num: 90112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01116823, Log Avg loss: 0.00000019, Global Avg Loss: 0.02434105, Time: 0.0519 Steps: 77200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 005832, Sample Num: 93312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01078526, Log Avg loss: 0.00000092, Global Avg Loss: 0.02427815, Time: 0.0215 Steps: 77400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006032, Sample Num: 96512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01042766, Log Avg loss: 0.00000002, Global Avg Loss: 0.02421558, Time: 0.1327 Steps: 77600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006232, Sample Num: 99712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01214356, Log Avg loss: 0.06389499, Global Avg Loss: 0.02431759, Time: 0.0725 Steps: 77800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006432, Sample Num: 102912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01176948, Log Avg loss: 0.00011321, Global Avg Loss: 0.02425552, Time: 0.0514 Steps: 78000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006632, Sample Num: 106112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01141506, Log Avg loss: 0.00001705, Global Avg Loss: 0.02419353, Time: 0.0245 Steps: 78200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 006832, Sample Num: 109312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01108110, Log Avg loss: 0.00000684, Global Avg Loss: 0.02413183, Time: 0.0324 Steps: 78400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007032, Sample Num: 112512, Cur Loss: 14.60699272, Cur Avg Loss: 0.01284317, Log Avg loss: 0.07303554, Global Avg Loss: 0.02425627, Time: 0.1660 Steps: 78600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007232, Sample Num: 115712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01258608, Log Avg loss: 0.00354683, Global Avg Loss: 0.02420371, Time: 0.0904 Steps: 78800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007432, Sample Num: 118912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01224755, Log Avg loss: 0.00000631, Global Avg Loss: 0.02414245, Time: 0.0513 Steps: 79000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007632, Sample Num: 122112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01192843, Log Avg loss: 0.00006977, Global Avg Loss: 0.02408166, Time: 0.0385 Steps: 79200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007832, Sample Num: 125312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01162382, Log Avg loss: 0.00000010, Global Avg Loss: 0.02402100, Time: 0.0724 Steps: 79400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008032, Sample Num: 128512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01142093, Log Avg loss: 0.00347570, Global Avg Loss: 0.02396938, Time: 0.0350 Steps: 79600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008232, Sample Num: 131712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01114452, Log Avg loss: 0.00004379, Global Avg Loss: 0.02390941, Time: 0.0647 Steps: 79800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008432, Sample Num: 134912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01314153, Log Avg loss: 0.09533868, Global Avg Loss: 0.02408799, Time: 0.0252 Steps: 80000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008632, Sample Num: 138112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01283749, Log Avg loss: 0.00001879, Global Avg Loss: 0.02402796, Time: 0.0888 Steps: 80200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008832, Sample Num: 141312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01254680, Log Avg loss: 0.00000072, Global Avg Loss: 0.02396819, Time: 0.0572 Steps: 80400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 009032, Sample Num: 144512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01226897, Log Avg loss: 0.00000023, Global Avg Loss: 0.02390872, Time: 0.0265 Steps: 80600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009232, Sample Num: 147712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01254913, Log Avg loss: 0.02520088, Global Avg Loss: 0.02391192, Time: 0.0551 Steps: 80800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009432, Sample Num: 150912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01549407, Log Avg loss: 0.15143274, Global Avg Loss: 0.02422678, Time: 0.0463 Steps: 81000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009632, Sample Num: 154112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01517306, Log Avg loss: 0.00003408, Global Avg Loss: 0.02416720, Time: 0.0386 Steps: 81200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009832, Sample Num: 157312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01647385, Log Avg loss: 0.07912011, Global Avg Loss: 0.02430222, Time: 0.0449 Steps: 81400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 010032, Sample Num: 160512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01651028, Log Avg loss: 0.01830100, Global Avg Loss: 0.02428751, Time: 0.1642 Steps: 81600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 010232, Sample Num: 163712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01619419, Log Avg loss: 0.00033892, Global Avg Loss: 0.02422895, Time: 0.0397 Steps: 81800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 010432, Sample Num: 166912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01588488, Log Avg loss: 0.00006094, Global Avg Loss: 0.02417001, Time: 0.0301 Steps: 82000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 010632, Sample Num: 170112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01559235, Log Avg loss: 0.00033367, Global Avg Loss: 0.02411201, Time: 0.0426 Steps: 82200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 010832, Sample Num: 173312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01530446, Log Avg loss: 0.00000028, Global Avg Loss: 0.02405349, Time: 0.0657 Steps: 82400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 011032, Sample Num: 176512, Cur Loss: 0.00000004, Cur Avg Loss: 0.01502701, Log Avg loss: 0.00000036, Global Avg Loss: 0.02399525, Time: 0.0768 Steps: 82600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 011232, Sample Num: 179712, Cur Loss: 0.00000021, Cur Avg Loss: 0.01477298, Log Avg loss: 0.00076080, Global Avg Loss: 0.02393913, Time: 0.0605 Steps: 82800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 011432, Sample Num: 182912, Cur Loss: 0.00000018, Cur Avg Loss: 0.01451453, Log Avg loss: 0.00000007, Global Avg Loss: 0.02388144, Time: 0.0607 Steps: 83000, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 011632, Sample Num: 186112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01426566, Log Avg loss: 0.00004006, Global Avg Loss: 0.02382413, Time: 0.0647 Steps: 83200, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 011832, Sample Num: 189312, Cur Loss: 0.00000168, Cur Avg Loss: 0.01405973, Log Avg loss: 0.00208292, Global Avg Loss: 0.02377199, Time: 0.0848 Steps: 83400, Updated lr: 0.000030 ***** Running evaluation checkpoint-83496 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-83496 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 741.964852, Avg time per batch (s): 0.060000 {"eval_avg_loss": 0.025538, "eval_total_loss": 35.574091, "eval_acc": 0.999641, "eval_prec": 0.990775, "eval_recall": 0.994444, "eval_f1": 0.992606, "eval_roc_auc": 0.999997, "eval_pr_auc": 0.999868, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 3, "tp": 537}, "eval_mcc2": 0.992424, "eval_mcc": 0.992424, "eval_sn": 0.994444, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.014906, "test_total_loss": 20.763432, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 3.005038629492778e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.02374466087828356, "train_cur_epoch_loss": 166.35473372542924, "train_cur_epoch_avg_loss": 0.013946573920642961, "train_cur_epoch_time": 741.9648520946503, "train_cur_epoch_avg_time": 0.062203626097807704, "epoch": 7, "step": 83496} ################################################## Training, Epoch: 0008, Batch: 000104, Sample Num: 1664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000152, Log Avg loss: 0.00000080, Global Avg Loss: 0.02371512, Time: 0.0827 Steps: 83600, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000304, Sample Num: 4864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000781, Log Avg loss: 0.00001109, Global Avg Loss: 0.02365855, Time: 0.0460 Steps: 83800, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000504, Sample Num: 8064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000500, Log Avg loss: 0.00000073, Global Avg Loss: 0.02360222, Time: 0.0272 Steps: 84000, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000704, Sample Num: 11264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000368, Log Avg loss: 0.00000033, Global Avg Loss: 0.02354616, Time: 0.0378 Steps: 84200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 000904, Sample Num: 14464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00004812, Log Avg loss: 0.00020457, Global Avg Loss: 0.02349085, Time: 0.0292 Steps: 84400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001104, Sample Num: 17664, Cur Loss: 0.00000000, Cur Avg Loss: 0.02147391, Log Avg loss: 0.11831845, Global Avg Loss: 0.02371503, Time: 0.0488 Steps: 84600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001304, Sample Num: 20864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01818040, Log Avg loss: 0.00000025, Global Avg Loss: 0.02365910, Time: 0.0639 Steps: 84800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001504, Sample Num: 24064, Cur Loss: 0.00000001, Cur Avg Loss: 0.01576389, Log Avg loss: 0.00000821, Global Avg Loss: 0.02360345, Time: 0.0681 Steps: 85000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001704, Sample Num: 27264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01417257, Log Avg loss: 0.00220589, Global Avg Loss: 0.02355322, Time: 0.0625 Steps: 85200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001904, Sample Num: 30464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01268444, Log Avg loss: 0.00000556, Global Avg Loss: 0.02349807, Time: 0.0248 Steps: 85400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002104, Sample Num: 33664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01203088, Log Avg loss: 0.00580901, Global Avg Loss: 0.02345674, Time: 0.0592 Steps: 85600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002304, Sample Num: 36864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01162677, Log Avg loss: 0.00737555, Global Avg Loss: 0.02341926, Time: 0.0346 Steps: 85800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002504, Sample Num: 40064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01069824, Log Avg loss: 0.00000158, Global Avg Loss: 0.02336480, Time: 0.0552 Steps: 86000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002704, Sample Num: 43264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01951240, Log Avg loss: 0.12986558, Global Avg Loss: 0.02361190, Time: 0.0304 Steps: 86200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002904, Sample Num: 46464, Cur Loss: 0.00000131, Cur Avg Loss: 0.01817215, Log Avg loss: 0.00005208, Global Avg Loss: 0.02355736, Time: 0.0261 Steps: 86400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003104, Sample Num: 49664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01700128, Log Avg loss: 0.00000013, Global Avg Loss: 0.02350296, Time: 0.0304 Steps: 86600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003304, Sample Num: 52864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01600606, Log Avg loss: 0.00056030, Global Avg Loss: 0.02345009, Time: 0.0440 Steps: 86800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003504, Sample Num: 56064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01509321, Log Avg loss: 0.00001290, Global Avg Loss: 0.02339622, Time: 0.0353 Steps: 87000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003704, Sample Num: 59264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01608114, Log Avg loss: 0.03338964, Global Avg Loss: 0.02341914, Time: 0.0584 Steps: 87200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003904, Sample Num: 62464, Cur Loss: 0.00000088, Cur Avg Loss: 0.01525781, Log Avg loss: 0.00000985, Global Avg Loss: 0.02336557, Time: 0.1059 Steps: 87400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 004104, Sample Num: 65664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01451427, Log Avg loss: 0.00000025, Global Avg Loss: 0.02331222, Time: 0.0685 Steps: 87600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 004304, Sample Num: 68864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01384007, Log Avg loss: 0.00000564, Global Avg Loss: 0.02325913, Time: 0.0200 Steps: 87800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 004504, Sample Num: 72064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01322787, Log Avg loss: 0.00005324, Global Avg Loss: 0.02320639, Time: 0.0821 Steps: 88000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 004704, Sample Num: 75264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01266548, Log Avg loss: 0.00000053, Global Avg Loss: 0.02315377, Time: 0.0588 Steps: 88200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 004904, Sample Num: 78464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01218141, Log Avg loss: 0.00079608, Global Avg Loss: 0.02310319, Time: 0.0289 Steps: 88400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 005104, Sample Num: 81664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01170425, Log Avg loss: 0.00000416, Global Avg Loss: 0.02305105, Time: 0.0339 Steps: 88600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 005304, Sample Num: 84864, Cur Loss: 0.00000018, Cur Avg Loss: 0.01126292, Log Avg loss: 0.00000033, Global Avg Loss: 0.02299913, Time: 0.0300 Steps: 88800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 005504, Sample Num: 88064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01088842, Log Avg loss: 0.00095656, Global Avg Loss: 0.02294960, Time: 0.0404 Steps: 89000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 005704, Sample Num: 91264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01050664, Log Avg loss: 0.00000010, Global Avg Loss: 0.02289814, Time: 0.1107 Steps: 89200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 005904, Sample Num: 94464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01015077, Log Avg loss: 0.00000131, Global Avg Loss: 0.02284692, Time: 0.0430 Steps: 89400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006104, Sample Num: 97664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01193103, Log Avg loss: 0.06448435, Global Avg Loss: 0.02293986, Time: 0.0480 Steps: 89600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006304, Sample Num: 100864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01155260, Log Avg loss: 0.00000298, Global Avg Loss: 0.02288877, Time: 0.0198 Steps: 89800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006504, Sample Num: 104064, Cur Loss: 0.00000024, Cur Avg Loss: 0.01119873, Log Avg loss: 0.00004464, Global Avg Loss: 0.02283801, Time: 0.0310 Steps: 90000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006704, Sample Num: 107264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01086497, Log Avg loss: 0.00001123, Global Avg Loss: 0.02278739, Time: 0.0276 Steps: 90200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 006904, Sample Num: 110464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01055055, Log Avg loss: 0.00001107, Global Avg Loss: 0.02273700, Time: 0.0559 Steps: 90400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007104, Sample Num: 113664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01292399, Log Avg loss: 0.09485525, Global Avg Loss: 0.02289621, Time: 0.0360 Steps: 90600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007304, Sample Num: 116864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01262541, Log Avg loss: 0.00201975, Global Avg Loss: 0.02285022, Time: 0.0339 Steps: 90800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007504, Sample Num: 120064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01228894, Log Avg loss: 0.00000123, Global Avg Loss: 0.02280000, Time: 0.0436 Steps: 91000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007704, Sample Num: 123264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01197238, Log Avg loss: 0.00009502, Global Avg Loss: 0.02275021, Time: 0.0765 Steps: 91200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007904, Sample Num: 126464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01170950, Log Avg loss: 0.00158311, Global Avg Loss: 0.02270390, Time: 0.0404 Steps: 91400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008104, Sample Num: 129664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01144676, Log Avg loss: 0.00106340, Global Avg Loss: 0.02265665, Time: 0.0180 Steps: 91600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008304, Sample Num: 132864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01117151, Log Avg loss: 0.00001832, Global Avg Loss: 0.02260732, Time: 0.0251 Steps: 91800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008504, Sample Num: 136064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01281285, Log Avg loss: 0.08096119, Global Avg Loss: 0.02273418, Time: 0.0477 Steps: 92000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008704, Sample Num: 139264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01251844, Log Avg loss: 0.00000034, Global Avg Loss: 0.02268487, Time: 0.0524 Steps: 92200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008904, Sample Num: 142464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01223726, Log Avg loss: 0.00000005, Global Avg Loss: 0.02263577, Time: 0.0376 Steps: 92400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 009104, Sample Num: 145664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01196843, Log Avg loss: 0.00000010, Global Avg Loss: 0.02258688, Time: 0.0331 Steps: 92600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009304, Sample Num: 148864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01594319, Log Avg loss: 0.19687444, Global Avg Loss: 0.02296250, Time: 0.0335 Steps: 92800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009504, Sample Num: 152064, Cur Loss: 0.00000002, Cur Avg Loss: 0.01560933, Log Avg loss: 0.00007802, Global Avg Loss: 0.02291328, Time: 0.0701 Steps: 93000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009704, Sample Num: 155264, Cur Loss: 0.00000005, Cur Avg Loss: 0.01528764, Log Avg loss: 0.00000115, Global Avg Loss: 0.02286411, Time: 0.0784 Steps: 93200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009904, Sample Num: 158464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01537118, Log Avg loss: 0.01942468, Global Avg Loss: 0.02285675, Time: 0.0401 Steps: 93400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 010104, Sample Num: 161664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01517330, Log Avg loss: 0.00537399, Global Avg Loss: 0.02281939, Time: 0.0160 Steps: 93600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 010304, Sample Num: 164864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01490177, Log Avg loss: 0.00118422, Global Avg Loss: 0.02277326, Time: 0.0576 Steps: 93800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 010504, Sample Num: 168064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01461808, Log Avg loss: 0.00000231, Global Avg Loss: 0.02272481, Time: 0.0639 Steps: 94000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 010704, Sample Num: 171264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01434709, Log Avg loss: 0.00011476, Global Avg Loss: 0.02267681, Time: 0.0239 Steps: 94200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 010904, Sample Num: 174464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01408394, Log Avg loss: 0.00000022, Global Avg Loss: 0.02262877, Time: 0.0311 Steps: 94400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 011104, Sample Num: 177664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01383028, Log Avg loss: 0.00000048, Global Avg Loss: 0.02258093, Time: 0.0438 Steps: 94600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 011304, Sample Num: 180864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01359363, Log Avg loss: 0.00045478, Global Avg Loss: 0.02253425, Time: 0.0270 Steps: 94800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 011504, Sample Num: 184064, Cur Loss: 0.00000007, Cur Avg Loss: 0.01335730, Log Avg loss: 0.00000033, Global Avg Loss: 0.02248681, Time: 0.0550 Steps: 95000, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 011704, Sample Num: 187264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01313273, Log Avg loss: 0.00021512, Global Avg Loss: 0.02244002, Time: 0.0211 Steps: 95200, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 011904, Sample Num: 190464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01295799, Log Avg loss: 0.00273246, Global Avg Loss: 0.02239870, Time: 0.0273 Steps: 95400, Updated lr: 0.000020 ***** Running evaluation checkpoint-95424 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-95424 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 521.975824, Avg time per batch (s): 0.040000 {"eval_avg_loss": 0.025411, "eval_total_loss": 35.397831, "eval_acc": 0.999641, "eval_prec": 0.990775, "eval_recall": 0.994444, "eval_f1": 0.992606, "eval_roc_auc": 0.999997, "eval_pr_auc": 0.999868, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 3, "tp": 537}, "eval_mcc2": 0.992424, "eval_mcc": 0.992424, "eval_sn": 0.994444, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.015143, "test_total_loss": 21.093907, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 2.003359086328519e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.0223930681845784, "train_cur_epoch_loss": 154.25193375205745, "train_cur_epoch_avg_loss": 0.012931919328643314, "train_cur_epoch_time": 521.9758241176605, "train_cur_epoch_avg_time": 0.04376054863494806, "epoch": 8, "step": 95424} ################################################## Training, Epoch: 0009, Batch: 000176, Sample Num: 2816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00002754, Log Avg loss: 0.00002423, Global Avg Loss: 0.02235189, Time: 0.0409 Steps: 95600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000376, Sample Num: 6016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001384, Log Avg loss: 0.00000178, Global Avg Loss: 0.02230523, Time: 0.0264 Steps: 95800, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000576, Sample Num: 9216, Cur Loss: 0.00000006, Cur Avg Loss: 0.00000910, Log Avg loss: 0.00000019, Global Avg Loss: 0.02225876, Time: 0.0991 Steps: 96000, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000776, Sample Num: 12416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00010097, Log Avg loss: 0.00036555, Global Avg Loss: 0.02221325, Time: 0.0781 Steps: 96200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 000976, Sample Num: 15616, Cur Loss: 0.00000000, Cur Avg Loss: 0.02295754, Log Avg loss: 0.11164104, Global Avg Loss: 0.02239878, Time: 0.0277 Steps: 96400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001176, Sample Num: 18816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01912700, Log Avg loss: 0.00043399, Global Avg Loss: 0.02235331, Time: 0.0385 Steps: 96600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001376, Sample Num: 22016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01634708, Log Avg loss: 0.00000111, Global Avg Loss: 0.02230713, Time: 0.0302 Steps: 96800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001576, Sample Num: 25216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01434512, Log Avg loss: 0.00057163, Global Avg Loss: 0.02226231, Time: 0.0378 Steps: 97000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001776, Sample Num: 28416, Cur Loss: 0.00000000, Cur Avg Loss: 0.01275203, Log Avg loss: 0.00019852, Global Avg Loss: 0.02221691, Time: 0.1098 Steps: 97200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001976, Sample Num: 31616, Cur Loss: 0.00000000, Cur Avg Loss: 0.01146234, Log Avg loss: 0.00000992, Global Avg Loss: 0.02217131, Time: 0.0394 Steps: 97400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002176, Sample Num: 34816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01101676, Log Avg loss: 0.00661434, Global Avg Loss: 0.02213943, Time: 0.0329 Steps: 97600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002376, Sample Num: 38016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01017743, Log Avg loss: 0.00104555, Global Avg Loss: 0.02209630, Time: 0.0474 Steps: 97800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002576, Sample Num: 41216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01562791, Log Avg loss: 0.08037969, Global Avg Loss: 0.02221524, Time: 0.0264 Steps: 98000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002776, Sample Num: 44416, Cur Loss: 0.00000001, Cur Avg Loss: 0.01701923, Log Avg loss: 0.03493934, Global Avg Loss: 0.02224116, Time: 0.0358 Steps: 98200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002976, Sample Num: 47616, Cur Loss: 0.00000000, Cur Avg Loss: 0.01587587, Log Avg loss: 0.00000609, Global Avg Loss: 0.02219596, Time: 0.0298 Steps: 98400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003176, Sample Num: 50816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01487614, Log Avg loss: 0.00000010, Global Avg Loss: 0.02215094, Time: 0.0309 Steps: 98600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003376, Sample Num: 54016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01403896, Log Avg loss: 0.00074464, Global Avg Loss: 0.02210761, Time: 0.0251 Steps: 98800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003576, Sample Num: 57216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01553531, Log Avg loss: 0.04079359, Global Avg Loss: 0.02214536, Time: 0.0491 Steps: 99000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003776, Sample Num: 60416, Cur Loss: 0.00000000, Cur Avg Loss: 0.01471417, Log Avg loss: 0.00003216, Global Avg Loss: 0.02210077, Time: 0.0824 Steps: 99200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003976, Sample Num: 63616, Cur Loss: 0.00000000, Cur Avg Loss: 0.01397416, Log Avg loss: 0.00000283, Global Avg Loss: 0.02205631, Time: 0.0916 Steps: 99400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 004176, Sample Num: 66816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01330508, Log Avg loss: 0.00000379, Global Avg Loss: 0.02201203, Time: 0.0565 Steps: 99600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 004376, Sample Num: 70016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01269813, Log Avg loss: 0.00002505, Global Avg Loss: 0.02196797, Time: 0.0244 Steps: 99800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 004576, Sample Num: 73216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01214320, Log Avg loss: 0.00000141, Global Avg Loss: 0.02192403, Time: 0.0511 Steps: 100000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 004776, Sample Num: 76416, Cur Loss: 0.00000000, Cur Avg Loss: 0.01163858, Log Avg loss: 0.00009278, Global Avg Loss: 0.02188046, Time: 0.0244 Steps: 100200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 004976, Sample Num: 79616, Cur Loss: 0.00000000, Cur Avg Loss: 0.01118008, Log Avg loss: 0.00023114, Global Avg Loss: 0.02183733, Time: 0.0287 Steps: 100400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 005176, Sample Num: 82816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01074820, Log Avg loss: 0.00000290, Global Avg Loss: 0.02179392, Time: 0.0613 Steps: 100600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 005376, Sample Num: 86016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01034839, Log Avg loss: 0.00000131, Global Avg Loss: 0.02175069, Time: 0.0421 Steps: 100800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 005576, Sample Num: 89216, Cur Loss: 0.00000002, Cur Avg Loss: 0.01000706, Log Avg loss: 0.00083232, Global Avg Loss: 0.02170926, Time: 0.0673 Steps: 101000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 005776, Sample Num: 92416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00966058, Log Avg loss: 0.00000068, Global Avg Loss: 0.02166636, Time: 0.0599 Steps: 101200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 005976, Sample Num: 95616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00933727, Log Avg loss: 0.00000002, Global Avg Loss: 0.02162363, Time: 0.0214 Steps: 101400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006176, Sample Num: 98816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01034947, Log Avg loss: 0.04059389, Global Avg Loss: 0.02166097, Time: 0.0421 Steps: 101600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006376, Sample Num: 102016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01002613, Log Avg loss: 0.00004135, Global Avg Loss: 0.02161849, Time: 0.0289 Steps: 101800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006576, Sample Num: 105216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00972184, Log Avg loss: 0.00002112, Global Avg Loss: 0.02157615, Time: 0.0279 Steps: 102000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006776, Sample Num: 108416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00943537, Log Avg loss: 0.00001641, Global Avg Loss: 0.02153396, Time: 0.0251 Steps: 102200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 006976, Sample Num: 111616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00916498, Log Avg loss: 0.00000392, Global Avg Loss: 0.02149190, Time: 0.0421 Steps: 102400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007176, Sample Num: 114816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01052280, Log Avg loss: 0.05788378, Global Avg Loss: 0.02156284, Time: 0.0508 Steps: 102600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007376, Sample Num: 118016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01024520, Log Avg loss: 0.00028475, Global Avg Loss: 0.02152145, Time: 0.0365 Steps: 102800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007576, Sample Num: 121216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00997894, Log Avg loss: 0.00015927, Global Avg Loss: 0.02147997, Time: 0.0324 Steps: 103000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007776, Sample Num: 124416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00972229, Log Avg loss: 0.00000031, Global Avg Loss: 0.02143834, Time: 0.0256 Steps: 103200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007976, Sample Num: 127616, Cur Loss: 0.00000001, Cur Avg Loss: 0.00951378, Log Avg loss: 0.00140718, Global Avg Loss: 0.02139959, Time: 0.0426 Steps: 103400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008176, Sample Num: 130816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00929910, Log Avg loss: 0.00073745, Global Avg Loss: 0.02135971, Time: 0.0209 Steps: 103600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008376, Sample Num: 134016, Cur Loss: 0.00000004, Cur Avg Loss: 0.00913090, Log Avg loss: 0.00225502, Global Avg Loss: 0.02132290, Time: 0.1098 Steps: 103800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008576, Sample Num: 137216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01079027, Log Avg loss: 0.08028458, Global Avg Loss: 0.02143628, Time: 0.0474 Steps: 104000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008776, Sample Num: 140416, Cur Loss: 0.00000226, Cur Avg Loss: 0.01054437, Log Avg loss: 0.00000023, Global Avg Loss: 0.02139514, Time: 0.0383 Steps: 104200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008976, Sample Num: 143616, Cur Loss: 0.00000001, Cur Avg Loss: 0.01030943, Log Avg loss: 0.00000026, Global Avg Loss: 0.02135415, Time: 0.0950 Steps: 104400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009176, Sample Num: 146816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01008473, Log Avg loss: 0.00000037, Global Avg Loss: 0.02131332, Time: 0.0356 Steps: 104600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009376, Sample Num: 150016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01369775, Log Avg loss: 0.17946298, Global Avg Loss: 0.02161514, Time: 0.0375 Steps: 104800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009576, Sample Num: 153216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01341198, Log Avg loss: 0.00001526, Global Avg Loss: 0.02157399, Time: 0.0966 Steps: 105000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009776, Sample Num: 156416, Cur Loss: 0.00260542, Cur Avg Loss: 0.01316291, Log Avg loss: 0.00123742, Global Avg Loss: 0.02153533, Time: 0.0417 Steps: 105200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009976, Sample Num: 159616, Cur Loss: 0.00000000, Cur Avg Loss: 0.01317858, Log Avg loss: 0.01394425, Global Avg Loss: 0.02152093, Time: 0.0453 Steps: 105400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 010176, Sample Num: 162816, Cur Loss: 0.00000000, Cur Avg Loss: 0.01320954, Log Avg loss: 0.01475387, Global Avg Loss: 0.02150811, Time: 0.0228 Steps: 105600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010376, Sample Num: 166016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01295695, Log Avg loss: 0.00010533, Global Avg Loss: 0.02146765, Time: 0.0921 Steps: 105800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010576, Sample Num: 169216, Cur Loss: 0.00186329, Cur Avg Loss: 0.01271239, Log Avg loss: 0.00002429, Global Avg Loss: 0.02142719, Time: 0.1108 Steps: 106000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010776, Sample Num: 172416, Cur Loss: 0.00000250, Cur Avg Loss: 0.01247646, Log Avg loss: 0.00000090, Global Avg Loss: 0.02138684, Time: 0.0240 Steps: 106200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010976, Sample Num: 175616, Cur Loss: 0.00000000, Cur Avg Loss: 0.01224912, Log Avg loss: 0.00000003, Global Avg Loss: 0.02134664, Time: 0.0366 Steps: 106400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 011176, Sample Num: 178816, Cur Loss: 0.00000697, Cur Avg Loss: 0.01203444, Log Avg loss: 0.00025260, Global Avg Loss: 0.02130706, Time: 0.0369 Steps: 106600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 011376, Sample Num: 182016, Cur Loss: 0.00000000, Cur Avg Loss: 0.01182287, Log Avg loss: 0.00000015, Global Avg Loss: 0.02126716, Time: 0.0284 Steps: 106800, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 011576, Sample Num: 185216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01161860, Log Avg loss: 0.00000015, Global Avg Loss: 0.02122741, Time: 0.0413 Steps: 107000, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 011776, Sample Num: 188416, Cur Loss: 0.00000013, Cur Avg Loss: 0.01142277, Log Avg loss: 0.00008780, Global Avg Loss: 0.02118797, Time: 0.0768 Steps: 107200, Updated lr: 0.000010 ***** Running evaluation checkpoint-107352 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-107352 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 524.217313, Avg time per batch (s): 0.040000 {"eval_avg_loss": 0.024613, "eval_total_loss": 34.285236, "eval_acc": 0.999641, "eval_prec": 0.990775, "eval_recall": 0.994444, "eval_f1": 0.992606, "eval_roc_auc": 0.999997, "eval_pr_auc": 0.999871, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 3, "tp": 537}, "eval_mcc2": 0.992424, "eval_mcc": 0.992424, "eval_sn": 0.994444, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.014913, "test_total_loss": 20.774457, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 1.0016795431642594e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.02116068937272233, "train_cur_epoch_loss": 134.80618709528275, "train_cur_epoch_avg_loss": 0.01130165887787414, "train_cur_epoch_time": 524.2173128128052, "train_cur_epoch_avg_time": 0.04394846686894745, "epoch": 9, "step": 107352} ################################################## Training, Epoch: 0010, Batch: 000048, Sample Num: 768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000286, Log Avg loss: 0.00145904, Global Avg Loss: 0.02115123, Time: 0.0393 Steps: 107400, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000248, Sample Num: 3968, Cur Loss: 0.00000143, Cur Avg Loss: 0.00000363, Log Avg loss: 0.00000382, Global Avg Loss: 0.02111193, Time: 0.0331 Steps: 107600, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000448, Sample Num: 7168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000259, Log Avg loss: 0.00000130, Global Avg Loss: 0.02107276, Time: 0.0337 Steps: 107800, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000648, Sample Num: 10368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000183, Log Avg loss: 0.00000012, Global Avg Loss: 0.02103374, Time: 0.0213 Steps: 108000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 000848, Sample Num: 13568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001399, Log Avg loss: 0.00005340, Global Avg Loss: 0.02099496, Time: 0.0275 Steps: 108200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001048, Sample Num: 16768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01937603, Log Avg loss: 0.10147110, Global Avg Loss: 0.02114344, Time: 0.0265 Steps: 108400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001248, Sample Num: 19968, Cur Loss: 0.00000000, Cur Avg Loss: 0.01627095, Log Avg loss: 0.00000028, Global Avg Loss: 0.02110450, Time: 0.0601 Steps: 108600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001448, Sample Num: 23168, Cur Loss: 0.00000000, Cur Avg Loss: 0.01402447, Log Avg loss: 0.00000649, Global Avg Loss: 0.02106571, Time: 0.0325 Steps: 108800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001648, Sample Num: 26368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01233714, Log Avg loss: 0.00012081, Global Avg Loss: 0.02102728, Time: 0.0225 Steps: 109000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001848, Sample Num: 29568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01101549, Log Avg loss: 0.00012514, Global Avg Loss: 0.02098900, Time: 0.0551 Steps: 109200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002048, Sample Num: 32768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01044446, Log Avg loss: 0.00516817, Global Avg Loss: 0.02096008, Time: 0.0306 Steps: 109400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002248, Sample Num: 35968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00962693, Log Avg loss: 0.00125534, Global Avg Loss: 0.02092412, Time: 0.0318 Steps: 109600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002448, Sample Num: 39168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00899188, Log Avg loss: 0.00185392, Global Avg Loss: 0.02088938, Time: 0.0191 Steps: 109800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002648, Sample Num: 42368, Cur Loss: 0.00000006, Cur Avg Loss: 0.01646638, Log Avg loss: 0.10795432, Global Avg Loss: 0.02104768, Time: 0.0226 Steps: 110000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002848, Sample Num: 45568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01532428, Log Avg loss: 0.00020289, Global Avg Loss: 0.02100985, Time: 0.0228 Steps: 110200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003048, Sample Num: 48768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01431884, Log Avg loss: 0.00000129, Global Avg Loss: 0.02097179, Time: 0.0248 Steps: 110400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003248, Sample Num: 51968, Cur Loss: 0.00000000, Cur Avg Loss: 0.01343715, Log Avg loss: 0.00000031, Global Avg Loss: 0.02093387, Time: 0.1107 Steps: 110600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003448, Sample Num: 55168, Cur Loss: 0.00000000, Cur Avg Loss: 0.01267578, Log Avg loss: 0.00031098, Global Avg Loss: 0.02089665, Time: 0.0313 Steps: 110800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003648, Sample Num: 58368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01426453, Log Avg loss: 0.04165460, Global Avg Loss: 0.02093405, Time: 0.0335 Steps: 111000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003848, Sample Num: 61568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01352397, Log Avg loss: 0.00001626, Global Avg Loss: 0.02089643, Time: 0.0334 Steps: 111200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 004048, Sample Num: 64768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01285581, Log Avg loss: 0.00000037, Global Avg Loss: 0.02085891, Time: 0.0315 Steps: 111400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 004248, Sample Num: 67968, Cur Loss: 0.00000000, Cur Avg Loss: 0.01225067, Log Avg loss: 0.00000258, Global Avg Loss: 0.02082153, Time: 0.0311 Steps: 111600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 004448, Sample Num: 71168, Cur Loss: 0.00000000, Cur Avg Loss: 0.01170044, Log Avg loss: 0.00001370, Global Avg Loss: 0.02078431, Time: 0.0570 Steps: 111800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 004648, Sample Num: 74368, Cur Loss: 0.00000077, Cur Avg Loss: 0.01119703, Log Avg loss: 0.00000106, Global Avg Loss: 0.02074720, Time: 0.0346 Steps: 112000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 004848, Sample Num: 77568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01073717, Log Avg loss: 0.00005018, Global Avg Loss: 0.02071030, Time: 0.0602 Steps: 112200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 005048, Sample Num: 80768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01031290, Log Avg loss: 0.00002850, Global Avg Loss: 0.02067350, Time: 0.0569 Steps: 112400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 005248, Sample Num: 83968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00991993, Log Avg loss: 0.00000133, Global Avg Loss: 0.02063679, Time: 0.0321 Steps: 112600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 005448, Sample Num: 87168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00957640, Log Avg loss: 0.00056222, Global Avg Loss: 0.02060119, Time: 0.1098 Steps: 112800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 005648, Sample Num: 90368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00923730, Log Avg loss: 0.00000007, Global Avg Loss: 0.02056473, Time: 0.0329 Steps: 113000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 005848, Sample Num: 93568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00892141, Log Avg loss: 0.00000075, Global Avg Loss: 0.02052840, Time: 0.0245 Steps: 113200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006048, Sample Num: 96768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00862639, Log Avg loss: 0.00000001, Global Avg Loss: 0.02049219, Time: 0.0185 Steps: 113400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006248, Sample Num: 99968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00957518, Log Avg loss: 0.03826658, Global Avg Loss: 0.02052349, Time: 0.0390 Steps: 113600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006448, Sample Num: 103168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00927849, Log Avg loss: 0.00000995, Global Avg Loss: 0.02048743, Time: 0.0350 Steps: 113800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006648, Sample Num: 106368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00899940, Log Avg loss: 0.00000149, Global Avg Loss: 0.02045149, Time: 0.0616 Steps: 114000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 006848, Sample Num: 109568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00873716, Log Avg loss: 0.00002022, Global Avg Loss: 0.02041571, Time: 0.0829 Steps: 114200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007048, Sample Num: 112768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00916096, Log Avg loss: 0.02367184, Global Avg Loss: 0.02042141, Time: 0.0380 Steps: 114400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007248, Sample Num: 115968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00891027, Log Avg loss: 0.00007628, Global Avg Loss: 0.02038590, Time: 0.0247 Steps: 114600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007448, Sample Num: 119168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00867172, Log Avg loss: 0.00002632, Global Avg Loss: 0.02035043, Time: 0.0299 Steps: 114800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007648, Sample Num: 122368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00844770, Log Avg loss: 0.00010529, Global Avg Loss: 0.02031522, Time: 0.0641 Steps: 115000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007848, Sample Num: 125568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00823242, Log Avg loss: 0.00000004, Global Avg Loss: 0.02027995, Time: 0.0706 Steps: 115200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008048, Sample Num: 128768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00808344, Log Avg loss: 0.00223766, Global Avg Loss: 0.02024868, Time: 0.0382 Steps: 115400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008248, Sample Num: 131968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00788840, Log Avg loss: 0.00003998, Global Avg Loss: 0.02021372, Time: 0.0484 Steps: 115600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008448, Sample Num: 135168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00967106, Log Avg loss: 0.08318804, Global Avg Loss: 0.02032248, Time: 0.1266 Steps: 115800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008648, Sample Num: 138368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00944758, Log Avg loss: 0.00000780, Global Avg Loss: 0.02028746, Time: 0.0254 Steps: 116000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008848, Sample Num: 141568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00923409, Log Avg loss: 0.00000279, Global Avg Loss: 0.02025254, Time: 0.0335 Steps: 116200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 009048, Sample Num: 144768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00902998, Log Avg loss: 0.00000012, Global Avg Loss: 0.02021775, Time: 0.0282 Steps: 116400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009248, Sample Num: 147968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00883604, Log Avg loss: 0.00006203, Global Avg Loss: 0.02018317, Time: 0.0287 Steps: 116600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009448, Sample Num: 151168, Cur Loss: 0.00000006, Cur Avg Loss: 0.01224117, Log Avg loss: 0.16969442, Global Avg Loss: 0.02043919, Time: 0.0259 Steps: 116800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009648, Sample Num: 154368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01198827, Log Avg loss: 0.00004130, Global Avg Loss: 0.02040432, Time: 0.0351 Steps: 117000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009848, Sample Num: 157568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01186689, Log Avg loss: 0.00601168, Global Avg Loss: 0.02037976, Time: 0.0362 Steps: 117200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 010048, Sample Num: 160768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01167833, Log Avg loss: 0.00239330, Global Avg Loss: 0.02034912, Time: 0.0868 Steps: 117400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 010248, Sample Num: 163968, Cur Loss: 0.00000000, Cur Avg Loss: 0.01145666, Log Avg loss: 0.00032025, Global Avg Loss: 0.02031505, Time: 0.0262 Steps: 117600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 010448, Sample Num: 167168, Cur Loss: 0.00000002, Cur Avg Loss: 0.01125018, Log Avg loss: 0.00067009, Global Avg Loss: 0.02028170, Time: 0.0775 Steps: 117800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 010648, Sample Num: 170368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01103892, Log Avg loss: 0.00000261, Global Avg Loss: 0.02024733, Time: 0.0209 Steps: 118000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 010848, Sample Num: 173568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01083541, Log Avg loss: 0.00000054, Global Avg Loss: 0.02021307, Time: 0.0248 Steps: 118200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 011048, Sample Num: 176768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01063933, Log Avg loss: 0.00000416, Global Avg Loss: 0.02017893, Time: 0.0226 Steps: 118400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 011248, Sample Num: 179968, Cur Loss: 0.00000000, Cur Avg Loss: 0.01045250, Log Avg loss: 0.00013195, Global Avg Loss: 0.02014513, Time: 0.0620 Steps: 118600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 011448, Sample Num: 183168, Cur Loss: 0.00000000, Cur Avg Loss: 0.01026989, Log Avg loss: 0.00000005, Global Avg Loss: 0.02011121, Time: 0.0504 Steps: 118800, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 011648, Sample Num: 186368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01009389, Log Avg loss: 0.00001968, Global Avg Loss: 0.02007745, Time: 0.0262 Steps: 119000, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 011848, Sample Num: 189568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00994287, Log Avg loss: 0.00114735, Global Avg Loss: 0.02004568, Time: 0.0626 Steps: 119200, Updated lr: 0.000000 ***** Running evaluation checkpoint-119280 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-119280 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 524.643396, Avg time per batch (s): 0.040000 {"eval_avg_loss": 0.023634, "eval_total_loss": 32.922373, "eval_acc": 0.999641, "eval_prec": 0.990775, "eval_recall": 0.994444, "eval_f1": 0.992606, "eval_roc_auc": 0.999997, "eval_pr_auc": 0.999874, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 3, "tp": 537}, "eval_mcc2": 0.992424, "eval_mcc": 0.992424, "eval_sn": 0.994444, "eval_sp": 0.99977, "update_flag": false, "test_avg_loss": 0.014022, "test_total_loss": 19.532925, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 0.0, "cur_epoch_step": 11928, "train_global_avg_loss": 0.020032239136670155, "train_cur_epoch_loss": 117.80315868152897, "train_cur_epoch_avg_loss": 0.009876187012200617, "train_cur_epoch_time": 524.6433956623077, "train_cur_epoch_avg_time": 0.04398418810046175, "epoch": 10, "step": 119280} ################################################## #########################Best Metric######################### {"epoch": 1, "global_step": 11928, "eval_avg_loss": 0.002444, "eval_total_loss": 3.404538, "eval_acc": 0.999776, "eval_prec": 0.990826, "eval_recall": 1.0, "eval_f1": 0.995392, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999914, "eval_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 0, "tp": 540}, "eval_mcc2": 0.995288, "eval_mcc": 0.995288, "eval_sn": 1.0, "eval_sp": 0.99977, "update_flag": true, "test_avg_loss": 0.001745, "test_total_loss": 2.430984, "test_acc": 0.999776, "test_prec": 0.990826, "test_recall": 1.0, "test_f1": 0.995392, "test_roc_auc": 0.999999, "test_pr_auc": 0.999973, "test_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 0, "tp": 540}, "test_mcc2": 0.995288, "test_mcc": 0.995288, "test_sn": 1.0, "test_sp": 0.99977} ################################################## Total Time: 60446.368631, Avg time per epoch(10 epochs): 6044.640000 ++++++++++++Validation+++++++++++++ best f1 global step: 11928 checkpoint path: ../models/RdRP/protein/binary_class/luca_base/matrix/20250409154233/checkpoint-11928 ***** Running evaluation checkpoint-11928 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## {"evaluation_avg_loss_11928": 0.002444, "evaluation_total_loss_11928": 3.404538, "evaluation_acc_11928": 0.999776, "evaluation_prec_11928": 0.990826, "evaluation_recall_11928": 1.0, "evaluation_f1_11928": 0.995392, "evaluation_roc_auc_11928": 0.999998, "evaluation_pr_auc_11928": 0.999914, "evaluation_confusion_matrix_11928": {"tn": 21739, "fp": 5, "fn": 0, "tp": 540}, "evaluation_mcc2_11928": 0.995288, "evaluation_mcc_11928": 0.995288, "evaluation_sn_11928": 1.0, "evaluation_sp_11928": 0.99977} ++++++++++++Testing+++++++++++++ best f1 global step: 11928 checkpoint path: ../models/RdRP/protein/binary_class/luca_base/matrix/20250409154233/checkpoint-11928 ***** Running testing checkpoint-11928 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## {"evaluation_avg_loss_11928": 0.001745, "evaluation_total_loss_11928": 2.430984, "evaluation_acc_11928": 0.999776, "evaluation_prec_11928": 0.990826, "evaluation_recall_11928": 1.0, "evaluation_f1_11928": 0.995392, "evaluation_roc_auc_11928": 0.999999, "evaluation_pr_auc_11928": 0.999973, "evaluation_confusion_matrix_11928": {"tn": 21739, "fp": 5, "fn": 0, "tp": 540}, "evaluation_mcc2_11928": 0.995288, "evaluation_mcc_11928": 0.995288, "evaluation_sn_11928": 1.0, "evaluation_sp_11928": 0.99977}