{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "acc", "beta1": 0.9, "beta2": 0.98, "buffer_size": 1024, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "ViralCapsid", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/ViralCapsid/protein/binary_class/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 2560, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "matrix", "intermediate_size": 4096, "label_filepath": "../dataset/ViralCapsid/protein/binary_class/label.txt", "label_size": 2, "label_type": "ViralCapsid", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": null, "llm_step": "3B", "llm_task_level": "token_level,span_level,seq_level,structure_level", "llm_time_str": null, "llm_type": "esm", "llm_version": "esm2", "local_rank": -1, "log_dir": "../logs/ViralCapsid/protein/binary_class/luca_base/matrix/20250104031508", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": true, "matrix_dirpath": "../matrices/ViralCapsid/protein/binary_class/luca_base/esm2/esm//3B", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "non_ignore": false, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 0, "num_hidden_layers": 0, "num_train_epochs": 10, "output_dir": "../models/ViralCapsid/protein/binary_class/luca_base/matrix/20250104031508", "output_mode": "binary_class", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 1.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "null", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "binary_class", "tb_log_dir": "../tb-logs/ViralCapsid/protein/binary_class/luca_base/matrix/20250104031508", "test_data_dir": "../dataset/ViralCapsid/protein/binary_class/test/", "time_str": "20250104031713", "train_data_dir": "../dataset/ViralCapsid/protein/binary_class/train/", "trunc_type": "right", "vector_dirpath": "../vectors/ViralCapsid/protein/binary_class/luca_base/esm2/esm//3B", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 200, "weight": null, "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,embedding_matrix ################################################## Encoder Config: {'llm_type': 'esm', 'llm_version': 'esm2', 'llm_step': '3B', 'llm_dirpath': None, 'input_type': 'matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/ViralCapsid/protein/binary_class/luca_base/esm2/esm//3B', 'matrix_dirpath': '../matrices/ViralCapsid/protein/binary_class/luca_base/esm2/esm//3B', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': True, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 2560, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4098, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "num_attention_heads": 8, "num_hidden_layers": 4, "pad_token_id": 0, "pos_weight": 1.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": null, "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.29.0", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39 } ################################################## Mode Architecture: LucaBase( (matrix_pooler): GlobalMaskValueAttentionPooling1D (2560 -> 2560) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=2560, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=128, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=1, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 20005249 ################################################## {"total_num": "19.080000M", "total_size": "76.310000MB", "param_sum": "19.080000M", "param_size": "76.310000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "19.078492M", "trainable_size": "76.313969MB"} ################################################## Train dataset len: 325113, batch size: 16, batch num: 20320 Train dataset t_total: 203200, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 325113 Train Dataset Num Epochs = 10 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. parallel, distributed & accumulation) = 16 Train Dataset Gradient Accumulation steps = 1 Train Dataset Total optimization steps = 203200 ################################################## Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.20972949, Cur Avg Loss: 0.60038140, Log Avg loss: 0.60038140, Global Avg Loss: 0.60038140, Time: 0.0471 Steps: 200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.26628152, Cur Avg Loss: 0.35016830, Log Avg loss: 0.09995520, Global Avg Loss: 0.35016830, Time: 0.0448 Steps: 400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.00411138, Cur Avg Loss: 0.25056088, Log Avg loss: 0.05134605, Global Avg Loss: 0.25056088, Time: 0.0742 Steps: 600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.00142608, Cur Avg Loss: 0.20022410, Log Avg loss: 0.04921375, Global Avg Loss: 0.20022410, Time: 0.1098 Steps: 800, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.08649342, Cur Avg Loss: 0.16710809, Log Avg loss: 0.03464405, Global Avg Loss: 0.16710809, Time: 0.0752 Steps: 1000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.02820615, Cur Avg Loss: 0.14460995, Log Avg loss: 0.03211924, Global Avg Loss: 0.14460995, Time: 0.0582 Steps: 1200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.05120961, Cur Avg Loss: 0.12855769, Log Avg loss: 0.03224412, Global Avg Loss: 0.12855769, Time: 0.0526 Steps: 1400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00725065, Cur Avg Loss: 0.11628090, Log Avg loss: 0.03034337, Global Avg Loss: 0.11628090, Time: 0.1060 Steps: 1600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.00082595, Cur Avg Loss: 0.10629134, Log Avg loss: 0.02637488, Global Avg Loss: 0.10629134, Time: 0.1799 Steps: 1800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00003783, Cur Avg Loss: 0.09778095, Log Avg loss: 0.02118740, Global Avg Loss: 0.09778095, Time: 0.0449 Steps: 2000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.00022993, Cur Avg Loss: 0.09133067, Log Avg loss: 0.02682790, Global Avg Loss: 0.09133067, Time: 0.1369 Steps: 2200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.19299446, Cur Avg Loss: 0.08551694, Log Avg loss: 0.02156587, Global Avg Loss: 0.08551694, Time: 0.1270 Steps: 2400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00082949, Cur Avg Loss: 0.08070214, Log Avg loss: 0.02292460, Global Avg Loss: 0.08070214, Time: 0.0694 Steps: 2600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00025983, Cur Avg Loss: 0.07677171, Log Avg loss: 0.02567605, Global Avg Loss: 0.07677171, Time: 0.1412 Steps: 2800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00071311, Cur Avg Loss: 0.07317665, Log Avg loss: 0.02284580, Global Avg Loss: 0.07317665, Time: 0.0366 Steps: 3000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00434506, Cur Avg Loss: 0.06968306, Log Avg loss: 0.01727926, Global Avg Loss: 0.06968306, Time: 0.1952 Steps: 3200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur Loss: 0.00119598, Cur Avg Loss: 0.06665685, Log Avg loss: 0.01823758, Global Avg Loss: 0.06665685, Time: 0.1164 Steps: 3400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.00067678, Cur Avg Loss: 0.06378330, Log Avg loss: 0.01493280, Global Avg Loss: 0.06378330, Time: 0.2369 Steps: 3600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.33631799, Cur Avg Loss: 0.06193049, Log Avg loss: 0.02858005, Global Avg Loss: 0.06193049, Time: 0.0656 Steps: 3800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.01560649, Cur Avg Loss: 0.05986132, Log Avg loss: 0.02054701, Global Avg Loss: 0.05986132, Time: 0.0458 Steps: 4000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.00045402, Cur Avg Loss: 0.05782149, Log Avg loss: 0.01702491, Global Avg Loss: 0.05782149, Time: 0.0977 Steps: 4200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00337045, Cur Avg Loss: 0.05597153, Log Avg loss: 0.01712234, Global Avg Loss: 0.05597153, Time: 0.0508 Steps: 4400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00081167, Cur Avg Loss: 0.05436376, Log Avg loss: 0.01899290, Global Avg Loss: 0.05436376, Time: 0.0644 Steps: 4600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.27916944, Cur Avg Loss: 0.05255050, Log Avg loss: 0.01084549, Global Avg Loss: 0.05255050, Time: 0.1836 Steps: 4800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005000, Sample Num: 80000, Cur Loss: 0.09714022, Cur Avg Loss: 0.05130672, Log Avg loss: 0.02145604, Global Avg Loss: 0.05130672, Time: 0.1116 Steps: 5000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00002135, Cur Avg Loss: 0.04962981, Log Avg loss: 0.00770710, Global Avg Loss: 0.04962981, Time: 0.0422 Steps: 5200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00321570, Cur Avg Loss: 0.04852436, Log Avg loss: 0.01978257, Global Avg Loss: 0.04852436, Time: 0.0435 Steps: 5400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 005600, Sample Num: 89600, Cur Loss: 0.02084295, Cur Avg Loss: 0.04731328, Log Avg loss: 0.01461401, Global Avg Loss: 0.04731328, Time: 0.1477 Steps: 5600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 005800, Sample Num: 92800, Cur Loss: 0.00084477, Cur Avg Loss: 0.04615057, Log Avg loss: 0.01359487, Global Avg Loss: 0.04615057, Time: 0.0592 Steps: 5800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006000, Sample Num: 96000, Cur Loss: 0.00042403, Cur Avg Loss: 0.04526936, Log Avg loss: 0.01971422, Global Avg Loss: 0.04526936, Time: 0.1912 Steps: 6000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006200, Sample Num: 99200, Cur Loss: 0.00012137, Cur Avg Loss: 0.04459477, Log Avg loss: 0.02435706, Global Avg Loss: 0.04459477, Time: 0.0655 Steps: 6200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00026062, Cur Avg Loss: 0.04354817, Log Avg loss: 0.01110340, Global Avg Loss: 0.04354817, Time: 0.0690 Steps: 6400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006600, Sample Num: 105600, Cur Loss: 0.00065812, Cur Avg Loss: 0.04268175, Log Avg loss: 0.01495649, Global Avg Loss: 0.04268175, Time: 0.2036 Steps: 6600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006800, Sample Num: 108800, Cur Loss: 0.00106624, Cur Avg Loss: 0.04187600, Log Avg loss: 0.01528609, Global Avg Loss: 0.04187600, Time: 0.0391 Steps: 6800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00024254, Cur Avg Loss: 0.04109254, Log Avg loss: 0.01445487, Global Avg Loss: 0.04109254, Time: 0.0467 Steps: 7000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007200, Sample Num: 115200, Cur Loss: 0.00384180, Cur Avg Loss: 0.04025765, Log Avg loss: 0.01103680, Global Avg Loss: 0.04025765, Time: 0.0442 Steps: 7200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00067792, Cur Avg Loss: 0.03971836, Log Avg loss: 0.02030384, Global Avg Loss: 0.03971836, Time: 0.0405 Steps: 7400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00023207, Cur Avg Loss: 0.03905022, Log Avg loss: 0.01432903, Global Avg Loss: 0.03905022, Time: 0.1656 Steps: 7600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 007800, Sample Num: 124800, Cur Loss: 0.01096326, Cur Avg Loss: 0.03850204, Log Avg loss: 0.01767128, Global Avg Loss: 0.03850204, Time: 0.0646 Steps: 7800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00436469, Cur Avg Loss: 0.03810910, Log Avg loss: 0.02278435, Global Avg Loss: 0.03810910, Time: 0.2129 Steps: 8000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00028636, Cur Avg Loss: 0.03765649, Log Avg loss: 0.01955186, Global Avg Loss: 0.03765649, Time: 0.0656 Steps: 8200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008400, Sample Num: 134400, Cur Loss: 0.00300494, Cur Avg Loss: 0.03720435, Log Avg loss: 0.01866691, Global Avg Loss: 0.03720435, Time: 0.0797 Steps: 8400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008600, Sample Num: 137600, Cur Loss: 0.00017421, Cur Avg Loss: 0.03669990, Log Avg loss: 0.01551295, Global Avg Loss: 0.03669990, Time: 0.0544 Steps: 8600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00030288, Cur Avg Loss: 0.03624711, Log Avg loss: 0.01677692, Global Avg Loss: 0.03624711, Time: 0.1927 Steps: 8800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00055494, Cur Avg Loss: 0.03578028, Log Avg loss: 0.01524011, Global Avg Loss: 0.03578028, Time: 0.1222 Steps: 9000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00054233, Cur Avg Loss: 0.03526746, Log Avg loss: 0.01219035, Global Avg Loss: 0.03526746, Time: 0.0490 Steps: 9200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00006623, Cur Avg Loss: 0.03478540, Log Avg loss: 0.01261069, Global Avg Loss: 0.03478540, Time: 0.0685 Steps: 9400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00018717, Cur Avg Loss: 0.03439169, Log Avg loss: 0.01588709, Global Avg Loss: 0.03439169, Time: 0.0970 Steps: 9600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 009800, Sample Num: 156800, Cur Loss: 0.00020156, Cur Avg Loss: 0.03387224, Log Avg loss: 0.00893879, Global Avg Loss: 0.03387224, Time: 0.1224 Steps: 9800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00060785, Cur Avg Loss: 0.03353217, Log Avg loss: 0.01686897, Global Avg Loss: 0.03353217, Time: 0.1041 Steps: 10000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010200, Sample Num: 163200, Cur Loss: 0.32595891, Cur Avg Loss: 0.03308639, Log Avg loss: 0.01079711, Global Avg Loss: 0.03308639, Time: 0.0765 Steps: 10200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010400, Sample Num: 166400, Cur Loss: 0.00032751, Cur Avg Loss: 0.03270338, Log Avg loss: 0.01317017, Global Avg Loss: 0.03270338, Time: 0.1167 Steps: 10400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00088854, Cur Avg Loss: 0.03240850, Log Avg loss: 0.01707455, Global Avg Loss: 0.03240850, Time: 0.1315 Steps: 10600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00359284, Cur Avg Loss: 0.03197809, Log Avg loss: 0.00916643, Global Avg Loss: 0.03197809, Time: 0.2036 Steps: 10800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00003447, Cur Avg Loss: 0.03152173, Log Avg loss: 0.00687821, Global Avg Loss: 0.03152173, Time: 0.0568 Steps: 11000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00022888, Cur Avg Loss: 0.03125234, Log Avg loss: 0.01643562, Global Avg Loss: 0.03125234, Time: 0.1081 Steps: 11200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00028267, Cur Avg Loss: 0.03096192, Log Avg loss: 0.01469882, Global Avg Loss: 0.03096192, Time: 0.0729 Steps: 11400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00002474, Cur Avg Loss: 0.03070929, Log Avg loss: 0.01630906, Global Avg Loss: 0.03070929, Time: 0.1933 Steps: 11600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 011800, Sample Num: 188800, Cur Loss: 0.20851791, Cur Avg Loss: 0.03046870, Log Avg loss: 0.01651435, Global Avg Loss: 0.03046870, Time: 0.0375 Steps: 11800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012000, Sample Num: 192000, Cur Loss: 0.00009210, Cur Avg Loss: 0.03014148, Log Avg loss: 0.01083562, Global Avg Loss: 0.03014148, Time: 0.1818 Steps: 12000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012200, Sample Num: 195200, Cur Loss: 0.16352595, Cur Avg Loss: 0.02994878, Log Avg loss: 0.01838667, Global Avg Loss: 0.02994878, Time: 0.1062 Steps: 12200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012400, Sample Num: 198400, Cur Loss: 0.00037833, Cur Avg Loss: 0.02963527, Log Avg loss: 0.01051120, Global Avg Loss: 0.02963527, Time: 0.1491 Steps: 12400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012600, Sample Num: 201600, Cur Loss: 0.00012202, Cur Avg Loss: 0.02932773, Log Avg loss: 0.01026077, Global Avg Loss: 0.02932773, Time: 0.1747 Steps: 12600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012800, Sample Num: 204800, Cur Loss: 0.00064108, Cur Avg Loss: 0.02907045, Log Avg loss: 0.01286166, Global Avg Loss: 0.02907045, Time: 0.1510 Steps: 12800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013000, Sample Num: 208000, Cur Loss: 0.00029505, Cur Avg Loss: 0.02883004, Log Avg loss: 0.01344364, Global Avg Loss: 0.02883004, Time: 0.0383 Steps: 13000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013200, Sample Num: 211200, Cur Loss: 0.00696647, Cur Avg Loss: 0.02863317, Log Avg loss: 0.01583663, Global Avg Loss: 0.02863317, Time: 0.2225 Steps: 13200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013400, Sample Num: 214400, Cur Loss: 0.00070639, Cur Avg Loss: 0.02830912, Log Avg loss: 0.00692156, Global Avg Loss: 0.02830912, Time: 0.0385 Steps: 13400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 013600, Sample Num: 217600, Cur Loss: 0.00001807, Cur Avg Loss: 0.02803817, Log Avg loss: 0.00988471, Global Avg Loss: 0.02803817, Time: 0.1728 Steps: 13600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 013800, Sample Num: 220800, Cur Loss: 0.00099336, Cur Avg Loss: 0.02783228, Log Avg loss: 0.01383209, Global Avg Loss: 0.02783228, Time: 0.1953 Steps: 13800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014000, Sample Num: 224000, Cur Loss: 0.00037552, Cur Avg Loss: 0.02757959, Log Avg loss: 0.01014337, Global Avg Loss: 0.02757959, Time: 0.1143 Steps: 14000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014200, Sample Num: 227200, Cur Loss: 0.00011040, Cur Avg Loss: 0.02735516, Log Avg loss: 0.01164560, Global Avg Loss: 0.02735516, Time: 0.0296 Steps: 14200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014400, Sample Num: 230400, Cur Loss: 0.00062337, Cur Avg Loss: 0.02712783, Log Avg loss: 0.01098744, Global Avg Loss: 0.02712783, Time: 0.0410 Steps: 14400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014600, Sample Num: 233600, Cur Loss: 0.00171157, Cur Avg Loss: 0.02700514, Log Avg loss: 0.01817133, Global Avg Loss: 0.02700514, Time: 0.0956 Steps: 14600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014800, Sample Num: 236800, Cur Loss: 0.03599565, Cur Avg Loss: 0.02680651, Log Avg loss: 0.01230636, Global Avg Loss: 0.02680651, Time: 0.0430 Steps: 14800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015000, Sample Num: 240000, Cur Loss: 0.00558130, Cur Avg Loss: 0.02664529, Log Avg loss: 0.01471513, Global Avg Loss: 0.02664529, Time: 0.0795 Steps: 15000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015200, Sample Num: 243200, Cur Loss: 0.00004071, Cur Avg Loss: 0.02639552, Log Avg loss: 0.00766242, Global Avg Loss: 0.02639552, Time: 0.0635 Steps: 15200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015400, Sample Num: 246400, Cur Loss: 0.00051547, Cur Avg Loss: 0.02622137, Log Avg loss: 0.01298648, Global Avg Loss: 0.02622137, Time: 0.1170 Steps: 15400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015600, Sample Num: 249600, Cur Loss: 0.00003604, Cur Avg Loss: 0.02604646, Log Avg loss: 0.01257785, Global Avg Loss: 0.02604646, Time: 0.0291 Steps: 15600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 015800, Sample Num: 252800, Cur Loss: 0.00042303, Cur Avg Loss: 0.02584010, Log Avg loss: 0.00974443, Global Avg Loss: 0.02584010, Time: 0.0722 Steps: 15800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016000, Sample Num: 256000, Cur Loss: 0.00006526, Cur Avg Loss: 0.02563955, Log Avg loss: 0.00979631, Global Avg Loss: 0.02563955, Time: 0.0893 Steps: 16000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016200, Sample Num: 259200, Cur Loss: 0.00014961, Cur Avg Loss: 0.02544573, Log Avg loss: 0.00993984, Global Avg Loss: 0.02544573, Time: 0.1259 Steps: 16200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016400, Sample Num: 262400, Cur Loss: 0.00035102, Cur Avg Loss: 0.02521563, Log Avg loss: 0.00657740, Global Avg Loss: 0.02521563, Time: 0.0338 Steps: 16400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016600, Sample Num: 265600, Cur Loss: 0.05465882, Cur Avg Loss: 0.02506207, Log Avg loss: 0.01247058, Global Avg Loss: 0.02506207, Time: 0.1533 Steps: 16600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016800, Sample Num: 268800, Cur Loss: 0.00019480, Cur Avg Loss: 0.02489627, Log Avg loss: 0.01113442, Global Avg Loss: 0.02489627, Time: 0.2393 Steps: 16800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017000, Sample Num: 272000, Cur Loss: 0.00014046, Cur Avg Loss: 0.02471864, Log Avg loss: 0.00979761, Global Avg Loss: 0.02471864, Time: 0.1231 Steps: 17000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017200, Sample Num: 275200, Cur Loss: 0.00050975, Cur Avg Loss: 0.02459288, Log Avg loss: 0.01390378, Global Avg Loss: 0.02459288, Time: 0.1027 Steps: 17200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017400, Sample Num: 278400, Cur Loss: 0.01094033, Cur Avg Loss: 0.02448385, Log Avg loss: 0.01510696, Global Avg Loss: 0.02448385, Time: 0.1427 Steps: 17400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017600, Sample Num: 281600, Cur Loss: 0.00018313, Cur Avg Loss: 0.02432424, Log Avg loss: 0.01043831, Global Avg Loss: 0.02432424, Time: 0.0771 Steps: 17600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 017800, Sample Num: 284800, Cur Loss: 0.00135248, Cur Avg Loss: 0.02423524, Log Avg loss: 0.01640296, Global Avg Loss: 0.02423524, Time: 0.0475 Steps: 17800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018000, Sample Num: 288000, Cur Loss: 0.00001831, Cur Avg Loss: 0.02400681, Log Avg loss: 0.00367709, Global Avg Loss: 0.02400681, Time: 0.0759 Steps: 18000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018200, Sample Num: 291200, Cur Loss: 0.00011386, Cur Avg Loss: 0.02390948, Log Avg loss: 0.01514967, Global Avg Loss: 0.02390948, Time: 0.0642 Steps: 18200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018400, Sample Num: 294400, Cur Loss: 0.00040600, Cur Avg Loss: 0.02377015, Log Avg loss: 0.01109086, Global Avg Loss: 0.02377015, Time: 0.1011 Steps: 18400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018600, Sample Num: 297600, Cur Loss: 0.00030948, Cur Avg Loss: 0.02360973, Log Avg loss: 0.00885147, Global Avg Loss: 0.02360973, Time: 0.1077 Steps: 18600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018800, Sample Num: 300800, Cur Loss: 0.03180421, Cur Avg Loss: 0.02347563, Log Avg loss: 0.01100393, Global Avg Loss: 0.02347563, Time: 0.1017 Steps: 18800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019000, Sample Num: 304000, Cur Loss: 0.00080529, Cur Avg Loss: 0.02334678, Log Avg loss: 0.01123501, Global Avg Loss: 0.02334678, Time: 0.0425 Steps: 19000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019200, Sample Num: 307200, Cur Loss: 0.00039248, Cur Avg Loss: 0.02326382, Log Avg loss: 0.01538249, Global Avg Loss: 0.02326382, Time: 0.0961 Steps: 19200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019400, Sample Num: 310400, Cur Loss: 0.00157584, Cur Avg Loss: 0.02311075, Log Avg loss: 0.00841623, Global Avg Loss: 0.02311075, Time: 0.0582 Steps: 19400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019600, Sample Num: 313600, Cur Loss: 0.00007201, Cur Avg Loss: 0.02298859, Log Avg loss: 0.01113858, Global Avg Loss: 0.02298859, Time: 0.0423 Steps: 19600, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 019800, Sample Num: 316800, Cur Loss: 0.00006543, Cur Avg Loss: 0.02286515, Log Avg loss: 0.01076813, Global Avg Loss: 0.02286515, Time: 0.0456 Steps: 19800, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 020000, Sample Num: 320000, Cur Loss: 0.00458995, Cur Avg Loss: 0.02274287, Log Avg loss: 0.01063736, Global Avg Loss: 0.02274287, Time: 0.1614 Steps: 20000, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 020200, Sample Num: 323200, Cur Loss: 0.00000577, Cur Avg Loss: 0.02259156, Log Avg loss: 0.00746070, Global Avg Loss: 0.02259156, Time: 0.0598 Steps: 20200, Updated lr: 0.000090 ***** Running evaluation checkpoint-20320 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-20320 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 2196.701725, Avg time per batch (s): 0.110000 {"eval_avg_loss": 0.009735, "eval_total_loss": 26.381427, "eval_acc": 0.997001, "eval_prec": 0.996916, "eval_recall": 0.9971, "eval_f1": 0.997008, "eval_roc_auc": 0.999881, "eval_pr_auc": 0.999861, "eval_confusion_matrix": {"tn": 21560, "fp": 67, "fn": 63, "tp": 21659}, "eval_mcc2": 0.994002, "eval_mcc": 0.994002, "eval_sn": 0.9971, "eval_sp": 0.996902, "update_flag": true, "test_avg_loss": 0.009094, "test_total_loss": 36.958048, "test_acc": 0.997293, "test_prec": 0.99717, "test_recall": 0.997416, "test_f1": 0.997293, "test_roc_auc": 0.999886, "test_pr_auc": 0.99986, "test_confusion_matrix": {"tn": 32425, "fp": 92, "fn": 84, "tp": 32421}, "test_mcc2": 0.994586, "test_mcc": 0.994586, "test_sn": 0.997416, "test_sp": 0.997171, "lr": 9.008866995073892e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.022570460378704915, "train_cur_epoch_loss": 458.63175489528385, "train_cur_epoch_avg_loss": 0.022570460378704915, "train_cur_epoch_time": 2196.7017245292664, "train_cur_epoch_avg_time": 0.10810539982919617, "epoch": 1, "step": 20320} ################################################## Training, Epoch: 0002, Batch: 000080, Sample Num: 1280, Cur Loss: 0.00031955, Cur Avg Loss: 0.01147316, Log Avg loss: 0.01600038, Global Avg Loss: 0.02252694, Time: 0.0623 Steps: 20400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000280, Sample Num: 4480, Cur Loss: 0.00050452, Cur Avg Loss: 0.00960509, Log Avg loss: 0.00885786, Global Avg Loss: 0.02239423, Time: 0.1079 Steps: 20600, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000480, Sample Num: 7680, Cur Loss: 0.00049157, Cur Avg Loss: 0.00817283, Log Avg loss: 0.00616767, Global Avg Loss: 0.02223821, Time: 0.1697 Steps: 20800, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000680, Sample Num: 10880, Cur Loss: 0.00065159, Cur Avg Loss: 0.00909812, Log Avg loss: 0.01131881, Global Avg Loss: 0.02213421, Time: 0.0645 Steps: 21000, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000880, Sample Num: 14080, Cur Loss: 0.00004716, Cur Avg Loss: 0.00917788, Log Avg loss: 0.00944909, Global Avg Loss: 0.02201454, Time: 0.0682 Steps: 21200, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 001080, Sample Num: 17280, Cur Loss: 0.00022300, Cur Avg Loss: 0.00827225, Log Avg loss: 0.00428744, Global Avg Loss: 0.02184887, Time: 0.1169 Steps: 21400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 001280, Sample Num: 20480, Cur Loss: 0.00021540, Cur Avg Loss: 0.00788410, Log Avg loss: 0.00578810, Global Avg Loss: 0.02170016, Time: 0.2715 Steps: 21600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001480, Sample Num: 23680, Cur Loss: 0.02713872, Cur Avg Loss: 0.00789198, Log Avg loss: 0.00794242, Global Avg Loss: 0.02157394, Time: 0.2066 Steps: 21800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001680, Sample Num: 26880, Cur Loss: 0.00013477, Cur Avg Loss: 0.00789677, Log Avg loss: 0.00793218, Global Avg Loss: 0.02144992, Time: 0.1089 Steps: 22000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001880, Sample Num: 30080, Cur Loss: 0.00000212, Cur Avg Loss: 0.00745565, Log Avg loss: 0.00375030, Global Avg Loss: 0.02129047, Time: 0.0492 Steps: 22200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002080, Sample Num: 33280, Cur Loss: 0.00004847, Cur Avg Loss: 0.00777924, Log Avg loss: 0.01082098, Global Avg Loss: 0.02119699, Time: 0.1411 Steps: 22400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002280, Sample Num: 36480, Cur Loss: 0.00000372, Cur Avg Loss: 0.00741048, Log Avg loss: 0.00357532, Global Avg Loss: 0.02104105, Time: 0.0507 Steps: 22600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002480, Sample Num: 39680, Cur Loss: 0.00005967, Cur Avg Loss: 0.00727503, Log Avg loss: 0.00573089, Global Avg Loss: 0.02090675, Time: 0.0675 Steps: 22800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002680, Sample Num: 42880, Cur Loss: 0.00001868, Cur Avg Loss: 0.00756266, Log Avg loss: 0.01112931, Global Avg Loss: 0.02082173, Time: 0.0635 Steps: 23000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002880, Sample Num: 46080, Cur Loss: 0.00003792, Cur Avg Loss: 0.00742034, Log Avg loss: 0.00551323, Global Avg Loss: 0.02068976, Time: 0.1455 Steps: 23200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 003080, Sample Num: 49280, Cur Loss: 0.00065130, Cur Avg Loss: 0.00762520, Log Avg loss: 0.01057516, Global Avg Loss: 0.02060331, Time: 0.0672 Steps: 23400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 003280, Sample Num: 52480, Cur Loss: 0.00000614, Cur Avg Loss: 0.00767942, Log Avg loss: 0.00851450, Global Avg Loss: 0.02050086, Time: 0.1931 Steps: 23600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003480, Sample Num: 55680, Cur Loss: 0.00325440, Cur Avg Loss: 0.00745857, Log Avg loss: 0.00383658, Global Avg Loss: 0.02036082, Time: 0.1019 Steps: 23800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003680, Sample Num: 58880, Cur Loss: 0.00008981, Cur Avg Loss: 0.00749238, Log Avg loss: 0.00808063, Global Avg Loss: 0.02025849, Time: 0.0579 Steps: 24000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003880, Sample Num: 62080, Cur Loss: 0.00002709, Cur Avg Loss: 0.00758609, Log Avg loss: 0.00931044, Global Avg Loss: 0.02016801, Time: 0.0946 Steps: 24200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004080, Sample Num: 65280, Cur Loss: 0.00000893, Cur Avg Loss: 0.00757653, Log Avg loss: 0.00739104, Global Avg Loss: 0.02006328, Time: 0.0374 Steps: 24400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004280, Sample Num: 68480, Cur Loss: 0.00002678, Cur Avg Loss: 0.00779384, Log Avg loss: 0.01222705, Global Avg Loss: 0.01999957, Time: 0.0890 Steps: 24600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004480, Sample Num: 71680, Cur Loss: 0.00011509, Cur Avg Loss: 0.00793793, Log Avg loss: 0.01102126, Global Avg Loss: 0.01992716, Time: 0.4417 Steps: 24800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004680, Sample Num: 74880, Cur Loss: 0.00000975, Cur Avg Loss: 0.00785385, Log Avg loss: 0.00597063, Global Avg Loss: 0.01981551, Time: 0.1077 Steps: 25000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004880, Sample Num: 78080, Cur Loss: 0.00000318, Cur Avg Loss: 0.00773839, Log Avg loss: 0.00503651, Global Avg Loss: 0.01969822, Time: 0.0983 Steps: 25200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 005080, Sample Num: 81280, Cur Loss: 0.00029843, Cur Avg Loss: 0.00772933, Log Avg loss: 0.00750824, Global Avg Loss: 0.01960223, Time: 0.0422 Steps: 25400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 005280, Sample Num: 84480, Cur Loss: 0.00001124, Cur Avg Loss: 0.00771283, Log Avg loss: 0.00729367, Global Avg Loss: 0.01950607, Time: 0.2321 Steps: 25600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005480, Sample Num: 87680, Cur Loss: 0.00018532, Cur Avg Loss: 0.00763685, Log Avg loss: 0.00563110, Global Avg Loss: 0.01939852, Time: 0.0543 Steps: 25800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005680, Sample Num: 90880, Cur Loss: 0.00203376, Cur Avg Loss: 0.00760160, Log Avg loss: 0.00663585, Global Avg Loss: 0.01930034, Time: 0.1081 Steps: 26000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005880, Sample Num: 94080, Cur Loss: 0.00002117, Cur Avg Loss: 0.00756594, Log Avg loss: 0.00655324, Global Avg Loss: 0.01920303, Time: 0.0571 Steps: 26200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006080, Sample Num: 97280, Cur Loss: 0.00006787, Cur Avg Loss: 0.00798929, Log Avg loss: 0.02043567, Global Avg Loss: 0.01921237, Time: 0.3014 Steps: 26400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006280, Sample Num: 100480, Cur Loss: 0.00002824, Cur Avg Loss: 0.00786660, Log Avg loss: 0.00413682, Global Avg Loss: 0.01909902, Time: 0.1053 Steps: 26600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006480, Sample Num: 103680, Cur Loss: 0.00002509, Cur Avg Loss: 0.00774966, Log Avg loss: 0.00407783, Global Avg Loss: 0.01898692, Time: 0.1487 Steps: 26800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006680, Sample Num: 106880, Cur Loss: 0.00022497, Cur Avg Loss: 0.00791409, Log Avg loss: 0.01324140, Global Avg Loss: 0.01894436, Time: 0.0363 Steps: 27000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006880, Sample Num: 110080, Cur Loss: 0.00169892, Cur Avg Loss: 0.00782351, Log Avg loss: 0.00479833, Global Avg Loss: 0.01884035, Time: 0.1195 Steps: 27200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007080, Sample Num: 113280, Cur Loss: 0.00000970, Cur Avg Loss: 0.00771915, Log Avg loss: 0.00412896, Global Avg Loss: 0.01873297, Time: 0.1306 Steps: 27400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007280, Sample Num: 116480, Cur Loss: 0.00003787, Cur Avg Loss: 0.00770526, Log Avg loss: 0.00721357, Global Avg Loss: 0.01864949, Time: 0.0489 Steps: 27600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007480, Sample Num: 119680, Cur Loss: 0.00008219, Cur Avg Loss: 0.00775195, Log Avg loss: 0.00945144, Global Avg Loss: 0.01858332, Time: 0.0538 Steps: 27800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 007680, Sample Num: 122880, Cur Loss: 0.00002616, Cur Avg Loss: 0.00770358, Log Avg loss: 0.00589475, Global Avg Loss: 0.01849269, Time: 0.0717 Steps: 28000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 007880, Sample Num: 126080, Cur Loss: 0.00012893, Cur Avg Loss: 0.00779601, Log Avg loss: 0.01134538, Global Avg Loss: 0.01844200, Time: 0.0952 Steps: 28200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008080, Sample Num: 129280, Cur Loss: 0.07971888, Cur Avg Loss: 0.00795482, Log Avg loss: 0.01421178, Global Avg Loss: 0.01841221, Time: 0.2337 Steps: 28400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008280, Sample Num: 132480, Cur Loss: 0.00000492, Cur Avg Loss: 0.00792133, Log Avg loss: 0.00656823, Global Avg Loss: 0.01832938, Time: 0.1026 Steps: 28600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008480, Sample Num: 135680, Cur Loss: 0.00000499, Cur Avg Loss: 0.00784129, Log Avg loss: 0.00452764, Global Avg Loss: 0.01823354, Time: 0.0242 Steps: 28800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008680, Sample Num: 138880, Cur Loss: 0.00000238, Cur Avg Loss: 0.00782697, Log Avg loss: 0.00721994, Global Avg Loss: 0.01815758, Time: 0.0240 Steps: 29000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008880, Sample Num: 142080, Cur Loss: 0.00017695, Cur Avg Loss: 0.00780066, Log Avg loss: 0.00665864, Global Avg Loss: 0.01807882, Time: 0.1036 Steps: 29200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009080, Sample Num: 145280, Cur Loss: 0.00002964, Cur Avg Loss: 0.00786600, Log Avg loss: 0.01076732, Global Avg Loss: 0.01802908, Time: 0.1178 Steps: 29400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009280, Sample Num: 148480, Cur Loss: 0.00085997, Cur Avg Loss: 0.00777400, Log Avg loss: 0.00359710, Global Avg Loss: 0.01793157, Time: 0.2971 Steps: 29600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009480, Sample Num: 151680, Cur Loss: 0.00109064, Cur Avg Loss: 0.00778135, Log Avg loss: 0.00812260, Global Avg Loss: 0.01786574, Time: 0.1459 Steps: 29800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 009680, Sample Num: 154880, Cur Loss: 0.00013600, Cur Avg Loss: 0.00776550, Log Avg loss: 0.00701423, Global Avg Loss: 0.01779339, Time: 0.0600 Steps: 30000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 009880, Sample Num: 158080, Cur Loss: 0.00019201, Cur Avg Loss: 0.00776132, Log Avg loss: 0.00755860, Global Avg Loss: 0.01772561, Time: 0.0410 Steps: 30200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010080, Sample Num: 161280, Cur Loss: 0.00019775, Cur Avg Loss: 0.00770382, Log Avg loss: 0.00486338, Global Avg Loss: 0.01764099, Time: 0.1180 Steps: 30400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010280, Sample Num: 164480, Cur Loss: 0.00002033, Cur Avg Loss: 0.00760480, Log Avg loss: 0.00261456, Global Avg Loss: 0.01754278, Time: 0.0364 Steps: 30600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010480, Sample Num: 167680, Cur Loss: 0.00000731, Cur Avg Loss: 0.00769310, Log Avg loss: 0.01223150, Global Avg Loss: 0.01750829, Time: 0.1425 Steps: 30800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010680, Sample Num: 170880, Cur Loss: 0.00116848, Cur Avg Loss: 0.00769192, Log Avg loss: 0.00762993, Global Avg Loss: 0.01744456, Time: 0.0344 Steps: 31000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010880, Sample Num: 174080, Cur Loss: 0.00005049, Cur Avg Loss: 0.00757166, Log Avg loss: 0.00114981, Global Avg Loss: 0.01734011, Time: 0.2166 Steps: 31200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011080, Sample Num: 177280, Cur Loss: 0.00006312, Cur Avg Loss: 0.00759339, Log Avg loss: 0.00877549, Global Avg Loss: 0.01728556, Time: 0.0389 Steps: 31400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011280, Sample Num: 180480, Cur Loss: 0.00010717, Cur Avg Loss: 0.00755977, Log Avg loss: 0.00569712, Global Avg Loss: 0.01721221, Time: 0.1688 Steps: 31600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011480, Sample Num: 183680, Cur Loss: 0.00009687, Cur Avg Loss: 0.00760261, Log Avg loss: 0.01001884, Global Avg Loss: 0.01716697, Time: 0.0471 Steps: 31800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 011680, Sample Num: 186880, Cur Loss: 0.00000639, Cur Avg Loss: 0.00754560, Log Avg loss: 0.00427351, Global Avg Loss: 0.01708639, Time: 0.1389 Steps: 32000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 011880, Sample Num: 190080, Cur Loss: 0.00022482, Cur Avg Loss: 0.00757847, Log Avg loss: 0.00949772, Global Avg Loss: 0.01703925, Time: 0.0343 Steps: 32200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012080, Sample Num: 193280, Cur Loss: 0.00013045, Cur Avg Loss: 0.00753761, Log Avg loss: 0.00511060, Global Avg Loss: 0.01696562, Time: 0.0579 Steps: 32400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012280, Sample Num: 196480, Cur Loss: 0.00012685, Cur Avg Loss: 0.00754909, Log Avg loss: 0.00824280, Global Avg Loss: 0.01691210, Time: 0.0243 Steps: 32600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012480, Sample Num: 199680, Cur Loss: 0.00003416, Cur Avg Loss: 0.00749720, Log Avg loss: 0.00431088, Global Avg Loss: 0.01683527, Time: 0.0251 Steps: 32800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012680, Sample Num: 202880, Cur Loss: 0.00003461, Cur Avg Loss: 0.00746928, Log Avg loss: 0.00572744, Global Avg Loss: 0.01676795, Time: 0.0760 Steps: 33000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012880, Sample Num: 206080, Cur Loss: 0.00000086, Cur Avg Loss: 0.00742663, Log Avg loss: 0.00472254, Global Avg Loss: 0.01669539, Time: 0.0744 Steps: 33200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013080, Sample Num: 209280, Cur Loss: 0.00001167, Cur Avg Loss: 0.00742743, Log Avg loss: 0.00747873, Global Avg Loss: 0.01664020, Time: 0.0398 Steps: 33400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013280, Sample Num: 212480, Cur Loss: 0.00002454, Cur Avg Loss: 0.00736643, Log Avg loss: 0.00337720, Global Avg Loss: 0.01656125, Time: 0.0445 Steps: 33600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013480, Sample Num: 215680, Cur Loss: 0.00817338, Cur Avg Loss: 0.00731017, Log Avg loss: 0.00357449, Global Avg Loss: 0.01648440, Time: 0.0659 Steps: 33800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 013680, Sample Num: 218880, Cur Loss: 0.00001357, Cur Avg Loss: 0.00727691, Log Avg loss: 0.00503504, Global Avg Loss: 0.01641706, Time: 0.0351 Steps: 34000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 013880, Sample Num: 222080, Cur Loss: 0.00017716, Cur Avg Loss: 0.00725449, Log Avg loss: 0.00572083, Global Avg Loss: 0.01635450, Time: 0.0395 Steps: 34200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014080, Sample Num: 225280, Cur Loss: 0.00000349, Cur Avg Loss: 0.00718452, Log Avg loss: 0.00232889, Global Avg Loss: 0.01627296, Time: 0.0958 Steps: 34400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014280, Sample Num: 228480, Cur Loss: 0.00001663, Cur Avg Loss: 0.00723267, Log Avg loss: 0.01062257, Global Avg Loss: 0.01624030, Time: 0.2149 Steps: 34600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014480, Sample Num: 231680, Cur Loss: 0.00012008, Cur Avg Loss: 0.00720104, Log Avg loss: 0.00494249, Global Avg Loss: 0.01617537, Time: 0.0660 Steps: 34800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014680, Sample Num: 234880, Cur Loss: 0.00013490, Cur Avg Loss: 0.00723203, Log Avg loss: 0.00947540, Global Avg Loss: 0.01613708, Time: 0.1061 Steps: 35000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014880, Sample Num: 238080, Cur Loss: 0.00005538, Cur Avg Loss: 0.00722308, Log Avg loss: 0.00656667, Global Avg Loss: 0.01608271, Time: 0.1125 Steps: 35200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015080, Sample Num: 241280, Cur Loss: 0.00002767, Cur Avg Loss: 0.00718211, Log Avg loss: 0.00413359, Global Avg Loss: 0.01601520, Time: 0.0496 Steps: 35400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015280, Sample Num: 244480, Cur Loss: 0.00000595, Cur Avg Loss: 0.00713278, Log Avg loss: 0.00341324, Global Avg Loss: 0.01594440, Time: 0.0573 Steps: 35600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015480, Sample Num: 247680, Cur Loss: 0.00262859, Cur Avg Loss: 0.00710381, Log Avg loss: 0.00489048, Global Avg Loss: 0.01588265, Time: 0.1911 Steps: 35800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 015680, Sample Num: 250880, Cur Loss: 0.00001781, Cur Avg Loss: 0.00704642, Log Avg loss: 0.00260420, Global Avg Loss: 0.01580888, Time: 0.0659 Steps: 36000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 015880, Sample Num: 254080, Cur Loss: 0.00001397, Cur Avg Loss: 0.00699794, Log Avg loss: 0.00319757, Global Avg Loss: 0.01573920, Time: 0.2035 Steps: 36200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016080, Sample Num: 257280, Cur Loss: 0.00001278, Cur Avg Loss: 0.00702742, Log Avg loss: 0.00936807, Global Avg Loss: 0.01570419, Time: 0.1061 Steps: 36400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016280, Sample Num: 260480, Cur Loss: 0.00000737, Cur Avg Loss: 0.00697476, Log Avg loss: 0.00274054, Global Avg Loss: 0.01563335, Time: 0.0647 Steps: 36600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016480, Sample Num: 263680, Cur Loss: 0.00016497, Cur Avg Loss: 0.00691647, Log Avg loss: 0.00217184, Global Avg Loss: 0.01556019, Time: 0.1408 Steps: 36800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016680, Sample Num: 266880, Cur Loss: 0.00000805, Cur Avg Loss: 0.00689101, Log Avg loss: 0.00479354, Global Avg Loss: 0.01550200, Time: 0.0456 Steps: 37000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016880, Sample Num: 270080, Cur Loss: 0.00000168, Cur Avg Loss: 0.00684388, Log Avg loss: 0.00291323, Global Avg Loss: 0.01543431, Time: 0.1432 Steps: 37200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017080, Sample Num: 273280, Cur Loss: 0.00002298, Cur Avg Loss: 0.00681333, Log Avg loss: 0.00423465, Global Avg Loss: 0.01537442, Time: 0.0564 Steps: 37400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017280, Sample Num: 276480, Cur Loss: 0.00008201, Cur Avg Loss: 0.00686952, Log Avg loss: 0.01166820, Global Avg Loss: 0.01535471, Time: 0.1807 Steps: 37600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017480, Sample Num: 279680, Cur Loss: 0.00127860, Cur Avg Loss: 0.00684512, Log Avg loss: 0.00473702, Global Avg Loss: 0.01529853, Time: 0.1452 Steps: 37800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 017680, Sample Num: 282880, Cur Loss: 0.00018932, Cur Avg Loss: 0.00683552, Log Avg loss: 0.00599677, Global Avg Loss: 0.01524957, Time: 0.1410 Steps: 38000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 017880, Sample Num: 286080, Cur Loss: 0.00000480, Cur Avg Loss: 0.00680180, Log Avg loss: 0.00382077, Global Avg Loss: 0.01518974, Time: 0.0645 Steps: 38200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018080, Sample Num: 289280, Cur Loss: 0.00000084, Cur Avg Loss: 0.00674659, Log Avg loss: 0.00181103, Global Avg Loss: 0.01512006, Time: 0.1491 Steps: 38400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018280, Sample Num: 292480, Cur Loss: 0.00017271, Cur Avg Loss: 0.00673038, Log Avg loss: 0.00526454, Global Avg Loss: 0.01506899, Time: 0.0443 Steps: 38600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018480, Sample Num: 295680, Cur Loss: 0.00000689, Cur Avg Loss: 0.00666850, Log Avg loss: 0.00101237, Global Avg Loss: 0.01499654, Time: 0.1184 Steps: 38800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018680, Sample Num: 298880, Cur Loss: 0.00000507, Cur Avg Loss: 0.00662774, Log Avg loss: 0.00286226, Global Avg Loss: 0.01493431, Time: 0.0385 Steps: 39000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018880, Sample Num: 302080, Cur Loss: 0.00000230, Cur Avg Loss: 0.00661778, Log Avg loss: 0.00568685, Global Avg Loss: 0.01488713, Time: 0.1952 Steps: 39200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019080, Sample Num: 305280, Cur Loss: 0.00000177, Cur Avg Loss: 0.00664737, Log Avg loss: 0.00944074, Global Avg Loss: 0.01485948, Time: 0.0582 Steps: 39400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019280, Sample Num: 308480, Cur Loss: 0.00003936, Cur Avg Loss: 0.00663716, Log Avg loss: 0.00566367, Global Avg Loss: 0.01481304, Time: 0.0247 Steps: 39600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019480, Sample Num: 311680, Cur Loss: 0.00003087, Cur Avg Loss: 0.00663500, Log Avg loss: 0.00642626, Global Avg Loss: 0.01477089, Time: 0.0582 Steps: 39800, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 019680, Sample Num: 314880, Cur Loss: 0.00000725, Cur Avg Loss: 0.00662079, Log Avg loss: 0.00523658, Global Avg Loss: 0.01472322, Time: 0.0443 Steps: 40000, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 019880, Sample Num: 318080, Cur Loss: 0.00020370, Cur Avg Loss: 0.00658777, Log Avg loss: 0.00333927, Global Avg Loss: 0.01466658, Time: 0.1004 Steps: 40200, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 020080, Sample Num: 321280, Cur Loss: 0.00004678, Cur Avg Loss: 0.00653693, Log Avg loss: 0.00148287, Global Avg Loss: 0.01460132, Time: 0.0399 Steps: 40400, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 020280, Sample Num: 324480, Cur Loss: 0.03668340, Cur Avg Loss: 0.00657100, Log Avg loss: 0.00999204, Global Avg Loss: 0.01457861, Time: 0.0408 Steps: 40600, Updated lr: 0.000080 ***** Running evaluation checkpoint-40640 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-40640 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1893.408566, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.010647, "eval_total_loss": 28.854183, "eval_acc": 0.997624, "eval_prec": 0.997377, "eval_recall": 0.997882, "eval_f1": 0.99763, "eval_roc_auc": 0.999874, "eval_pr_auc": 0.999817, "eval_confusion_matrix": {"tn": 21570, "fp": 57, "fn": 46, "tp": 21676}, "eval_mcc2": 0.995248, "eval_mcc": 0.995248, "eval_sn": 0.997882, "eval_sp": 0.997364, "update_flag": true, "test_avg_loss": 0.009618, "test_total_loss": 39.086802, "test_acc": 0.997755, "test_prec": 0.997081, "test_recall": 0.998431, "test_f1": 0.997756, "test_roc_auc": 0.999897, "test_pr_auc": 0.999861, "test_confusion_matrix": {"tn": 32422, "fp": 95, "fn": 51, "tp": 32454}, "test_mcc2": 0.99551, "test_mcc": 0.99551, "test_sn": 0.998431, "test_sp": 0.997078, "lr": 8.007881773399016e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.014574004338816428, "train_cur_epoch_loss": 133.65578143421567, "train_cur_epoch_avg_loss": 0.006577548298927936, "train_cur_epoch_time": 1893.408566236496, "train_cur_epoch_avg_time": 0.0931795554250244, "epoch": 2, "step": 40640} ################################################## Training, Epoch: 0003, Batch: 000160, Sample Num: 2560, Cur Loss: 0.00000571, Cur Avg Loss: 0.00255555, Log Avg loss: 0.00402369, Global Avg Loss: 0.01452687, Time: 0.0555 Steps: 40800, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000360, Sample Num: 5760, Cur Loss: 0.00002316, Cur Avg Loss: 0.00390479, Log Avg loss: 0.00498418, Global Avg Loss: 0.01448032, Time: 0.0440 Steps: 41000, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00000198, Cur Avg Loss: 0.00328235, Log Avg loss: 0.00216195, Global Avg Loss: 0.01442053, Time: 0.0669 Steps: 41200, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000760, Sample Num: 12160, Cur Loss: 0.00002536, Cur Avg Loss: 0.00362933, Log Avg loss: 0.00460087, Global Avg Loss: 0.01437309, Time: 0.1027 Steps: 41400, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000960, Sample Num: 15360, Cur Loss: 0.00000149, Cur Avg Loss: 0.00319718, Log Avg loss: 0.00155502, Global Avg Loss: 0.01431146, Time: 0.0492 Steps: 41600, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 001160, Sample Num: 18560, Cur Loss: 0.00000237, Cur Avg Loss: 0.00268841, Log Avg loss: 0.00024633, Global Avg Loss: 0.01424416, Time: 0.1020 Steps: 41800, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00001705, Cur Avg Loss: 0.00237566, Log Avg loss: 0.00056167, Global Avg Loss: 0.01417901, Time: 0.0267 Steps: 42000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001560, Sample Num: 24960, Cur Loss: 0.00000183, Cur Avg Loss: 0.00234105, Log Avg loss: 0.00210571, Global Avg Loss: 0.01412179, Time: 0.0560 Steps: 42200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001760, Sample Num: 28160, Cur Loss: 0.00000329, Cur Avg Loss: 0.00230728, Log Avg loss: 0.00204392, Global Avg Loss: 0.01406482, Time: 0.0615 Steps: 42400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001960, Sample Num: 31360, Cur Loss: 0.00000020, Cur Avg Loss: 0.00210204, Log Avg loss: 0.00029593, Global Avg Loss: 0.01400018, Time: 0.0624 Steps: 42600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002160, Sample Num: 34560, Cur Loss: 0.00000408, Cur Avg Loss: 0.00281659, Log Avg loss: 0.00981919, Global Avg Loss: 0.01398064, Time: 0.1450 Steps: 42800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00000013, Cur Avg Loss: 0.00259285, Log Avg loss: 0.00017639, Global Avg Loss: 0.01391643, Time: 0.1112 Steps: 43000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002560, Sample Num: 40960, Cur Loss: 0.00000669, Cur Avg Loss: 0.00251843, Log Avg loss: 0.00164031, Global Avg Loss: 0.01385960, Time: 0.1571 Steps: 43200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002760, Sample Num: 44160, Cur Loss: 0.00008734, Cur Avg Loss: 0.00278484, Log Avg loss: 0.00619495, Global Avg Loss: 0.01382428, Time: 0.1375 Steps: 43400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002960, Sample Num: 47360, Cur Loss: 0.00000209, Cur Avg Loss: 0.00302344, Log Avg loss: 0.00631606, Global Avg Loss: 0.01378984, Time: 0.0431 Steps: 43600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00000018, Cur Avg Loss: 0.00319465, Log Avg loss: 0.00572861, Global Avg Loss: 0.01375303, Time: 0.0566 Steps: 43800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 003360, Sample Num: 53760, Cur Loss: 0.00002494, Cur Avg Loss: 0.00316265, Log Avg loss: 0.00265693, Global Avg Loss: 0.01370259, Time: 0.0645 Steps: 44000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003560, Sample Num: 56960, Cur Loss: 0.00002285, Cur Avg Loss: 0.00320551, Log Avg loss: 0.00392558, Global Avg Loss: 0.01365835, Time: 0.0387 Steps: 44200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003760, Sample Num: 60160, Cur Loss: 0.00000409, Cur Avg Loss: 0.00320432, Log Avg loss: 0.00318321, Global Avg Loss: 0.01361117, Time: 0.0990 Steps: 44400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003960, Sample Num: 63360, Cur Loss: 0.00000368, Cur Avg Loss: 0.00313452, Log Avg loss: 0.00182225, Global Avg Loss: 0.01355830, Time: 0.0249 Steps: 44600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00000188, Cur Avg Loss: 0.00312975, Log Avg loss: 0.00303527, Global Avg Loss: 0.01351132, Time: 0.0678 Steps: 44800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004360, Sample Num: 69760, Cur Loss: 0.00017646, Cur Avg Loss: 0.00325097, Log Avg loss: 0.00577235, Global Avg Loss: 0.01347693, Time: 0.1467 Steps: 45000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004560, Sample Num: 72960, Cur Loss: 0.00000239, Cur Avg Loss: 0.00338706, Log Avg loss: 0.00635381, Global Avg Loss: 0.01344541, Time: 0.0600 Steps: 45200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004760, Sample Num: 76160, Cur Loss: 0.00000037, Cur Avg Loss: 0.00336869, Log Avg loss: 0.00294988, Global Avg Loss: 0.01339917, Time: 0.0676 Steps: 45400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004960, Sample Num: 79360, Cur Loss: 0.00011297, Cur Avg Loss: 0.00330890, Log Avg loss: 0.00188605, Global Avg Loss: 0.01334868, Time: 0.0622 Steps: 45600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 005160, Sample Num: 82560, Cur Loss: 0.00000777, Cur Avg Loss: 0.00329600, Log Avg loss: 0.00297590, Global Avg Loss: 0.01330338, Time: 0.1232 Steps: 45800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 005360, Sample Num: 85760, Cur Loss: 0.00000080, Cur Avg Loss: 0.00332603, Log Avg loss: 0.00410080, Global Avg Loss: 0.01326337, Time: 0.0986 Steps: 46000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00001123, Cur Avg Loss: 0.00323420, Log Avg loss: 0.00077328, Global Avg Loss: 0.01320930, Time: 0.1289 Steps: 46200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005760, Sample Num: 92160, Cur Loss: 0.00000044, Cur Avg Loss: 0.00332156, Log Avg loss: 0.00575025, Global Avg Loss: 0.01317715, Time: 0.1033 Steps: 46400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00004817, Cur Avg Loss: 0.00352688, Log Avg loss: 0.00943989, Global Avg Loss: 0.01316111, Time: 0.1829 Steps: 46600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006160, Sample Num: 98560, Cur Loss: 0.00001614, Cur Avg Loss: 0.00367562, Log Avg loss: 0.00810821, Global Avg Loss: 0.01313952, Time: 0.1660 Steps: 46800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006360, Sample Num: 101760, Cur Loss: 0.00002288, Cur Avg Loss: 0.00356459, Log Avg loss: 0.00014481, Global Avg Loss: 0.01308422, Time: 0.1713 Steps: 47000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006560, Sample Num: 104960, Cur Loss: 0.00001381, Cur Avg Loss: 0.00350512, Log Avg loss: 0.00161398, Global Avg Loss: 0.01303562, Time: 0.1017 Steps: 47200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006760, Sample Num: 108160, Cur Loss: 0.00042199, Cur Avg Loss: 0.00365190, Log Avg loss: 0.00846626, Global Avg Loss: 0.01301634, Time: 0.1131 Steps: 47400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006960, Sample Num: 111360, Cur Loss: 0.00004420, Cur Avg Loss: 0.00361047, Log Avg loss: 0.00221013, Global Avg Loss: 0.01297093, Time: 0.1110 Steps: 47600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 007160, Sample Num: 114560, Cur Loss: 0.00002547, Cur Avg Loss: 0.00360903, Log Avg loss: 0.00355908, Global Avg Loss: 0.01293155, Time: 0.0312 Steps: 47800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 007360, Sample Num: 117760, Cur Loss: 0.00000033, Cur Avg Loss: 0.00361815, Log Avg loss: 0.00394460, Global Avg Loss: 0.01289411, Time: 0.1441 Steps: 48000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007560, Sample Num: 120960, Cur Loss: 0.00012320, Cur Avg Loss: 0.00364929, Log Avg loss: 0.00479505, Global Avg Loss: 0.01286050, Time: 0.1032 Steps: 48200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00000143, Cur Avg Loss: 0.00368379, Log Avg loss: 0.00498792, Global Avg Loss: 0.01282797, Time: 0.0987 Steps: 48400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007960, Sample Num: 127360, Cur Loss: 0.00002826, Cur Avg Loss: 0.00390159, Log Avg loss: 0.01235246, Global Avg Loss: 0.01282601, Time: 0.1154 Steps: 48600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00001009, Cur Avg Loss: 0.00386172, Log Avg loss: 0.00227487, Global Avg Loss: 0.01278277, Time: 0.0746 Steps: 48800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00169606, Cur Avg Loss: 0.00385725, Log Avg loss: 0.00367482, Global Avg Loss: 0.01274560, Time: 0.0262 Steps: 49000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008560, Sample Num: 136960, Cur Loss: 0.00000060, Cur Avg Loss: 0.00385492, Log Avg loss: 0.00375765, Global Avg Loss: 0.01270906, Time: 0.0515 Steps: 49200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008760, Sample Num: 140160, Cur Loss: 0.00000697, Cur Avg Loss: 0.00377972, Log Avg loss: 0.00056090, Global Avg Loss: 0.01265988, Time: 0.0477 Steps: 49400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008960, Sample Num: 143360, Cur Loss: 0.00000180, Cur Avg Loss: 0.00383824, Log Avg loss: 0.00640158, Global Avg Loss: 0.01263464, Time: 0.0589 Steps: 49600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 009160, Sample Num: 146560, Cur Loss: 0.00000194, Cur Avg Loss: 0.00381776, Log Avg loss: 0.00290011, Global Avg Loss: 0.01259555, Time: 0.2128 Steps: 49800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00000027, Cur Avg Loss: 0.00373848, Log Avg loss: 0.00010750, Global Avg Loss: 0.01254559, Time: 0.0584 Steps: 50000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009560, Sample Num: 152960, Cur Loss: 0.00145237, Cur Avg Loss: 0.00373617, Log Avg loss: 0.00362820, Global Avg Loss: 0.01251007, Time: 0.0193 Steps: 50200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009760, Sample Num: 156160, Cur Loss: 0.00000015, Cur Avg Loss: 0.00371296, Log Avg loss: 0.00260335, Global Avg Loss: 0.01247075, Time: 0.0687 Steps: 50400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009960, Sample Num: 159360, Cur Loss: 0.00001159, Cur Avg Loss: 0.00373259, Log Avg loss: 0.00469044, Global Avg Loss: 0.01244000, Time: 0.0608 Steps: 50600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010160, Sample Num: 162560, Cur Loss: 0.00000113, Cur Avg Loss: 0.00366162, Log Avg loss: 0.00012755, Global Avg Loss: 0.01239153, Time: 0.0314 Steps: 50800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00000044, Cur Avg Loss: 0.00363126, Log Avg loss: 0.00208874, Global Avg Loss: 0.01235112, Time: 0.0399 Steps: 51000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010560, Sample Num: 168960, Cur Loss: 0.00000384, Cur Avg Loss: 0.00367141, Log Avg loss: 0.00575131, Global Avg Loss: 0.01232534, Time: 0.0527 Steps: 51200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010760, Sample Num: 172160, Cur Loss: 0.00000218, Cur Avg Loss: 0.00363648, Log Avg loss: 0.00179200, Global Avg Loss: 0.01228436, Time: 0.1067 Steps: 51400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010960, Sample Num: 175360, Cur Loss: 0.00000656, Cur Avg Loss: 0.00357598, Log Avg loss: 0.00032131, Global Avg Loss: 0.01223799, Time: 0.0406 Steps: 51600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00000587, Cur Avg Loss: 0.00361916, Log Avg loss: 0.00598509, Global Avg Loss: 0.01221385, Time: 0.1098 Steps: 51800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 011360, Sample Num: 181760, Cur Loss: 0.00001154, Cur Avg Loss: 0.00368592, Log Avg loss: 0.00741139, Global Avg Loss: 0.01219538, Time: 0.0275 Steps: 52000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00000054, Cur Avg Loss: 0.00368289, Log Avg loss: 0.00351092, Global Avg Loss: 0.01216210, Time: 0.1466 Steps: 52200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00000154, Cur Avg Loss: 0.00362348, Log Avg loss: 0.00018964, Global Avg Loss: 0.01211641, Time: 0.1799 Steps: 52400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011960, Sample Num: 191360, Cur Loss: 0.00000028, Cur Avg Loss: 0.00364550, Log Avg loss: 0.00494014, Global Avg Loss: 0.01208912, Time: 0.1499 Steps: 52600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012160, Sample Num: 194560, Cur Loss: 0.00000186, Cur Avg Loss: 0.00359755, Log Avg loss: 0.00072986, Global Avg Loss: 0.01204609, Time: 0.0721 Steps: 52800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012360, Sample Num: 197760, Cur Loss: 0.00000017, Cur Avg Loss: 0.00364541, Log Avg loss: 0.00655570, Global Avg Loss: 0.01202537, Time: 0.0556 Steps: 53000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012560, Sample Num: 200960, Cur Loss: 0.00000016, Cur Avg Loss: 0.00362895, Log Avg loss: 0.00261166, Global Avg Loss: 0.01198998, Time: 0.2155 Steps: 53200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012760, Sample Num: 204160, Cur Loss: 0.00000113, Cur Avg Loss: 0.00361711, Log Avg loss: 0.00287342, Global Avg Loss: 0.01195584, Time: 0.1335 Steps: 53400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012960, Sample Num: 207360, Cur Loss: 0.00000025, Cur Avg Loss: 0.00359364, Log Avg loss: 0.00209637, Global Avg Loss: 0.01191905, Time: 0.0532 Steps: 53600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 013160, Sample Num: 210560, Cur Loss: 0.00001465, Cur Avg Loss: 0.00359255, Log Avg loss: 0.00352190, Global Avg Loss: 0.01188783, Time: 0.0442 Steps: 53800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 013360, Sample Num: 213760, Cur Loss: 0.00000020, Cur Avg Loss: 0.00353975, Log Avg loss: 0.00006555, Global Avg Loss: 0.01184405, Time: 0.1019 Steps: 54000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013560, Sample Num: 216960, Cur Loss: 0.00001029, Cur Avg Loss: 0.00349468, Log Avg loss: 0.00048375, Global Avg Loss: 0.01180213, Time: 0.2559 Steps: 54200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013760, Sample Num: 220160, Cur Loss: 0.00000207, Cur Avg Loss: 0.00345456, Log Avg loss: 0.00073481, Global Avg Loss: 0.01176144, Time: 0.1997 Steps: 54400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013960, Sample Num: 223360, Cur Loss: 0.00001399, Cur Avg Loss: 0.00344817, Log Avg loss: 0.00300854, Global Avg Loss: 0.01172938, Time: 0.1232 Steps: 54600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014160, Sample Num: 226560, Cur Loss: 0.00000048, Cur Avg Loss: 0.00340209, Log Avg loss: 0.00018540, Global Avg Loss: 0.01168725, Time: 0.1006 Steps: 54800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014360, Sample Num: 229760, Cur Loss: 0.00000162, Cur Avg Loss: 0.00340569, Log Avg loss: 0.00366046, Global Avg Loss: 0.01165806, Time: 0.0634 Steps: 55000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014560, Sample Num: 232960, Cur Loss: 0.00000407, Cur Avg Loss: 0.00338453, Log Avg loss: 0.00186506, Global Avg Loss: 0.01162258, Time: 0.0558 Steps: 55200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014760, Sample Num: 236160, Cur Loss: 0.00000376, Cur Avg Loss: 0.00346531, Log Avg loss: 0.00934614, Global Avg Loss: 0.01161436, Time: 0.0374 Steps: 55400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014960, Sample Num: 239360, Cur Loss: 0.00000021, Cur Avg Loss: 0.00345651, Log Avg loss: 0.00280724, Global Avg Loss: 0.01158268, Time: 0.2230 Steps: 55600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015160, Sample Num: 242560, Cur Loss: 0.00000021, Cur Avg Loss: 0.00341212, Log Avg loss: 0.00009157, Global Avg Loss: 0.01154149, Time: 0.1562 Steps: 55800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015360, Sample Num: 245760, Cur Loss: 0.00000642, Cur Avg Loss: 0.00338451, Log Avg loss: 0.00129211, Global Avg Loss: 0.01150489, Time: 0.1471 Steps: 56000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015560, Sample Num: 248960, Cur Loss: 0.00000035, Cur Avg Loss: 0.00334271, Log Avg loss: 0.00013254, Global Avg Loss: 0.01146442, Time: 0.0983 Steps: 56200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 015760, Sample Num: 252160, Cur Loss: 0.00000007, Cur Avg Loss: 0.00333258, Log Avg loss: 0.00254428, Global Avg Loss: 0.01143278, Time: 0.0269 Steps: 56400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 015960, Sample Num: 255360, Cur Loss: 0.00004863, Cur Avg Loss: 0.00334515, Log Avg loss: 0.00433532, Global Avg Loss: 0.01140770, Time: 0.0553 Steps: 56600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016160, Sample Num: 258560, Cur Loss: 0.00000026, Cur Avg Loss: 0.00333773, Log Avg loss: 0.00274610, Global Avg Loss: 0.01137721, Time: 0.0483 Steps: 56800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016360, Sample Num: 261760, Cur Loss: 0.00000081, Cur Avg Loss: 0.00329744, Log Avg loss: 0.00004165, Global Avg Loss: 0.01133743, Time: 0.1729 Steps: 57000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016560, Sample Num: 264960, Cur Loss: 0.00000071, Cur Avg Loss: 0.00328134, Log Avg loss: 0.00196475, Global Avg Loss: 0.01130466, Time: 0.0496 Steps: 57200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016760, Sample Num: 268160, Cur Loss: 0.00000010, Cur Avg Loss: 0.00325049, Log Avg loss: 0.00069574, Global Avg Loss: 0.01126770, Time: 0.2441 Steps: 57400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016960, Sample Num: 271360, Cur Loss: 0.00000004, Cur Avg Loss: 0.00322380, Log Avg loss: 0.00098752, Global Avg Loss: 0.01123200, Time: 0.1154 Steps: 57600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017160, Sample Num: 274560, Cur Loss: 0.00000007, Cur Avg Loss: 0.00328148, Log Avg loss: 0.00817235, Global Avg Loss: 0.01122141, Time: 0.1012 Steps: 57800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017360, Sample Num: 277760, Cur Loss: 0.00000009, Cur Avg Loss: 0.00327021, Log Avg loss: 0.00230353, Global Avg Loss: 0.01119066, Time: 0.0234 Steps: 58000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017560, Sample Num: 280960, Cur Loss: 0.00006788, Cur Avg Loss: 0.00325428, Log Avg loss: 0.00187131, Global Avg Loss: 0.01115864, Time: 0.1819 Steps: 58200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 017760, Sample Num: 284160, Cur Loss: 0.00000008, Cur Avg Loss: 0.00324246, Log Avg loss: 0.00220489, Global Avg Loss: 0.01112797, Time: 0.0367 Steps: 58400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 017960, Sample Num: 287360, Cur Loss: 0.00000053, Cur Avg Loss: 0.00320678, Log Avg loss: 0.00003839, Global Avg Loss: 0.01109012, Time: 0.0553 Steps: 58600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018160, Sample Num: 290560, Cur Loss: 0.00000033, Cur Avg Loss: 0.00317191, Log Avg loss: 0.00004074, Global Avg Loss: 0.01105254, Time: 0.1154 Steps: 58800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018360, Sample Num: 293760, Cur Loss: 0.00002012, Cur Avg Loss: 0.00314000, Log Avg loss: 0.00024279, Global Avg Loss: 0.01101590, Time: 0.0542 Steps: 59000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018560, Sample Num: 296960, Cur Loss: 0.00000007, Cur Avg Loss: 0.00312426, Log Avg loss: 0.00167919, Global Avg Loss: 0.01098436, Time: 0.1228 Steps: 59200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018760, Sample Num: 300160, Cur Loss: 0.00000009, Cur Avg Loss: 0.00309155, Log Avg loss: 0.00005572, Global Avg Loss: 0.01094756, Time: 0.0967 Steps: 59400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018960, Sample Num: 303360, Cur Loss: 0.00000001, Cur Avg Loss: 0.00309602, Log Avg loss: 0.00351522, Global Avg Loss: 0.01092262, Time: 0.0568 Steps: 59600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019160, Sample Num: 306560, Cur Loss: 0.00102859, Cur Avg Loss: 0.00311481, Log Avg loss: 0.00489657, Global Avg Loss: 0.01090246, Time: 0.1848 Steps: 59800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019360, Sample Num: 309760, Cur Loss: 0.00000001, Cur Avg Loss: 0.00309643, Log Avg loss: 0.00133502, Global Avg Loss: 0.01087057, Time: 0.0589 Steps: 60000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019560, Sample Num: 312960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00306690, Log Avg loss: 0.00020893, Global Avg Loss: 0.01083515, Time: 0.1416 Steps: 60200, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 019760, Sample Num: 316160, Cur Loss: 0.00000004, Cur Avg Loss: 0.00306809, Log Avg loss: 0.00318379, Global Avg Loss: 0.01080982, Time: 0.1510 Steps: 60400, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 019960, Sample Num: 319360, Cur Loss: 0.00002265, Cur Avg Loss: 0.00304105, Log Avg loss: 0.00036956, Global Avg Loss: 0.01077536, Time: 0.0428 Steps: 60600, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 020160, Sample Num: 322560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00301185, Log Avg loss: 0.00009781, Global Avg Loss: 0.01074024, Time: 0.1723 Steps: 60800, Updated lr: 0.000070 ***** Running evaluation checkpoint-60960 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-60960 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1790.323511, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.014607, "eval_total_loss": 39.583814, "eval_acc": 0.997693, "eval_prec": 0.996783, "eval_recall": 0.998619, "eval_f1": 0.9977, "eval_roc_auc": 0.999834, "eval_pr_auc": 0.999729, "eval_confusion_matrix": {"tn": 21557, "fp": 70, "fn": 30, "tp": 21692}, "eval_mcc2": 0.995388, "eval_mcc": 0.995388, "eval_sn": 0.998619, "eval_sp": 0.996763, "update_flag": true, "test_avg_loss": 0.012258, "test_total_loss": 49.816203, "test_acc": 0.997832, "test_prec": 0.99696, "test_recall": 0.998708, "test_f1": 0.997833, "test_roc_auc": 0.999908, "test_pr_auc": 0.999863, "test_confusion_matrix": {"tn": 32418, "fp": 99, "fn": 42, "tp": 32463}, "test_mcc2": 0.995665, "test_mcc": 0.995665, "test_sn": 0.998708, "test_sp": 0.996955, "lr": 7.006896551724137e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.010741444921274077, "train_cur_epoch_loss": 62.51094607134943, "train_cur_epoch_avg_loss": 0.003076326086188456, "train_cur_epoch_time": 1790.3235111236572, "train_cur_epoch_avg_time": 0.08810647200411699, "epoch": 3, "step": 60960} ################################################## Training, Epoch: 0004, Batch: 000040, Sample Num: 640, Cur Loss: 0.00000009, Cur Avg Loss: 0.00012704, Log Avg loss: 0.00898592, Global Avg Loss: 0.01073448, Time: 0.1008 Steps: 61000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000240, Sample Num: 3840, Cur Loss: 0.00000096, Cur Avg Loss: 0.00009490, Log Avg loss: 0.00008848, Global Avg Loss: 0.01069969, Time: 0.0411 Steps: 61200, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000440, Sample Num: 7040, Cur Loss: 0.00000487, Cur Avg Loss: 0.00010911, Log Avg loss: 0.00012617, Global Avg Loss: 0.01066525, Time: 0.0515 Steps: 61400, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000640, Sample Num: 10240, Cur Loss: 0.00000001, Cur Avg Loss: 0.00024553, Log Avg loss: 0.00054563, Global Avg Loss: 0.01063240, Time: 0.1276 Steps: 61600, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000840, Sample Num: 13440, Cur Loss: 0.00000055, Cur Avg Loss: 0.00023977, Log Avg loss: 0.00022137, Global Avg Loss: 0.01059870, Time: 0.0623 Steps: 61800, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00000001, Cur Avg Loss: 0.00022986, Log Avg loss: 0.00018822, Global Avg Loss: 0.01056512, Time: 0.0385 Steps: 62000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 001240, Sample Num: 19840, Cur Loss: 0.00000002, Cur Avg Loss: 0.00019971, Log Avg loss: 0.00004290, Global Avg Loss: 0.01053129, Time: 0.1073 Steps: 62200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001440, Sample Num: 23040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00017328, Log Avg loss: 0.00000948, Global Avg Loss: 0.01049756, Time: 0.0410 Steps: 62400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001640, Sample Num: 26240, Cur Loss: 0.00000004, Cur Avg Loss: 0.00015734, Log Avg loss: 0.00004256, Global Avg Loss: 0.01046416, Time: 0.0233 Steps: 62600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001840, Sample Num: 29440, Cur Loss: 0.00000101, Cur Avg Loss: 0.00029158, Log Avg loss: 0.00139236, Global Avg Loss: 0.01043527, Time: 0.0462 Steps: 62800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00000186, Cur Avg Loss: 0.00081512, Log Avg loss: 0.00563167, Global Avg Loss: 0.01042002, Time: 0.1015 Steps: 63000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002240, Sample Num: 35840, Cur Loss: 0.00000001, Cur Avg Loss: 0.00085807, Log Avg loss: 0.00129616, Global Avg Loss: 0.01039115, Time: 0.1347 Steps: 63200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002440, Sample Num: 39040, Cur Loss: 0.00000004, Cur Avg Loss: 0.00082468, Log Avg loss: 0.00045075, Global Avg Loss: 0.01035979, Time: 0.1356 Steps: 63400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002640, Sample Num: 42240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106878, Log Avg loss: 0.00404674, Global Avg Loss: 0.01033994, Time: 0.0311 Steps: 63600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00114892, Log Avg loss: 0.00220684, Global Avg Loss: 0.01031444, Time: 0.1046 Steps: 63800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00000006, Cur Avg Loss: 0.00152677, Log Avg loss: 0.00689221, Global Avg Loss: 0.01030375, Time: 0.0431 Steps: 64000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 003240, Sample Num: 51840, Cur Loss: 0.00000030, Cur Avg Loss: 0.00156656, Log Avg loss: 0.00217127, Global Avg Loss: 0.01027841, Time: 0.0578 Steps: 64200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003440, Sample Num: 55040, Cur Loss: 0.00002714, Cur Avg Loss: 0.00150689, Log Avg loss: 0.00054036, Global Avg Loss: 0.01024817, Time: 0.1011 Steps: 64400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003640, Sample Num: 58240, Cur Loss: 0.00000001, Cur Avg Loss: 0.00162713, Log Avg loss: 0.00369513, Global Avg Loss: 0.01022788, Time: 0.0599 Steps: 64600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003840, Sample Num: 61440, Cur Loss: 0.00000012, Cur Avg Loss: 0.00163211, Log Avg loss: 0.00172280, Global Avg Loss: 0.01020163, Time: 0.1764 Steps: 64800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00000003, Cur Avg Loss: 0.00157774, Log Avg loss: 0.00053382, Global Avg Loss: 0.01017189, Time: 0.0354 Steps: 65000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004240, Sample Num: 67840, Cur Loss: 0.00000106, Cur Avg Loss: 0.00151053, Log Avg loss: 0.00015281, Global Avg Loss: 0.01014115, Time: 0.0652 Steps: 65200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004440, Sample Num: 71040, Cur Loss: 0.00000028, Cur Avg Loss: 0.00178898, Log Avg loss: 0.00769222, Global Avg Loss: 0.01013366, Time: 0.1040 Steps: 65400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00000080, Cur Avg Loss: 0.00198098, Log Avg loss: 0.00624334, Global Avg Loss: 0.01012180, Time: 0.0419 Steps: 65600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00000432, Cur Avg Loss: 0.00190699, Log Avg loss: 0.00019047, Global Avg Loss: 0.01009162, Time: 0.1239 Steps: 65800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 005040, Sample Num: 80640, Cur Loss: 0.00000045, Cur Avg Loss: 0.00197481, Log Avg loss: 0.00361593, Global Avg Loss: 0.01007199, Time: 0.0555 Steps: 66000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 005240, Sample Num: 83840, Cur Loss: 0.00415871, Cur Avg Loss: 0.00190450, Log Avg loss: 0.00013289, Global Avg Loss: 0.01004196, Time: 0.1306 Steps: 66200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005440, Sample Num: 87040, Cur Loss: 0.00000005, Cur Avg Loss: 0.00186951, Log Avg loss: 0.00095260, Global Avg Loss: 0.01001459, Time: 0.1065 Steps: 66400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005640, Sample Num: 90240, Cur Loss: 0.00000003, Cur Avg Loss: 0.00184500, Log Avg loss: 0.00117830, Global Avg Loss: 0.00998805, Time: 0.1356 Steps: 66600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005840, Sample Num: 93440, Cur Loss: 0.00000480, Cur Avg Loss: 0.00199677, Log Avg loss: 0.00627688, Global Avg Loss: 0.00997694, Time: 0.0545 Steps: 66800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006040, Sample Num: 96640, Cur Loss: 0.00000007, Cur Avg Loss: 0.00217627, Log Avg loss: 0.00741760, Global Avg Loss: 0.00996930, Time: 0.0401 Steps: 67000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006240, Sample Num: 99840, Cur Loss: 0.00000058, Cur Avg Loss: 0.00211938, Log Avg loss: 0.00040125, Global Avg Loss: 0.00994082, Time: 0.0642 Steps: 67200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006440, Sample Num: 103040, Cur Loss: 0.00000781, Cur Avg Loss: 0.00205995, Log Avg loss: 0.00020564, Global Avg Loss: 0.00991194, Time: 0.0863 Steps: 67400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006640, Sample Num: 106240, Cur Loss: 0.00000206, Cur Avg Loss: 0.00199807, Log Avg loss: 0.00000573, Global Avg Loss: 0.00988263, Time: 0.0964 Steps: 67600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006840, Sample Num: 109440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00203335, Log Avg loss: 0.00320456, Global Avg Loss: 0.00986293, Time: 0.0264 Steps: 67800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007040, Sample Num: 112640, Cur Loss: 0.00011459, Cur Avg Loss: 0.00198378, Log Avg loss: 0.00028840, Global Avg Loss: 0.00983477, Time: 0.1701 Steps: 68000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007240, Sample Num: 115840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00194800, Log Avg loss: 0.00068857, Global Avg Loss: 0.00980795, Time: 0.1751 Steps: 68200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007440, Sample Num: 119040, Cur Loss: 0.00000007, Cur Avg Loss: 0.00201839, Log Avg loss: 0.00456643, Global Avg Loss: 0.00979262, Time: 0.0453 Steps: 68400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 007640, Sample Num: 122240, Cur Loss: 0.00000408, Cur Avg Loss: 0.00203551, Log Avg loss: 0.00267259, Global Avg Loss: 0.00977186, Time: 0.1651 Steps: 68600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 007840, Sample Num: 125440, Cur Loss: 0.00000001, Cur Avg Loss: 0.00213828, Log Avg loss: 0.00606407, Global Avg Loss: 0.00976108, Time: 0.1002 Steps: 68800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008040, Sample Num: 128640, Cur Loss: 0.00000015, Cur Avg Loss: 0.00239210, Log Avg loss: 0.01234183, Global Avg Loss: 0.00976856, Time: 0.0416 Steps: 69000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008240, Sample Num: 131840, Cur Loss: 0.00000011, Cur Avg Loss: 0.00234300, Log Avg loss: 0.00036901, Global Avg Loss: 0.00974140, Time: 0.2118 Steps: 69200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008440, Sample Num: 135040, Cur Loss: 0.00000905, Cur Avg Loss: 0.00233220, Log Avg loss: 0.00188720, Global Avg Loss: 0.00971876, Time: 0.0915 Steps: 69400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008640, Sample Num: 138240, Cur Loss: 0.00000007, Cur Avg Loss: 0.00228286, Log Avg loss: 0.00020095, Global Avg Loss: 0.00969141, Time: 0.0988 Steps: 69600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008840, Sample Num: 141440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00223312, Log Avg loss: 0.00008432, Global Avg Loss: 0.00966389, Time: 0.0525 Steps: 69800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009040, Sample Num: 144640, Cur Loss: 0.00000007, Cur Avg Loss: 0.00229198, Log Avg loss: 0.00489354, Global Avg Loss: 0.00965026, Time: 0.0426 Steps: 70000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009240, Sample Num: 147840, Cur Loss: 0.00003050, Cur Avg Loss: 0.00224401, Log Avg loss: 0.00007593, Global Avg Loss: 0.00962298, Time: 0.0622 Steps: 70200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009440, Sample Num: 151040, Cur Loss: 0.00000118, Cur Avg Loss: 0.00225087, Log Avg loss: 0.00256750, Global Avg Loss: 0.00960294, Time: 0.0441 Steps: 70400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 009640, Sample Num: 154240, Cur Loss: 0.00000051, Cur Avg Loss: 0.00221430, Log Avg loss: 0.00048822, Global Avg Loss: 0.00957711, Time: 0.0972 Steps: 70600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 009840, Sample Num: 157440, Cur Loss: 0.00000060, Cur Avg Loss: 0.00216998, Log Avg loss: 0.00003385, Global Avg Loss: 0.00955016, Time: 0.1834 Steps: 70800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010040, Sample Num: 160640, Cur Loss: 0.00000142, Cur Avg Loss: 0.00212720, Log Avg loss: 0.00002236, Global Avg Loss: 0.00952332, Time: 0.0459 Steps: 71000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010240, Sample Num: 163840, Cur Loss: 0.00000086, Cur Avg Loss: 0.00209463, Log Avg loss: 0.00045987, Global Avg Loss: 0.00949786, Time: 0.2657 Steps: 71200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010440, Sample Num: 167040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00206390, Log Avg loss: 0.00049038, Global Avg Loss: 0.00947263, Time: 0.1977 Steps: 71400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010640, Sample Num: 170240, Cur Loss: 0.00000006, Cur Avg Loss: 0.00210468, Log Avg loss: 0.00423337, Global Avg Loss: 0.00945799, Time: 0.1640 Steps: 71600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010840, Sample Num: 173440, Cur Loss: 0.00000003, Cur Avg Loss: 0.00208576, Log Avg loss: 0.00107944, Global Avg Loss: 0.00943465, Time: 0.0951 Steps: 71800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011040, Sample Num: 176640, Cur Loss: 0.55973798, Cur Avg Loss: 0.00210054, Log Avg loss: 0.00290125, Global Avg Loss: 0.00941651, Time: 0.0398 Steps: 72000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011240, Sample Num: 179840, Cur Loss: 0.00003310, Cur Avg Loss: 0.00234021, Log Avg loss: 0.01557029, Global Avg Loss: 0.00943355, Time: 0.0999 Steps: 72200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011440, Sample Num: 183040, Cur Loss: 0.00000043, Cur Avg Loss: 0.00232513, Log Avg loss: 0.00147753, Global Avg Loss: 0.00941157, Time: 0.0563 Steps: 72400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 011640, Sample Num: 186240, Cur Loss: 0.00000023, Cur Avg Loss: 0.00234178, Log Avg loss: 0.00329438, Global Avg Loss: 0.00939472, Time: 0.0979 Steps: 72600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 011840, Sample Num: 189440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00230476, Log Avg loss: 0.00015022, Global Avg Loss: 0.00936933, Time: 0.1185 Steps: 72800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012040, Sample Num: 192640, Cur Loss: 0.00000019, Cur Avg Loss: 0.00226685, Log Avg loss: 0.00002251, Global Avg Loss: 0.00934372, Time: 0.2024 Steps: 73000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012240, Sample Num: 195840, Cur Loss: 0.00000006, Cur Avg Loss: 0.00227761, Log Avg loss: 0.00292504, Global Avg Loss: 0.00932618, Time: 0.0628 Steps: 73200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012440, Sample Num: 199040, Cur Loss: 0.00000054, Cur Avg Loss: 0.00224329, Log Avg loss: 0.00014333, Global Avg Loss: 0.00930116, Time: 0.0428 Steps: 73400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012640, Sample Num: 202240, Cur Loss: 0.00000091, Cur Avg Loss: 0.00229192, Log Avg loss: 0.00531635, Global Avg Loss: 0.00929033, Time: 0.1425 Steps: 73600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012840, Sample Num: 205440, Cur Loss: 0.00000143, Cur Avg Loss: 0.00226205, Log Avg loss: 0.00037408, Global Avg Loss: 0.00926617, Time: 0.0372 Steps: 73800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013040, Sample Num: 208640, Cur Loss: 0.00000158, Cur Avg Loss: 0.00228714, Log Avg loss: 0.00389848, Global Avg Loss: 0.00925166, Time: 0.1112 Steps: 74000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013240, Sample Num: 211840, Cur Loss: 0.00000018, Cur Avg Loss: 0.00225285, Log Avg loss: 0.00001655, Global Avg Loss: 0.00922677, Time: 0.0699 Steps: 74200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013440, Sample Num: 215040, Cur Loss: 0.00000403, Cur Avg Loss: 0.00221949, Log Avg loss: 0.00001165, Global Avg Loss: 0.00920200, Time: 0.1041 Steps: 74400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 013640, Sample Num: 218240, Cur Loss: 0.00000048, Cur Avg Loss: 0.00219560, Log Avg loss: 0.00059001, Global Avg Loss: 0.00917891, Time: 0.0551 Steps: 74600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 013840, Sample Num: 221440, Cur Loss: 0.00000005, Cur Avg Loss: 0.00220262, Log Avg loss: 0.00268129, Global Avg Loss: 0.00916153, Time: 0.0818 Steps: 74800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014040, Sample Num: 224640, Cur Loss: 0.00000273, Cur Avg Loss: 0.00217495, Log Avg loss: 0.00026026, Global Avg Loss: 0.00913780, Time: 0.0613 Steps: 75000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014240, Sample Num: 227840, Cur Loss: 0.00000001, Cur Avg Loss: 0.00217802, Log Avg loss: 0.00239359, Global Avg Loss: 0.00911986, Time: 0.0423 Steps: 75200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014440, Sample Num: 231040, Cur Loss: 0.00000233, Cur Avg Loss: 0.00214925, Log Avg loss: 0.00010090, Global Avg Loss: 0.00909594, Time: 0.0603 Steps: 75400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014640, Sample Num: 234240, Cur Loss: 0.00000006, Cur Avg Loss: 0.00213181, Log Avg loss: 0.00087208, Global Avg Loss: 0.00907418, Time: 0.1030 Steps: 75600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014840, Sample Num: 237440, Cur Loss: 0.00000002, Cur Avg Loss: 0.00218723, Log Avg loss: 0.00624434, Global Avg Loss: 0.00906671, Time: 0.0760 Steps: 75800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015040, Sample Num: 240640, Cur Loss: 0.00000961, Cur Avg Loss: 0.00217722, Log Avg loss: 0.00143453, Global Avg Loss: 0.00904663, Time: 0.1472 Steps: 76000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015240, Sample Num: 243840, Cur Loss: 0.00000113, Cur Avg Loss: 0.00214869, Log Avg loss: 0.00000317, Global Avg Loss: 0.00902289, Time: 0.0704 Steps: 76200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015440, Sample Num: 247040, Cur Loss: 0.00000024, Cur Avg Loss: 0.00214051, Log Avg loss: 0.00151689, Global Avg Loss: 0.00900324, Time: 0.1083 Steps: 76400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 015640, Sample Num: 250240, Cur Loss: 0.00000005, Cur Avg Loss: 0.00211339, Log Avg loss: 0.00002032, Global Avg Loss: 0.00897979, Time: 0.0566 Steps: 76600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 015840, Sample Num: 253440, Cur Loss: 0.00000002, Cur Avg Loss: 0.00208817, Log Avg loss: 0.00011543, Global Avg Loss: 0.00895671, Time: 0.0796 Steps: 76800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016040, Sample Num: 256640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00209471, Log Avg loss: 0.00261278, Global Avg Loss: 0.00894023, Time: 0.0455 Steps: 77000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016240, Sample Num: 259840, Cur Loss: 0.00000004, Cur Avg Loss: 0.00206899, Log Avg loss: 0.00000606, Global Avg Loss: 0.00891708, Time: 0.0683 Steps: 77200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016440, Sample Num: 263040, Cur Loss: 0.00000006, Cur Avg Loss: 0.00205918, Log Avg loss: 0.00126321, Global Avg Loss: 0.00889731, Time: 0.0625 Steps: 77400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016640, Sample Num: 266240, Cur Loss: 0.00000003, Cur Avg Loss: 0.00203454, Log Avg loss: 0.00000904, Global Avg Loss: 0.00887440, Time: 0.0758 Steps: 77600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016840, Sample Num: 269440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00201262, Log Avg loss: 0.00018830, Global Avg Loss: 0.00885207, Time: 0.0922 Steps: 77800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017040, Sample Num: 272640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00198906, Log Avg loss: 0.00000594, Global Avg Loss: 0.00882939, Time: 0.0449 Steps: 78000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017240, Sample Num: 275840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00206986, Log Avg loss: 0.00895347, Global Avg Loss: 0.00882970, Time: 0.1049 Steps: 78200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017440, Sample Num: 279040, Cur Loss: 0.00000001, Cur Avg Loss: 0.00204730, Log Avg loss: 0.00010325, Global Avg Loss: 0.00880744, Time: 0.0434 Steps: 78400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 017640, Sample Num: 282240, Cur Loss: 0.00000063, Cur Avg Loss: 0.00202431, Log Avg loss: 0.00001898, Global Avg Loss: 0.00878508, Time: 0.0479 Steps: 78600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 017840, Sample Num: 285440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00200342, Log Avg loss: 0.00016134, Global Avg Loss: 0.00876319, Time: 0.1090 Steps: 78800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018040, Sample Num: 288640, Cur Loss: 0.00000013, Cur Avg Loss: 0.00198131, Log Avg loss: 0.00000925, Global Avg Loss: 0.00874103, Time: 0.0508 Steps: 79000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018240, Sample Num: 291840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00196255, Log Avg loss: 0.00027002, Global Avg Loss: 0.00871964, Time: 0.0813 Steps: 79200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018440, Sample Num: 295040, Cur Loss: 0.00000006, Cur Avg Loss: 0.00194842, Log Avg loss: 0.00065991, Global Avg Loss: 0.00869934, Time: 0.1867 Steps: 79400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018640, Sample Num: 298240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00192759, Log Avg loss: 0.00000660, Global Avg Loss: 0.00867750, Time: 0.1694 Steps: 79600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018840, Sample Num: 301440, Cur Loss: 0.00000018, Cur Avg Loss: 0.00192479, Log Avg loss: 0.00166440, Global Avg Loss: 0.00865992, Time: 0.0655 Steps: 79800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019040, Sample Num: 304640, Cur Loss: 0.00000004, Cur Avg Loss: 0.00191235, Log Avg loss: 0.00074008, Global Avg Loss: 0.00864012, Time: 0.1133 Steps: 80000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019240, Sample Num: 307840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00194093, Log Avg loss: 0.00466204, Global Avg Loss: 0.00863020, Time: 0.0995 Steps: 80200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019440, Sample Num: 311040, Cur Loss: 0.00000323, Cur Avg Loss: 0.00192284, Log Avg loss: 0.00018242, Global Avg Loss: 0.00860918, Time: 0.1596 Steps: 80400, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 019640, Sample Num: 314240, Cur Loss: 0.00000685, Cur Avg Loss: 0.00190331, Log Avg loss: 0.00000479, Global Avg Loss: 0.00858783, Time: 0.1176 Steps: 80600, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 019840, Sample Num: 317440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00188414, Log Avg loss: 0.00000195, Global Avg Loss: 0.00856658, Time: 0.0765 Steps: 80800, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 020040, Sample Num: 320640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00186556, Log Avg loss: 0.00002200, Global Avg Loss: 0.00854548, Time: 0.0309 Steps: 81000, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 020240, Sample Num: 323840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00189745, Log Avg loss: 0.00509351, Global Avg Loss: 0.00853698, Time: 0.1595 Steps: 81200, Updated lr: 0.000060 ***** Running evaluation checkpoint-81280 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-81280 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1912.762583, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.015118, "eval_total_loss": 40.969204, "eval_acc": 0.997901, "eval_prec": 0.997928, "eval_recall": 0.997882, "eval_f1": 0.997905, "eval_roc_auc": 0.999787, "eval_pr_auc": 0.999607, "eval_confusion_matrix": {"tn": 21582, "fp": 45, "fn": 46, "tp": 21676}, "eval_mcc2": 0.995801, "eval_mcc": 0.995801, "eval_sn": 0.997882, "eval_sp": 0.997919, "update_flag": true, "test_avg_loss": 0.013253, "test_total_loss": 53.860972, "test_acc": 0.998078, "test_prec": 0.99797, "test_recall": 0.998185, "test_f1": 0.998077, "test_roc_auc": 0.999924, "test_pr_auc": 0.999887, "test_confusion_matrix": {"tn": 32451, "fp": 66, "fn": 59, "tp": 32446}, "test_mcc2": 0.996155, "test_mcc": 0.996155, "test_sn": 0.998185, "test_sp": 0.99797, "lr": 6.005911330049261e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.008534611622543486, "train_cur_epoch_loss": 38.8947502794291, "train_cur_epoch_avg_loss": 0.0019141117263498572, "train_cur_epoch_time": 1912.7625832557678, "train_cur_epoch_avg_time": 0.09413201689250826, "epoch": 4, "step": 81280} ################################################## Training, Epoch: 0005, Batch: 000120, Sample Num: 1920, Cur Loss: 0.00000034, Cur Avg Loss: 0.00002224, Log Avg loss: 0.00246490, Global Avg Loss: 0.00852206, Time: 0.1443 Steps: 81400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000320, Sample Num: 5120, Cur Loss: 0.00003309, Cur Avg Loss: 0.00001156, Log Avg loss: 0.00000515, Global Avg Loss: 0.00850119, Time: 0.1489 Steps: 81600, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00005338, Cur Avg Loss: 0.00000884, Log Avg loss: 0.00000448, Global Avg Loss: 0.00848041, Time: 0.1732 Steps: 81800, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000720, Sample Num: 11520, Cur Loss: 0.00000007, Cur Avg Loss: 0.00000934, Log Avg loss: 0.00001064, Global Avg Loss: 0.00845976, Time: 0.0304 Steps: 82000, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000920, Sample Num: 14720, Cur Loss: 0.00000095, Cur Avg Loss: 0.00001304, Log Avg loss: 0.00002639, Global Avg Loss: 0.00843924, Time: 0.0990 Steps: 82200, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 001120, Sample Num: 17920, Cur Loss: 0.00000318, Cur Avg Loss: 0.00001204, Log Avg loss: 0.00000744, Global Avg Loss: 0.00841877, Time: 0.0360 Steps: 82400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 001320, Sample Num: 21120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001049, Log Avg loss: 0.00000181, Global Avg Loss: 0.00839839, Time: 0.0280 Steps: 82600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000939, Log Avg loss: 0.00000212, Global Avg Loss: 0.00837811, Time: 0.1271 Steps: 82800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001720, Sample Num: 27520, Cur Loss: 0.00000060, Cur Avg Loss: 0.00000835, Log Avg loss: 0.00000042, Global Avg Loss: 0.00835792, Time: 0.1254 Steps: 83000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000756, Log Avg loss: 0.00000076, Global Avg Loss: 0.00833783, Time: 0.0270 Steps: 83200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002120, Sample Num: 33920, Cur Loss: 0.00000004, Cur Avg Loss: 0.00065679, Log Avg loss: 0.00688936, Global Avg Loss: 0.00833436, Time: 0.0383 Steps: 83400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00060075, Log Avg loss: 0.00000682, Global Avg Loss: 0.00831444, Time: 0.1419 Steps: 83600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002520, Sample Num: 40320, Cur Loss: 0.01037023, Cur Avg Loss: 0.00055720, Log Avg loss: 0.00005203, Global Avg Loss: 0.00829472, Time: 0.1052 Steps: 83800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106570, Log Avg loss: 0.00747281, Global Avg Loss: 0.00829276, Time: 0.0578 Steps: 84000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002920, Sample Num: 46720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140414, Log Avg loss: 0.00600685, Global Avg Loss: 0.00828733, Time: 0.1247 Steps: 84200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 003120, Sample Num: 49920, Cur Loss: 0.00000328, Cur Avg Loss: 0.00154927, Log Avg loss: 0.00366811, Global Avg Loss: 0.00827639, Time: 0.1033 Steps: 84400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 003320, Sample Num: 53120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162407, Log Avg loss: 0.00279099, Global Avg Loss: 0.00826342, Time: 0.0507 Steps: 84600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153337, Log Avg loss: 0.00002785, Global Avg Loss: 0.00824399, Time: 0.1916 Steps: 84800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003720, Sample Num: 59520, Cur Loss: 0.00000060, Cur Avg Loss: 0.00169911, Log Avg loss: 0.00461599, Global Avg Loss: 0.00823546, Time: 0.0829 Steps: 85000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003920, Sample Num: 62720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00168204, Log Avg loss: 0.00136455, Global Avg Loss: 0.00821933, Time: 0.0968 Steps: 85200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160340, Log Avg loss: 0.00006215, Global Avg Loss: 0.00820023, Time: 0.0988 Steps: 85400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004320, Sample Num: 69120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153782, Log Avg loss: 0.00018676, Global Avg Loss: 0.00818150, Time: 0.1780 Steps: 85600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00187064, Log Avg loss: 0.00905963, Global Avg Loss: 0.00818355, Time: 0.0487 Steps: 85800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004720, Sample Num: 75520, Cur Loss: 0.00000104, Cur Avg Loss: 0.00200817, Log Avg loss: 0.00511629, Global Avg Loss: 0.00817642, Time: 0.0254 Steps: 86000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004920, Sample Num: 78720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00193201, Log Avg loss: 0.00013466, Global Avg Loss: 0.00815776, Time: 0.1260 Steps: 86200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 005120, Sample Num: 81920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00208482, Log Avg loss: 0.00584384, Global Avg Loss: 0.00815240, Time: 0.1045 Steps: 86400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 005320, Sample Num: 85120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00200671, Log Avg loss: 0.00000727, Global Avg Loss: 0.00813359, Time: 0.1050 Steps: 86600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005520, Sample Num: 88320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00193405, Log Avg loss: 0.00000119, Global Avg Loss: 0.00811485, Time: 0.0980 Steps: 86800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005720, Sample Num: 91520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00208284, Log Avg loss: 0.00618947, Global Avg Loss: 0.00811043, Time: 0.1135 Steps: 87000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005920, Sample Num: 94720, Cur Loss: 0.00000004, Cur Avg Loss: 0.00234971, Log Avg loss: 0.00998209, Global Avg Loss: 0.00811472, Time: 0.0412 Steps: 87200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006120, Sample Num: 97920, Cur Loss: 0.00000060, Cur Avg Loss: 0.00242786, Log Avg loss: 0.00474111, Global Avg Loss: 0.00810700, Time: 0.0858 Steps: 87400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006320, Sample Num: 101120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00235138, Log Avg loss: 0.00001110, Global Avg Loss: 0.00808852, Time: 0.1376 Steps: 87600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006520, Sample Num: 104320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00228069, Log Avg loss: 0.00004696, Global Avg Loss: 0.00807020, Time: 0.1423 Steps: 87800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006720, Sample Num: 107520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00221316, Log Avg loss: 0.00001165, Global Avg Loss: 0.00805188, Time: 0.2461 Steps: 88000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006920, Sample Num: 110720, Cur Loss: 0.00000001, Cur Avg Loss: 0.00214941, Log Avg loss: 0.00000753, Global Avg Loss: 0.00803364, Time: 0.1795 Steps: 88200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 007120, Sample Num: 113920, Cur Loss: 0.00000001, Cur Avg Loss: 0.00208922, Log Avg loss: 0.00000677, Global Avg Loss: 0.00801548, Time: 0.1178 Steps: 88400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 007320, Sample Num: 117120, Cur Loss: 0.00000001, Cur Avg Loss: 0.00207830, Log Avg loss: 0.00168943, Global Avg Loss: 0.00800120, Time: 0.0392 Steps: 88600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007520, Sample Num: 120320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00212623, Log Avg loss: 0.00388058, Global Avg Loss: 0.00799192, Time: 0.0478 Steps: 88800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007720, Sample Num: 123520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00215497, Log Avg loss: 0.00323544, Global Avg Loss: 0.00798123, Time: 0.1295 Steps: 89000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007920, Sample Num: 126720, Cur Loss: 0.00000001, Cur Avg Loss: 0.00239213, Log Avg loss: 0.01154648, Global Avg Loss: 0.00798923, Time: 0.0688 Steps: 89200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008120, Sample Num: 129920, Cur Loss: 0.00000876, Cur Avg Loss: 0.00239219, Log Avg loss: 0.00239446, Global Avg Loss: 0.00797671, Time: 0.1283 Steps: 89400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008320, Sample Num: 133120, Cur Loss: 0.00011119, Cur Avg Loss: 0.00235403, Log Avg loss: 0.00080466, Global Avg Loss: 0.00796070, Time: 0.2905 Steps: 89600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008520, Sample Num: 136320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00229958, Log Avg loss: 0.00003465, Global Avg Loss: 0.00794305, Time: 0.0821 Steps: 89800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008720, Sample Num: 139520, Cur Loss: 0.00000563, Cur Avg Loss: 0.00224715, Log Avg loss: 0.00001344, Global Avg Loss: 0.00792543, Time: 0.0518 Steps: 90000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008920, Sample Num: 142720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00219711, Log Avg loss: 0.00001561, Global Avg Loss: 0.00790789, Time: 0.2231 Steps: 90200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 009120, Sample Num: 145920, Cur Loss: 0.00000003, Cur Avg Loss: 0.00221081, Log Avg loss: 0.00282182, Global Avg Loss: 0.00789664, Time: 0.0565 Steps: 90400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 009320, Sample Num: 149120, Cur Loss: 0.00000001, Cur Avg Loss: 0.00216342, Log Avg loss: 0.00000224, Global Avg Loss: 0.00787921, Time: 0.1009 Steps: 90600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009520, Sample Num: 152320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00220391, Log Avg loss: 0.00409091, Global Avg Loss: 0.00787086, Time: 0.2100 Steps: 90800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009720, Sample Num: 155520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00215907, Log Avg loss: 0.00002466, Global Avg Loss: 0.00785362, Time: 0.1259 Steps: 91000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009920, Sample Num: 158720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00211554, Log Avg loss: 0.00000021, Global Avg Loss: 0.00783640, Time: 0.1759 Steps: 91200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010120, Sample Num: 161920, Cur Loss: 0.00000002, Cur Avg Loss: 0.00207374, Log Avg loss: 0.00000043, Global Avg Loss: 0.00781925, Time: 0.1585 Steps: 91400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010320, Sample Num: 165120, Cur Loss: 0.00000018, Cur Avg Loss: 0.00203357, Log Avg loss: 0.00000086, Global Avg Loss: 0.00780218, Time: 0.1725 Steps: 91600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010520, Sample Num: 168320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00208659, Log Avg loss: 0.00482235, Global Avg Loss: 0.00779569, Time: 0.0617 Steps: 91800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010720, Sample Num: 171520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00204770, Log Avg loss: 0.00000222, Global Avg Loss: 0.00777875, Time: 0.1035 Steps: 92000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010920, Sample Num: 174720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00201020, Log Avg loss: 0.00000013, Global Avg Loss: 0.00776187, Time: 0.0435 Steps: 92200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 011120, Sample Num: 177920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00219451, Log Avg loss: 0.01225761, Global Avg Loss: 0.00777160, Time: 0.0477 Steps: 92400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 011320, Sample Num: 181120, Cur Loss: 0.00003524, Cur Avg Loss: 0.00223543, Log Avg loss: 0.00451091, Global Avg Loss: 0.00776456, Time: 0.0532 Steps: 92600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011520, Sample Num: 184320, Cur Loss: 0.00000048, Cur Avg Loss: 0.00226126, Log Avg loss: 0.00372334, Global Avg Loss: 0.00775585, Time: 0.0994 Steps: 92800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011720, Sample Num: 187520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00222289, Log Avg loss: 0.00001252, Global Avg Loss: 0.00773920, Time: 0.0652 Steps: 93000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011920, Sample Num: 190720, Cur Loss: 0.00000008, Cur Avg Loss: 0.00218567, Log Avg loss: 0.00000461, Global Avg Loss: 0.00772260, Time: 0.0725 Steps: 93200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012120, Sample Num: 193920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00214964, Log Avg loss: 0.00000240, Global Avg Loss: 0.00770607, Time: 0.0423 Steps: 93400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012320, Sample Num: 197120, Cur Loss: 0.01028511, Cur Avg Loss: 0.00211578, Log Avg loss: 0.00006388, Global Avg Loss: 0.00768974, Time: 0.0998 Steps: 93600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012520, Sample Num: 200320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00208200, Log Avg loss: 0.00000073, Global Avg Loss: 0.00767335, Time: 0.0390 Steps: 93800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012720, Sample Num: 203520, Cur Loss: 0.00000001, Cur Avg Loss: 0.00213784, Log Avg loss: 0.00563390, Global Avg Loss: 0.00766901, Time: 0.1185 Steps: 94000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012920, Sample Num: 206720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00210482, Log Avg loss: 0.00000441, Global Avg Loss: 0.00765273, Time: 0.2209 Steps: 94200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 013120, Sample Num: 209920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00214980, Log Avg loss: 0.00505543, Global Avg Loss: 0.00764723, Time: 0.2017 Steps: 94400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 013320, Sample Num: 213120, Cur Loss: 0.00000130, Cur Avg Loss: 0.00211754, Log Avg loss: 0.00000175, Global Avg Loss: 0.00763107, Time: 0.0604 Steps: 94600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013520, Sample Num: 216320, Cur Loss: 0.00000001, Cur Avg Loss: 0.00208623, Log Avg loss: 0.00000063, Global Avg Loss: 0.00761497, Time: 0.1401 Steps: 94800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013720, Sample Num: 219520, Cur Loss: 0.00000009, Cur Avg Loss: 0.00205589, Log Avg loss: 0.00000473, Global Avg Loss: 0.00759895, Time: 0.0398 Steps: 95000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013920, Sample Num: 222720, Cur Loss: 0.00000003, Cur Avg Loss: 0.00206795, Log Avg loss: 0.00289527, Global Avg Loss: 0.00758907, Time: 0.1027 Steps: 95200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014120, Sample Num: 225920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00203870, Log Avg loss: 0.00000335, Global Avg Loss: 0.00757316, Time: 0.1208 Steps: 95400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014320, Sample Num: 229120, Cur Loss: 0.00000073, Cur Avg Loss: 0.00203853, Log Avg loss: 0.00202659, Global Avg Loss: 0.00756156, Time: 0.1026 Steps: 95600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014520, Sample Num: 232320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00201050, Log Avg loss: 0.00000324, Global Avg Loss: 0.00754578, Time: 0.2150 Steps: 95800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014720, Sample Num: 235520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00200793, Log Avg loss: 0.00182104, Global Avg Loss: 0.00753385, Time: 0.1714 Steps: 96000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014920, Sample Num: 238720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00198141, Log Avg loss: 0.00002981, Global Avg Loss: 0.00751825, Time: 0.0362 Steps: 96200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015120, Sample Num: 241920, Cur Loss: 0.00000098, Cur Avg Loss: 0.00195556, Log Avg loss: 0.00002688, Global Avg Loss: 0.00750271, Time: 0.2229 Steps: 96400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015320, Sample Num: 245120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00193004, Log Avg loss: 0.00000136, Global Avg Loss: 0.00748718, Time: 0.0309 Steps: 96600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015520, Sample Num: 248320, Cur Loss: 0.00000256, Cur Avg Loss: 0.00191891, Log Avg loss: 0.00106564, Global Avg Loss: 0.00747391, Time: 0.0473 Steps: 96800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 015720, Sample Num: 251520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00189450, Log Avg loss: 0.00000101, Global Avg Loss: 0.00745850, Time: 0.0379 Steps: 97000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 015920, Sample Num: 254720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00187073, Log Avg loss: 0.00000232, Global Avg Loss: 0.00744316, Time: 0.1531 Steps: 97200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016120, Sample Num: 257920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00185004, Log Avg loss: 0.00020286, Global Avg Loss: 0.00742829, Time: 0.0447 Steps: 97400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016320, Sample Num: 261120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00182738, Log Avg loss: 0.00000069, Global Avg Loss: 0.00741307, Time: 0.2227 Steps: 97600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016520, Sample Num: 264320, Cur Loss: 0.00000001, Cur Avg Loss: 0.00180571, Log Avg loss: 0.00003737, Global Avg Loss: 0.00739799, Time: 0.1040 Steps: 97800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016720, Sample Num: 267520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00178756, Log Avg loss: 0.00028848, Global Avg Loss: 0.00738348, Time: 0.0950 Steps: 98000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016920, Sample Num: 270720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00176647, Log Avg loss: 0.00000350, Global Avg Loss: 0.00736845, Time: 0.1494 Steps: 98200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017120, Sample Num: 273920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00182084, Log Avg loss: 0.00642035, Global Avg Loss: 0.00736652, Time: 0.0458 Steps: 98400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017320, Sample Num: 277120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00184411, Log Avg loss: 0.00383628, Global Avg Loss: 0.00735936, Time: 0.0587 Steps: 98600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017520, Sample Num: 280320, Cur Loss: 0.00000007, Cur Avg Loss: 0.00182364, Log Avg loss: 0.00005074, Global Avg Loss: 0.00734457, Time: 0.1244 Steps: 98800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 017720, Sample Num: 283520, Cur Loss: 0.00000002, Cur Avg Loss: 0.00180318, Log Avg loss: 0.00001077, Global Avg Loss: 0.00732975, Time: 0.0782 Steps: 99000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 017920, Sample Num: 286720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00178311, Log Avg loss: 0.00000517, Global Avg Loss: 0.00731499, Time: 0.1818 Steps: 99200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018120, Sample Num: 289920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00176344, Log Avg loss: 0.00000088, Global Avg Loss: 0.00730027, Time: 0.0361 Steps: 99400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018320, Sample Num: 293120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00174427, Log Avg loss: 0.00000756, Global Avg Loss: 0.00728562, Time: 0.1102 Steps: 99600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018520, Sample Num: 296320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00172564, Log Avg loss: 0.00001928, Global Avg Loss: 0.00727106, Time: 0.0470 Steps: 99800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018720, Sample Num: 299520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170721, Log Avg loss: 0.00000010, Global Avg Loss: 0.00725652, Time: 0.0528 Steps: 100000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018920, Sample Num: 302720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170424, Log Avg loss: 0.00142656, Global Avg Loss: 0.00724488, Time: 0.1021 Steps: 100200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019120, Sample Num: 305920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00168653, Log Avg loss: 0.00001151, Global Avg Loss: 0.00723048, Time: 0.1221 Steps: 100400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019320, Sample Num: 309120, Cur Loss: 0.00000590, Cur Avg Loss: 0.00167135, Log Avg loss: 0.00021999, Global Avg Loss: 0.00721654, Time: 0.0584 Steps: 100600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019520, Sample Num: 312320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165423, Log Avg loss: 0.00000058, Global Avg Loss: 0.00720222, Time: 0.0469 Steps: 100800, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 019720, Sample Num: 315520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00163746, Log Avg loss: 0.00000052, Global Avg Loss: 0.00718796, Time: 0.1396 Steps: 101000, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 019920, Sample Num: 318720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162103, Log Avg loss: 0.00000070, Global Avg Loss: 0.00717376, Time: 0.1586 Steps: 101200, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 020120, Sample Num: 321920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160492, Log Avg loss: 0.00000026, Global Avg Loss: 0.00715961, Time: 0.0301 Steps: 101400, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 020320, Sample Num: 325113, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165965, Log Avg loss: 0.00716571, Global Avg Loss: 0.00715962, Time: 0.0650 Steps: 101600, Updated lr: 0.000050 ***** Running evaluation checkpoint-101600 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-101600 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1838.653873, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.017001, "eval_total_loss": 46.071856, "eval_acc": 0.998131, "eval_prec": 0.997975, "eval_recall": 0.998297, "eval_f1": 0.998136, "eval_roc_auc": 0.999759, "eval_pr_auc": 0.999543, "eval_confusion_matrix": {"tn": 21583, "fp": 44, "fn": 37, "tp": 21685}, "eval_mcc2": 0.996263, "eval_mcc": 0.996263, "eval_sn": 0.998297, "eval_sp": 0.997966, "update_flag": true, "test_avg_loss": 0.016787, "test_total_loss": 68.22411, "test_acc": 0.998031, "test_prec": 0.997817, "test_recall": 0.998246, "test_f1": 0.998031, "test_roc_auc": 0.999808, "test_pr_auc": 0.999647, "test_confusion_matrix": {"tn": 32446, "fp": 71, "fn": 57, "tp": 32448}, "test_mcc2": 0.996063, "test_mcc": 0.996063, "test_sn": 0.998246, "test_sp": 0.997817, "lr": 5.0049261083743846e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.007159618863640025, "train_cur_epoch_loss": 33.724043865474655, "train_cur_epoch_avg_loss": 0.0016596478280253275, "train_cur_epoch_time": 1838.6538729667664, "train_cur_epoch_avg_time": 0.09048493469324638, "epoch": 5, "step": 101600} ################################################## Training, Epoch: 0006, Batch: 000200, Sample Num: 3200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000985, Log Avg loss: 0.00000985, Global Avg Loss: 0.00714557, Time: 0.2355 Steps: 101800, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000400, Sample Num: 6400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000513, Log Avg loss: 0.00000042, Global Avg Loss: 0.00713156, Time: 0.0371 Steps: 102000, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000600, Sample Num: 9600, Cur Loss: 0.00000016, Cur Avg Loss: 0.00000349, Log Avg loss: 0.00000021, Global Avg Loss: 0.00711761, Time: 0.0901 Steps: 102200, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000800, Sample Num: 12800, Cur Loss: 0.00000026, Cur Avg Loss: 0.00000292, Log Avg loss: 0.00000121, Global Avg Loss: 0.00710371, Time: 0.1023 Steps: 102400, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 001000, Sample Num: 16000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000243, Log Avg loss: 0.00000047, Global Avg Loss: 0.00708986, Time: 0.0432 Steps: 102600, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 001200, Sample Num: 19200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000283, Log Avg loss: 0.00000485, Global Avg Loss: 0.00707608, Time: 0.2356 Steps: 102800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001400, Sample Num: 22400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000250, Log Avg loss: 0.00000048, Global Avg Loss: 0.00706234, Time: 0.1017 Steps: 103000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000229, Log Avg loss: 0.00000085, Global Avg Loss: 0.00704865, Time: 0.0989 Steps: 103200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001800, Sample Num: 28800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000204, Log Avg loss: 0.00000004, Global Avg Loss: 0.00703502, Time: 0.0904 Steps: 103400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00041257, Log Avg loss: 0.00410732, Global Avg Loss: 0.00702937, Time: 0.0324 Steps: 103600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002200, Sample Num: 35200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00049683, Log Avg loss: 0.00133939, Global Avg Loss: 0.00701840, Time: 0.0587 Steps: 103800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002400, Sample Num: 38400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00045547, Log Avg loss: 0.00000050, Global Avg Loss: 0.00700491, Time: 0.0637 Steps: 104000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00054917, Log Avg loss: 0.00167367, Global Avg Loss: 0.00699467, Time: 0.2275 Steps: 104200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102441, Log Avg loss: 0.00720245, Global Avg Loss: 0.00699507, Time: 0.0432 Steps: 104400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00123773, Log Avg loss: 0.00422428, Global Avg Loss: 0.00698978, Time: 0.0430 Steps: 104600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00116064, Log Avg loss: 0.00000434, Global Avg Loss: 0.00697644, Time: 0.1759 Steps: 104800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003400, Sample Num: 54400, Cur Loss: 0.00000048, Cur Avg Loss: 0.00128647, Log Avg loss: 0.00329967, Global Avg Loss: 0.00696944, Time: 0.0478 Steps: 105000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003600, Sample Num: 57600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126912, Log Avg loss: 0.00097423, Global Avg Loss: 0.00695804, Time: 0.0603 Steps: 105200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003800, Sample Num: 60800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00128120, Log Avg loss: 0.00149866, Global Avg Loss: 0.00694768, Time: 0.0467 Steps: 105400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121799, Log Avg loss: 0.00001691, Global Avg Loss: 0.00693456, Time: 0.0492 Steps: 105600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004200, Sample Num: 67200, Cur Loss: 0.00000006, Cur Avg Loss: 0.00116003, Log Avg loss: 0.00000093, Global Avg Loss: 0.00692145, Time: 0.1516 Steps: 105800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137113, Log Avg loss: 0.00580406, Global Avg Loss: 0.00691934, Time: 0.0407 Steps: 106000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00000137, Cur Avg Loss: 0.00178930, Log Avg loss: 0.01098917, Global Avg Loss: 0.00692701, Time: 0.0556 Steps: 106200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004800, Sample Num: 76800, Cur Loss: 0.00000024, Cur Avg Loss: 0.00171482, Log Avg loss: 0.00000181, Global Avg Loss: 0.00691399, Time: 0.2021 Steps: 106400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 005000, Sample Num: 80000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164632, Log Avg loss: 0.00000224, Global Avg Loss: 0.00690102, Time: 0.0448 Steps: 106600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158623, Log Avg loss: 0.00008398, Global Avg Loss: 0.00688826, Time: 0.1596 Steps: 106800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00000005, Cur Avg Loss: 0.00153776, Log Avg loss: 0.00027753, Global Avg Loss: 0.00687590, Time: 0.1365 Steps: 107000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005600, Sample Num: 89600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148927, Log Avg loss: 0.00018008, Global Avg Loss: 0.00686341, Time: 0.1791 Steps: 107200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005800, Sample Num: 92800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167529, Log Avg loss: 0.00688390, Global Avg Loss: 0.00686344, Time: 0.1101 Steps: 107400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006000, Sample Num: 96000, Cur Loss: 0.00000058, Cur Avg Loss: 0.00172918, Log Avg loss: 0.00329201, Global Avg Loss: 0.00685681, Time: 0.0631 Steps: 107600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006200, Sample Num: 99200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00178827, Log Avg loss: 0.00356098, Global Avg Loss: 0.00685069, Time: 0.1496 Steps: 107800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00000010, Cur Avg Loss: 0.00173339, Log Avg loss: 0.00003207, Global Avg Loss: 0.00683806, Time: 0.1152 Steps: 108000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006600, Sample Num: 105600, Cur Loss: 0.00000001, Cur Avg Loss: 0.00168087, Log Avg loss: 0.00000032, Global Avg Loss: 0.00682543, Time: 0.1812 Steps: 108200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006800, Sample Num: 108800, Cur Loss: 0.00000007, Cur Avg Loss: 0.00163145, Log Avg loss: 0.00000036, Global Avg Loss: 0.00681283, Time: 0.0662 Steps: 108400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158486, Log Avg loss: 0.00000103, Global Avg Loss: 0.00680029, Time: 0.1223 Steps: 108600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007200, Sample Num: 115200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154089, Log Avg loss: 0.00000171, Global Avg Loss: 0.00678779, Time: 0.0830 Steps: 108800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00001013, Cur Avg Loss: 0.00150908, Log Avg loss: 0.00036389, Global Avg Loss: 0.00677600, Time: 0.2236 Steps: 109000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00000095, Cur Avg Loss: 0.00160034, Log Avg loss: 0.00497692, Global Avg Loss: 0.00677271, Time: 0.1187 Steps: 109200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 007800, Sample Num: 124800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00174690, Log Avg loss: 0.00731644, Global Avg Loss: 0.00677370, Time: 0.0967 Steps: 109400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00000023, Cur Avg Loss: 0.00195467, Log Avg loss: 0.01005765, Global Avg Loss: 0.00677970, Time: 0.4189 Steps: 109600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00000411, Cur Avg Loss: 0.00190722, Log Avg loss: 0.00000910, Global Avg Loss: 0.00676736, Time: 0.1538 Steps: 109800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008400, Sample Num: 134400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00186186, Log Avg loss: 0.00000226, Global Avg Loss: 0.00675506, Time: 0.1002 Steps: 110000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008600, Sample Num: 137600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00186779, Log Avg loss: 0.00211671, Global Avg Loss: 0.00674664, Time: 0.1124 Steps: 110200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00182617, Log Avg loss: 0.00003651, Global Avg Loss: 0.00673449, Time: 0.1005 Steps: 110400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00000006, Cur Avg Loss: 0.00189392, Log Avg loss: 0.00487491, Global Avg Loss: 0.00673113, Time: 0.0577 Steps: 110600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00000012, Cur Avg Loss: 0.00186010, Log Avg loss: 0.00033816, Global Avg Loss: 0.00671959, Time: 0.0613 Steps: 110800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00182053, Log Avg loss: 0.00000051, Global Avg Loss: 0.00670748, Time: 0.1669 Steps: 111000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00000601, Cur Avg Loss: 0.00187847, Log Avg loss: 0.00460174, Global Avg Loss: 0.00670369, Time: 0.1080 Steps: 111200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 009800, Sample Num: 156800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00184045, Log Avg loss: 0.00001510, Global Avg Loss: 0.00669168, Time: 0.1203 Steps: 111400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00180367, Log Avg loss: 0.00000144, Global Avg Loss: 0.00667969, Time: 0.1205 Steps: 111600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010200, Sample Num: 163200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00176830, Log Avg loss: 0.00000014, Global Avg Loss: 0.00666775, Time: 0.1863 Steps: 111800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010400, Sample Num: 166400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00173431, Log Avg loss: 0.00000052, Global Avg Loss: 0.00665584, Time: 0.0963 Steps: 112000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00177278, Log Avg loss: 0.00377332, Global Avg Loss: 0.00665070, Time: 0.0983 Steps: 112200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00173996, Log Avg loss: 0.00000039, Global Avg Loss: 0.00663887, Time: 0.1070 Steps: 112400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00000244, Cur Avg Loss: 0.00170902, Log Avg loss: 0.00003865, Global Avg Loss: 0.00662715, Time: 0.1566 Steps: 112600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00186726, Log Avg loss: 0.01057020, Global Avg Loss: 0.00663414, Time: 0.2755 Steps: 112800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00188151, Log Avg loss: 0.00267933, Global Avg Loss: 0.00662714, Time: 0.0392 Steps: 113000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00190051, Log Avg loss: 0.00298404, Global Avg Loss: 0.00662070, Time: 0.0414 Steps: 113200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 011800, Sample Num: 188800, Cur Loss: 0.00000006, Cur Avg Loss: 0.00186831, Log Avg loss: 0.00000051, Global Avg Loss: 0.00660902, Time: 0.0428 Steps: 113400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012000, Sample Num: 192000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00183721, Log Avg loss: 0.00000236, Global Avg Loss: 0.00659739, Time: 0.0490 Steps: 113600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012200, Sample Num: 195200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00180710, Log Avg loss: 0.00000036, Global Avg Loss: 0.00658580, Time: 0.0703 Steps: 113800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012400, Sample Num: 198400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00177796, Log Avg loss: 0.00000014, Global Avg Loss: 0.00657424, Time: 0.0435 Steps: 114000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012600, Sample Num: 201600, Cur Loss: 0.00000119, Cur Avg Loss: 0.00175004, Log Avg loss: 0.00001956, Global Avg Loss: 0.00656277, Time: 0.1011 Steps: 114200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012800, Sample Num: 204800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00177261, Log Avg loss: 0.00319435, Global Avg Loss: 0.00655688, Time: 0.0756 Steps: 114400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013000, Sample Num: 208000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00174535, Log Avg loss: 0.00000049, Global Avg Loss: 0.00654543, Time: 0.0587 Steps: 114600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013200, Sample Num: 211200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00178792, Log Avg loss: 0.00455504, Global Avg Loss: 0.00654197, Time: 0.0437 Steps: 114800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013400, Sample Num: 214400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00176124, Log Avg loss: 0.00000018, Global Avg Loss: 0.00653059, Time: 0.1013 Steps: 115000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 013600, Sample Num: 217600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00173534, Log Avg loss: 0.00000032, Global Avg Loss: 0.00651925, Time: 0.1090 Steps: 115200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 013800, Sample Num: 220800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00171019, Log Avg loss: 0.00000007, Global Avg Loss: 0.00650795, Time: 0.0810 Steps: 115400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014000, Sample Num: 224000, Cur Loss: 0.00000001, Cur Avg Loss: 0.00172074, Log Avg loss: 0.00244825, Global Avg Loss: 0.00650093, Time: 0.1155 Steps: 115600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014200, Sample Num: 227200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00169651, Log Avg loss: 0.00000038, Global Avg Loss: 0.00648970, Time: 0.0624 Steps: 115800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014400, Sample Num: 230400, Cur Loss: 0.00000012, Cur Avg Loss: 0.00167371, Log Avg loss: 0.00005503, Global Avg Loss: 0.00647861, Time: 0.0557 Steps: 116000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014600, Sample Num: 233600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165079, Log Avg loss: 0.00000091, Global Avg Loss: 0.00646746, Time: 0.1582 Steps: 116200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014800, Sample Num: 236800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166694, Log Avg loss: 0.00284570, Global Avg Loss: 0.00646124, Time: 0.0837 Steps: 116400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015000, Sample Num: 240000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167324, Log Avg loss: 0.00213959, Global Avg Loss: 0.00645382, Time: 0.3043 Steps: 116600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015200, Sample Num: 243200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165123, Log Avg loss: 0.00000055, Global Avg Loss: 0.00644277, Time: 0.4301 Steps: 116800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015400, Sample Num: 246400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162985, Log Avg loss: 0.00000451, Global Avg Loss: 0.00643177, Time: 0.1091 Steps: 117000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 015600, Sample Num: 249600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160897, Log Avg loss: 0.00000154, Global Avg Loss: 0.00642080, Time: 0.2232 Steps: 117200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 015800, Sample Num: 252800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158861, Log Avg loss: 0.00000026, Global Avg Loss: 0.00640986, Time: 0.0357 Steps: 117400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016000, Sample Num: 256000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00156899, Log Avg loss: 0.00001955, Global Avg Loss: 0.00639899, Time: 0.1154 Steps: 117600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016200, Sample Num: 259200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154962, Log Avg loss: 0.00000003, Global Avg Loss: 0.00638813, Time: 0.0525 Steps: 117800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016400, Sample Num: 262400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153073, Log Avg loss: 0.00000036, Global Avg Loss: 0.00637730, Time: 0.1597 Steps: 118000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016600, Sample Num: 265600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00151232, Log Avg loss: 0.00000240, Global Avg Loss: 0.00636651, Time: 0.0430 Steps: 118200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016800, Sample Num: 268800, Cur Loss: 0.00000024, Cur Avg Loss: 0.00149596, Log Avg loss: 0.00013814, Global Avg Loss: 0.00635599, Time: 0.1206 Steps: 118400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017000, Sample Num: 272000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147836, Log Avg loss: 0.00000007, Global Avg Loss: 0.00634527, Time: 0.0378 Steps: 118600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017200, Sample Num: 275200, Cur Loss: 0.00000030, Cur Avg Loss: 0.00157728, Log Avg loss: 0.00998550, Global Avg Loss: 0.00635140, Time: 0.0302 Steps: 118800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017400, Sample Num: 278400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00155916, Log Avg loss: 0.00000098, Global Avg Loss: 0.00634073, Time: 0.0470 Steps: 119000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 017600, Sample Num: 281600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154155, Log Avg loss: 0.00000976, Global Avg Loss: 0.00633011, Time: 0.1201 Steps: 119200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 017800, Sample Num: 284800, Cur Loss: 0.00000012, Cur Avg Loss: 0.00152427, Log Avg loss: 0.00000289, Global Avg Loss: 0.00631951, Time: 0.1585 Steps: 119400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018000, Sample Num: 288000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00150734, Log Avg loss: 0.00000097, Global Avg Loss: 0.00630894, Time: 0.1138 Steps: 119600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018200, Sample Num: 291200, Cur Loss: 0.00002137, Cur Avg Loss: 0.00149078, Log Avg loss: 0.00000020, Global Avg Loss: 0.00629841, Time: 0.1095 Steps: 119800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018400, Sample Num: 294400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147458, Log Avg loss: 0.00000030, Global Avg Loss: 0.00628791, Time: 0.1161 Steps: 120000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018600, Sample Num: 297600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147911, Log Avg loss: 0.00189579, Global Avg Loss: 0.00628060, Time: 0.0656 Steps: 120200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018800, Sample Num: 300800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00149778, Log Avg loss: 0.00323402, Global Avg Loss: 0.00627554, Time: 0.0529 Steps: 120400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019000, Sample Num: 304000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148203, Log Avg loss: 0.00000199, Global Avg Loss: 0.00626514, Time: 0.0600 Steps: 120600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019200, Sample Num: 307200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146717, Log Avg loss: 0.00005541, Global Avg Loss: 0.00625486, Time: 0.2272 Steps: 120800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019400, Sample Num: 310400, Cur Loss: 0.00001249, Cur Avg Loss: 0.00145205, Log Avg loss: 0.00000008, Global Avg Loss: 0.00624452, Time: 0.0406 Steps: 121000, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 019600, Sample Num: 313600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00143723, Log Avg loss: 0.00000024, Global Avg Loss: 0.00623422, Time: 0.0679 Steps: 121200, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 019800, Sample Num: 316800, Cur Loss: 0.00000095, Cur Avg Loss: 0.00142772, Log Avg loss: 0.00049522, Global Avg Loss: 0.00622476, Time: 0.0865 Steps: 121400, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 020000, Sample Num: 320000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00141344, Log Avg loss: 0.00000003, Global Avg Loss: 0.00621452, Time: 0.0628 Steps: 121600, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 020200, Sample Num: 323200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00143961, Log Avg loss: 0.00405652, Global Avg Loss: 0.00621098, Time: 0.0647 Steps: 121800, Updated lr: 0.000040 ***** Running evaluation checkpoint-121920 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-121920 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1940.315233, Avg time per batch (s): 0.100000 {"eval_avg_loss": 0.017736, "eval_total_loss": 48.06513, "eval_acc": 0.998016, "eval_prec": 0.998158, "eval_recall": 0.997882, "eval_f1": 0.99802, "eval_roc_auc": 0.999762, "eval_pr_auc": 0.999548, "eval_confusion_matrix": {"tn": 21587, "fp": 40, "fn": 46, "tp": 21676}, "eval_mcc2": 0.996032, "eval_mcc": 0.996032, "eval_sn": 0.997882, "eval_sp": 0.99815, "update_flag": false, "test_avg_loss": 0.017436, "test_total_loss": 70.861655, "test_acc": 0.998016, "test_prec": 0.998062, "test_recall": 0.99797, "test_f1": 0.998016, "test_roc_auc": 0.999786, "test_pr_auc": 0.999595, "test_confusion_matrix": {"tn": 32454, "fp": 63, "fn": 66, "tp": 32439}, "test_mcc2": 0.996032, "test_mcc": 0.996032, "test_sn": 0.99797, "test_sp": 0.998063, "lr": 4.003940886699508e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.006209734470981164, "train_cur_epoch_loss": 29.673550156188057, "train_cur_epoch_avg_loss": 0.00146031250768642, "train_cur_epoch_time": 1940.315232515335, "train_cur_epoch_avg_time": 0.09548795435606965, "epoch": 6, "step": 121920} ################################################## Training, Epoch: 0007, Batch: 000080, Sample Num: 1280, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000390, Log Avg loss: 0.00296891, Global Avg Loss: 0.00620567, Time: 0.1761 Steps: 122000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000280, Sample Num: 4480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000591, Log Avg loss: 0.00000672, Global Avg Loss: 0.00619552, Time: 0.0588 Steps: 122200, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000480, Sample Num: 7680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000361, Log Avg loss: 0.00000039, Global Avg Loss: 0.00618540, Time: 0.0259 Steps: 122400, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000680, Sample Num: 10880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000256, Log Avg loss: 0.00000005, Global Avg Loss: 0.00617531, Time: 0.0571 Steps: 122600, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000880, Sample Num: 14080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000199, Log Avg loss: 0.00000005, Global Avg Loss: 0.00616525, Time: 0.1577 Steps: 122800, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 001080, Sample Num: 17280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000164, Log Avg loss: 0.00000009, Global Avg Loss: 0.00615522, Time: 0.0423 Steps: 123000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 001280, Sample Num: 20480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000139, Log Avg loss: 0.00000004, Global Avg Loss: 0.00614523, Time: 0.1874 Steps: 123200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001480, Sample Num: 23680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000121, Log Avg loss: 0.00000005, Global Avg Loss: 0.00613527, Time: 0.0522 Steps: 123400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001680, Sample Num: 26880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000107, Log Avg loss: 0.00000003, Global Avg Loss: 0.00612534, Time: 0.1530 Steps: 123600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001880, Sample Num: 30080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000096, Log Avg loss: 0.00000003, Global Avg Loss: 0.00611545, Time: 0.1068 Steps: 123800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002080, Sample Num: 33280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00052277, Log Avg loss: 0.00542784, Global Avg Loss: 0.00611434, Time: 0.1489 Steps: 124000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002280, Sample Num: 36480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00047695, Log Avg loss: 0.00000037, Global Avg Loss: 0.00610449, Time: 0.0634 Steps: 124200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002480, Sample Num: 39680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00043859, Log Avg loss: 0.00000133, Global Avg Loss: 0.00609468, Time: 0.0663 Steps: 124400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002680, Sample Num: 42880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00046380, Log Avg loss: 0.00077634, Global Avg Loss: 0.00608615, Time: 0.1026 Steps: 124600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002880, Sample Num: 46080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109381, Log Avg loss: 0.00953606, Global Avg Loss: 0.00609167, Time: 0.0936 Steps: 124800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 003080, Sample Num: 49280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106670, Log Avg loss: 0.00067619, Global Avg Loss: 0.00608301, Time: 0.0450 Steps: 125000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 003280, Sample Num: 52480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121069, Log Avg loss: 0.00342813, Global Avg Loss: 0.00607877, Time: 0.0912 Steps: 125200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003480, Sample Num: 55680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00114118, Log Avg loss: 0.00000129, Global Avg Loss: 0.00606908, Time: 0.0485 Steps: 125400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003680, Sample Num: 58880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112774, Log Avg loss: 0.00089384, Global Avg Loss: 0.00606084, Time: 0.0363 Steps: 125600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003880, Sample Num: 62080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00117805, Log Avg loss: 0.00210387, Global Avg Loss: 0.00605454, Time: 0.1826 Steps: 125800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004080, Sample Num: 65280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112042, Log Avg loss: 0.00000235, Global Avg Loss: 0.00604494, Time: 0.0453 Steps: 126000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004280, Sample Num: 68480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106810, Log Avg loss: 0.00000075, Global Avg Loss: 0.00603536, Time: 0.1076 Steps: 126200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004480, Sample Num: 71680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00139833, Log Avg loss: 0.00846524, Global Avg Loss: 0.00603920, Time: 0.0469 Steps: 126400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004680, Sample Num: 74880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157490, Log Avg loss: 0.00553013, Global Avg Loss: 0.00603840, Time: 0.1088 Steps: 126600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004880, Sample Num: 78080, Cur Loss: 0.00000024, Cur Avg Loss: 0.00155053, Log Avg loss: 0.00098028, Global Avg Loss: 0.00603042, Time: 0.0359 Steps: 126800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 005080, Sample Num: 81280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00152193, Log Avg loss: 0.00082400, Global Avg Loss: 0.00602222, Time: 0.1074 Steps: 127000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 005280, Sample Num: 84480, Cur Loss: 0.00000018, Cur Avg Loss: 0.00146474, Log Avg loss: 0.00001222, Global Avg Loss: 0.00601277, Time: 0.1845 Steps: 127200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005480, Sample Num: 87680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00141146, Log Avg loss: 0.00000491, Global Avg Loss: 0.00600334, Time: 0.0801 Steps: 127400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005680, Sample Num: 90880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137201, Log Avg loss: 0.00029110, Global Avg Loss: 0.00599439, Time: 0.0405 Steps: 127600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005880, Sample Num: 94080, Cur Loss: 0.00000014, Cur Avg Loss: 0.00153399, Log Avg loss: 0.00613404, Global Avg Loss: 0.00599461, Time: 0.0335 Steps: 127800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006080, Sample Num: 97280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00172484, Log Avg loss: 0.00733594, Global Avg Loss: 0.00599670, Time: 0.1959 Steps: 128000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006280, Sample Num: 100480, Cur Loss: 0.00000005, Cur Avg Loss: 0.00166995, Log Avg loss: 0.00000119, Global Avg Loss: 0.00598735, Time: 0.0624 Steps: 128200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006480, Sample Num: 103680, Cur Loss: 0.00000042, Cur Avg Loss: 0.00161921, Log Avg loss: 0.00002610, Global Avg Loss: 0.00597806, Time: 0.0526 Steps: 128400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006680, Sample Num: 106880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157078, Log Avg loss: 0.00000161, Global Avg Loss: 0.00596877, Time: 0.1214 Steps: 128600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006880, Sample Num: 110080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00152517, Log Avg loss: 0.00000164, Global Avg Loss: 0.00595950, Time: 0.0543 Steps: 128800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 007080, Sample Num: 113280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148208, Log Avg loss: 0.00000002, Global Avg Loss: 0.00595026, Time: 0.1396 Steps: 129000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 007280, Sample Num: 116480, Cur Loss: 0.00000001, Cur Avg Loss: 0.00144204, Log Avg loss: 0.00002447, Global Avg Loss: 0.00594109, Time: 0.1428 Steps: 129200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007480, Sample Num: 119680, Cur Loss: 0.00000006, Cur Avg Loss: 0.00149476, Log Avg loss: 0.00341373, Global Avg Loss: 0.00593718, Time: 0.0600 Steps: 129400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007680, Sample Num: 122880, Cur Loss: 0.00000001, Cur Avg Loss: 0.00146130, Log Avg loss: 0.00020977, Global Avg Loss: 0.00592835, Time: 0.1749 Steps: 129600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007880, Sample Num: 126080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158180, Log Avg loss: 0.00620909, Global Avg Loss: 0.00592878, Time: 0.0989 Steps: 129800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008080, Sample Num: 129280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00179912, Log Avg loss: 0.01036171, Global Avg Loss: 0.00593560, Time: 0.1548 Steps: 130000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008280, Sample Num: 132480, Cur Loss: 0.00000001, Cur Avg Loss: 0.00175570, Log Avg loss: 0.00000134, Global Avg Loss: 0.00592648, Time: 0.0981 Steps: 130200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008480, Sample Num: 135680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00171431, Log Avg loss: 0.00000083, Global Avg Loss: 0.00591739, Time: 0.1183 Steps: 130400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008680, Sample Num: 138880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170559, Log Avg loss: 0.00133593, Global Avg Loss: 0.00591038, Time: 0.0624 Steps: 130600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008880, Sample Num: 142080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166719, Log Avg loss: 0.00000058, Global Avg Loss: 0.00590134, Time: 0.0416 Steps: 130800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 009080, Sample Num: 145280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166042, Log Avg loss: 0.00135983, Global Avg Loss: 0.00589441, Time: 0.1228 Steps: 131000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 009280, Sample Num: 148480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162465, Log Avg loss: 0.00000059, Global Avg Loss: 0.00588542, Time: 0.1084 Steps: 131200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009480, Sample Num: 151680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167991, Log Avg loss: 0.00424423, Global Avg Loss: 0.00588293, Time: 0.0430 Steps: 131400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009680, Sample Num: 154880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164522, Log Avg loss: 0.00000101, Global Avg Loss: 0.00587399, Time: 0.0376 Steps: 131600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009880, Sample Num: 158080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161196, Log Avg loss: 0.00000202, Global Avg Loss: 0.00586508, Time: 0.0358 Steps: 131800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010080, Sample Num: 161280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158002, Log Avg loss: 0.00000218, Global Avg Loss: 0.00585619, Time: 0.0788 Steps: 132000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010280, Sample Num: 164480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154928, Log Avg loss: 0.00000009, Global Avg Loss: 0.00584733, Time: 0.1069 Steps: 132200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010480, Sample Num: 167680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160974, Log Avg loss: 0.00471721, Global Avg Loss: 0.00584563, Time: 0.1476 Steps: 132400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010680, Sample Num: 170880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157993, Log Avg loss: 0.00001792, Global Avg Loss: 0.00583684, Time: 0.1559 Steps: 132600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010880, Sample Num: 174080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00155089, Log Avg loss: 0.00000009, Global Avg Loss: 0.00582805, Time: 0.1207 Steps: 132800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 011080, Sample Num: 177280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164433, Log Avg loss: 0.00672755, Global Avg Loss: 0.00582940, Time: 0.1368 Steps: 133000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 011280, Sample Num: 180480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166922, Log Avg loss: 0.00304797, Global Avg Loss: 0.00582522, Time: 0.0416 Steps: 133200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011480, Sample Num: 183680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170759, Log Avg loss: 0.00387180, Global Avg Loss: 0.00582229, Time: 0.1001 Steps: 133400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011680, Sample Num: 186880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167859, Log Avg loss: 0.00001397, Global Avg Loss: 0.00581360, Time: 0.1049 Steps: 133600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011880, Sample Num: 190080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165034, Log Avg loss: 0.00000029, Global Avg Loss: 0.00580491, Time: 0.0564 Steps: 133800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012080, Sample Num: 193280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162301, Log Avg loss: 0.00000014, Global Avg Loss: 0.00579625, Time: 0.0390 Steps: 134000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012280, Sample Num: 196480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159659, Log Avg loss: 0.00000043, Global Avg Loss: 0.00578761, Time: 0.0597 Steps: 134200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012480, Sample Num: 199680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157100, Log Avg loss: 0.00000016, Global Avg Loss: 0.00577900, Time: 0.0623 Steps: 134400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012680, Sample Num: 202880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00156645, Log Avg loss: 0.00128241, Global Avg Loss: 0.00577231, Time: 0.0621 Steps: 134600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012880, Sample Num: 206080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154213, Log Avg loss: 0.00000025, Global Avg Loss: 0.00576375, Time: 0.1517 Steps: 134800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 013080, Sample Num: 209280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159539, Log Avg loss: 0.00502512, Global Avg Loss: 0.00576266, Time: 0.0307 Steps: 135000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 013280, Sample Num: 212480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157142, Log Avg loss: 0.00000399, Global Avg Loss: 0.00575414, Time: 0.0628 Steps: 135200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013480, Sample Num: 215680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154811, Log Avg loss: 0.00000012, Global Avg Loss: 0.00574564, Time: 0.2265 Steps: 135400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013680, Sample Num: 218880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00152548, Log Avg loss: 0.00000014, Global Avg Loss: 0.00573716, Time: 0.1499 Steps: 135600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013880, Sample Num: 222080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154159, Log Avg loss: 0.00264382, Global Avg Loss: 0.00573261, Time: 0.1073 Steps: 135800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014080, Sample Num: 225280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00151972, Log Avg loss: 0.00000141, Global Avg Loss: 0.00572418, Time: 0.0758 Steps: 136000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014280, Sample Num: 228480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00149862, Log Avg loss: 0.00001355, Global Avg Loss: 0.00571579, Time: 0.0869 Steps: 136200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014480, Sample Num: 231680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147802, Log Avg loss: 0.00000690, Global Avg Loss: 0.00570742, Time: 0.1162 Steps: 136400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014680, Sample Num: 234880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146013, Log Avg loss: 0.00016491, Global Avg Loss: 0.00569931, Time: 0.1015 Steps: 136600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014880, Sample Num: 238080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148101, Log Avg loss: 0.00301405, Global Avg Loss: 0.00569538, Time: 0.1079 Steps: 136800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015080, Sample Num: 241280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146147, Log Avg loss: 0.00000759, Global Avg Loss: 0.00568708, Time: 0.1412 Steps: 137000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015280, Sample Num: 244480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00144234, Log Avg loss: 0.00000003, Global Avg Loss: 0.00567879, Time: 0.1334 Steps: 137200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015480, Sample Num: 247680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00142372, Log Avg loss: 0.00000058, Global Avg Loss: 0.00567052, Time: 0.1482 Steps: 137400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 015680, Sample Num: 250880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140556, Log Avg loss: 0.00000010, Global Avg Loss: 0.00566228, Time: 0.1760 Steps: 137600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 015880, Sample Num: 254080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00138786, Log Avg loss: 0.00000074, Global Avg Loss: 0.00565406, Time: 0.0991 Steps: 137800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016080, Sample Num: 257280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137061, Log Avg loss: 0.00000057, Global Avg Loss: 0.00564587, Time: 0.0556 Steps: 138000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016280, Sample Num: 260480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135377, Log Avg loss: 0.00000004, Global Avg Loss: 0.00563770, Time: 0.0750 Steps: 138200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016480, Sample Num: 263680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00133740, Log Avg loss: 0.00000441, Global Avg Loss: 0.00562956, Time: 0.0399 Steps: 138400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016680, Sample Num: 266880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00132136, Log Avg loss: 0.00000002, Global Avg Loss: 0.00562144, Time: 0.1159 Steps: 138600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016880, Sample Num: 270080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130571, Log Avg loss: 0.00000009, Global Avg Loss: 0.00561334, Time: 0.0646 Steps: 138800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017080, Sample Num: 273280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00139685, Log Avg loss: 0.00908913, Global Avg Loss: 0.00561834, Time: 0.0513 Steps: 139000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017280, Sample Num: 276480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00141917, Log Avg loss: 0.00332560, Global Avg Loss: 0.00561504, Time: 0.2035 Steps: 139200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017480, Sample Num: 279680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140293, Log Avg loss: 0.00000010, Global Avg Loss: 0.00560699, Time: 0.1799 Steps: 139400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 017680, Sample Num: 282880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00138707, Log Avg loss: 0.00000023, Global Avg Loss: 0.00559896, Time: 0.0916 Steps: 139600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 017880, Sample Num: 286080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137155, Log Avg loss: 0.00000017, Global Avg Loss: 0.00559095, Time: 0.0611 Steps: 139800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018080, Sample Num: 289280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135638, Log Avg loss: 0.00000003, Global Avg Loss: 0.00558296, Time: 0.0419 Steps: 140000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018280, Sample Num: 292480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134154, Log Avg loss: 0.00000001, Global Avg Loss: 0.00557499, Time: 0.1601 Steps: 140200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018480, Sample Num: 295680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137404, Log Avg loss: 0.00434436, Global Avg Loss: 0.00557324, Time: 0.0528 Steps: 140400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018680, Sample Num: 298880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135933, Log Avg loss: 0.00000044, Global Avg Loss: 0.00556531, Time: 0.1527 Steps: 140600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018880, Sample Num: 302080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00136644, Log Avg loss: 0.00203069, Global Avg Loss: 0.00556029, Time: 0.0804 Steps: 140800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019080, Sample Num: 305280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135212, Log Avg loss: 0.00000007, Global Avg Loss: 0.00555241, Time: 0.2329 Steps: 141000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019280, Sample Num: 308480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00133810, Log Avg loss: 0.00000010, Global Avg Loss: 0.00554454, Time: 0.0685 Steps: 141200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019480, Sample Num: 311680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00132436, Log Avg loss: 0.00000013, Global Avg Loss: 0.00553670, Time: 0.1042 Steps: 141400, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 019680, Sample Num: 314880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00131452, Log Avg loss: 0.00035576, Global Avg Loss: 0.00552938, Time: 0.0429 Steps: 141600, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 019880, Sample Num: 318080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130133, Log Avg loss: 0.00000373, Global Avg Loss: 0.00552159, Time: 0.1005 Steps: 141800, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 020080, Sample Num: 321280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00128837, Log Avg loss: 0.00000001, Global Avg Loss: 0.00551381, Time: 0.0681 Steps: 142000, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 020280, Sample Num: 324480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134613, Log Avg loss: 0.00714506, Global Avg Loss: 0.00551611, Time: 0.0637 Steps: 142200, Updated lr: 0.000030 ***** Running evaluation checkpoint-142240 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-142240 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1840.135144, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.01861, "eval_total_loss": 50.432662, "eval_acc": 0.997993, "eval_prec": 0.998204, "eval_recall": 0.99779, "eval_f1": 0.997997, "eval_roc_auc": 0.999768, "eval_pr_auc": 0.999557, "eval_confusion_matrix": {"tn": 21588, "fp": 39, "fn": 48, "tp": 21674}, "eval_mcc2": 0.995986, "eval_mcc": 0.995986, "eval_sn": 0.99779, "eval_sp": 0.998197, "update_flag": false, "test_avg_loss": 0.018399, "test_total_loss": 74.775462, "test_acc": 0.998047, "test_prec": 0.998123, "test_recall": 0.99797, "test_f1": 0.998046, "test_roc_auc": 0.999779, "test_pr_auc": 0.999574, "test_confusion_matrix": {"tn": 32456, "fp": 61, "fn": 66, "tp": 32439}, "test_mcc2": 0.996094, "test_mcc": 0.996094, "test_sn": 0.99797, "test_sp": 0.998124, "lr": 3.0029556650246303e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.0055145551796861175, "train_cur_epoch_loss": 27.299502056525903, "train_cur_epoch_avg_loss": 0.0013434794319156448, "train_cur_epoch_time": 1840.135143995285, "train_cur_epoch_avg_time": 0.09055783188953175, "epoch": 7, "step": 142240} ################################################## Training, Epoch: 0008, Batch: 000160, Sample Num: 2560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000015, Log Avg loss: 0.00000049, Global Avg Loss: 0.00550836, Time: 0.1014 Steps: 142400, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000360, Sample Num: 5760, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000016, Log Avg loss: 0.00000017, Global Avg Loss: 0.00550063, Time: 0.0416 Steps: 142600, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000011, Log Avg loss: 0.00000002, Global Avg Loss: 0.00549293, Time: 0.0339 Steps: 142800, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000760, Sample Num: 12160, Cur Loss: 0.00000012, Cur Avg Loss: 0.00000010, Log Avg loss: 0.00000005, Global Avg Loss: 0.00548525, Time: 0.0340 Steps: 143000, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000960, Sample Num: 15360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000008, Log Avg loss: 0.00000002, Global Avg Loss: 0.00547759, Time: 0.2628 Steps: 143200, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 001160, Sample Num: 18560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000007, Log Avg loss: 0.00000004, Global Avg Loss: 0.00546995, Time: 0.1386 Steps: 143400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000006, Log Avg loss: 0.00000001, Global Avg Loss: 0.00546233, Time: 0.1086 Steps: 143600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001560, Sample Num: 24960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000042, Log Avg loss: 0.00000288, Global Avg Loss: 0.00545474, Time: 0.0653 Steps: 143800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001760, Sample Num: 28160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000038, Log Avg loss: 0.00000004, Global Avg Loss: 0.00544716, Time: 0.0485 Steps: 144000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001960, Sample Num: 31360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000034, Log Avg loss: 0.00000003, Global Avg Loss: 0.00543960, Time: 0.0633 Steps: 144200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002160, Sample Num: 34560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00035965, Log Avg loss: 0.00388089, Global Avg Loss: 0.00543745, Time: 0.0666 Steps: 144400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00032918, Log Avg loss: 0.00000003, Global Avg Loss: 0.00542993, Time: 0.0860 Steps: 144600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002560, Sample Num: 40960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00030348, Log Avg loss: 0.00000026, Global Avg Loss: 0.00542243, Time: 0.0430 Steps: 144800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002760, Sample Num: 44160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00030643, Log Avg loss: 0.00034425, Global Avg Loss: 0.00541542, Time: 0.0357 Steps: 145000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002960, Sample Num: 47360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087693, Log Avg loss: 0.00874979, Global Avg Loss: 0.00542001, Time: 0.1083 Steps: 145200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082145, Log Avg loss: 0.00000028, Global Avg Loss: 0.00541256, Time: 0.0437 Steps: 145400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003360, Sample Num: 53760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096378, Log Avg loss: 0.00321263, Global Avg Loss: 0.00540954, Time: 0.1495 Steps: 145600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003560, Sample Num: 56960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00090967, Log Avg loss: 0.00000057, Global Avg Loss: 0.00540212, Time: 0.1124 Steps: 145800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003760, Sample Num: 60160, Cur Loss: 0.45287737, Cur Avg Loss: 0.00098174, Log Avg loss: 0.00226465, Global Avg Loss: 0.00539782, Time: 0.1153 Steps: 146000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003960, Sample Num: 63360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00093217, Log Avg loss: 0.00000019, Global Avg Loss: 0.00539044, Time: 0.0666 Steps: 146200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088738, Log Avg loss: 0.00000064, Global Avg Loss: 0.00538307, Time: 0.0872 Steps: 146400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004360, Sample Num: 69760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084668, Log Avg loss: 0.00000007, Global Avg Loss: 0.00537573, Time: 0.1074 Steps: 146600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004560, Sample Num: 72960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00116893, Log Avg loss: 0.00819388, Global Avg Loss: 0.00537957, Time: 0.0405 Steps: 146800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004760, Sample Num: 76160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134200, Log Avg loss: 0.00528806, Global Avg Loss: 0.00537944, Time: 0.0641 Steps: 147000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004960, Sample Num: 79360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134866, Log Avg loss: 0.00150722, Global Avg Loss: 0.00537418, Time: 0.0615 Steps: 147200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 005160, Sample Num: 82560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129702, Log Avg loss: 0.00001644, Global Avg Loss: 0.00536691, Time: 0.0538 Steps: 147400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005360, Sample Num: 85760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124866, Log Avg loss: 0.00000088, Global Avg Loss: 0.00535964, Time: 0.1048 Steps: 147600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00120376, Log Avg loss: 0.00000051, Global Avg Loss: 0.00535239, Time: 0.1448 Steps: 147800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005760, Sample Num: 92160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126051, Log Avg loss: 0.00283809, Global Avg Loss: 0.00534899, Time: 0.2685 Steps: 148000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00145663, Log Avg loss: 0.00710474, Global Avg Loss: 0.00535136, Time: 0.0464 Steps: 148200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006160, Sample Num: 98560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158507, Log Avg loss: 0.00541273, Global Avg Loss: 0.00535144, Time: 0.0645 Steps: 148400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006360, Sample Num: 101760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153535, Log Avg loss: 0.00000383, Global Avg Loss: 0.00534425, Time: 0.0815 Steps: 148600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006560, Sample Num: 104960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148867, Log Avg loss: 0.00000446, Global Avg Loss: 0.00533707, Time: 0.0414 Steps: 148800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006760, Sample Num: 108160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00144463, Log Avg loss: 0.00000006, Global Avg Loss: 0.00532991, Time: 0.0455 Steps: 149000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006960, Sample Num: 111360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140313, Log Avg loss: 0.00000036, Global Avg Loss: 0.00532276, Time: 0.1882 Steps: 149200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 007160, Sample Num: 114560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00136394, Log Avg loss: 0.00000001, Global Avg Loss: 0.00531564, Time: 0.1426 Steps: 149400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 007360, Sample Num: 117760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00132700, Log Avg loss: 0.00000479, Global Avg Loss: 0.00530854, Time: 0.0441 Steps: 149600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007560, Sample Num: 120960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00139112, Log Avg loss: 0.00375075, Global Avg Loss: 0.00530646, Time: 0.1165 Steps: 149800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135536, Log Avg loss: 0.00000368, Global Avg Loss: 0.00529939, Time: 0.1177 Steps: 150000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007960, Sample Num: 127360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167575, Log Avg loss: 0.01410679, Global Avg Loss: 0.00531111, Time: 0.1138 Steps: 150200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00163468, Log Avg loss: 0.00000023, Global Avg Loss: 0.00530405, Time: 0.0259 Steps: 150400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159562, Log Avg loss: 0.00000189, Global Avg Loss: 0.00529701, Time: 0.0883 Steps: 150600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008560, Sample Num: 136960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00155835, Log Avg loss: 0.00000021, Global Avg Loss: 0.00528999, Time: 0.0903 Steps: 150800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008760, Sample Num: 140160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00152277, Log Avg loss: 0.00000024, Global Avg Loss: 0.00528298, Time: 0.1415 Steps: 151000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008960, Sample Num: 143360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158918, Log Avg loss: 0.00449758, Global Avg Loss: 0.00528194, Time: 0.1068 Steps: 151200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 009160, Sample Num: 146560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00155448, Log Avg loss: 0.00000003, Global Avg Loss: 0.00527496, Time: 0.0683 Steps: 151400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00152127, Log Avg loss: 0.00000030, Global Avg Loss: 0.00526800, Time: 0.1136 Steps: 151600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009560, Sample Num: 152960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160390, Log Avg loss: 0.00547098, Global Avg Loss: 0.00526827, Time: 0.1271 Steps: 151800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009760, Sample Num: 156160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157103, Log Avg loss: 0.00000013, Global Avg Loss: 0.00526134, Time: 0.0659 Steps: 152000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009960, Sample Num: 159360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153949, Log Avg loss: 0.00000017, Global Avg Loss: 0.00525443, Time: 0.0648 Steps: 152200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010160, Sample Num: 162560, Cur Loss: 0.00000001, Cur Avg Loss: 0.00150919, Log Avg loss: 0.00000020, Global Avg Loss: 0.00524753, Time: 0.0389 Steps: 152400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148006, Log Avg loss: 0.00000002, Global Avg Loss: 0.00524065, Time: 0.0878 Steps: 152600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010560, Sample Num: 168960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00152343, Log Avg loss: 0.00377045, Global Avg Loss: 0.00523873, Time: 0.1992 Steps: 152800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010760, Sample Num: 172160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00149528, Log Avg loss: 0.00000872, Global Avg Loss: 0.00523189, Time: 0.1065 Steps: 153000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010960, Sample Num: 175360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146804, Log Avg loss: 0.00000259, Global Avg Loss: 0.00522507, Time: 0.0264 Steps: 153200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160093, Log Avg loss: 0.00888351, Global Avg Loss: 0.00522984, Time: 0.1220 Steps: 153400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 011360, Sample Num: 181760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158536, Log Avg loss: 0.00071630, Global Avg Loss: 0.00522396, Time: 0.0604 Steps: 153600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161487, Log Avg loss: 0.00329084, Global Avg Loss: 0.00522144, Time: 0.1681 Steps: 153800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158741, Log Avg loss: 0.00000012, Global Avg Loss: 0.00521466, Time: 0.1567 Steps: 154000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011960, Sample Num: 191360, Cur Loss: 0.00000006, Cur Avg Loss: 0.00156086, Log Avg loss: 0.00000016, Global Avg Loss: 0.00520790, Time: 0.0382 Steps: 154200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012160, Sample Num: 194560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153524, Log Avg loss: 0.00000301, Global Avg Loss: 0.00520116, Time: 0.1955 Steps: 154400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012360, Sample Num: 197760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00151041, Log Avg loss: 0.00000059, Global Avg Loss: 0.00519443, Time: 0.0695 Steps: 154600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012560, Sample Num: 200960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148636, Log Avg loss: 0.00000000, Global Avg Loss: 0.00518772, Time: 0.0790 Steps: 154800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012760, Sample Num: 204160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146640, Log Avg loss: 0.00021283, Global Avg Loss: 0.00518130, Time: 0.0702 Steps: 155000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012960, Sample Num: 207360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00144377, Log Avg loss: 0.00000007, Global Avg Loss: 0.00517462, Time: 0.3037 Steps: 155200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 013160, Sample Num: 210560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147424, Log Avg loss: 0.00344902, Global Avg Loss: 0.00517240, Time: 0.0403 Steps: 155400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 013360, Sample Num: 213760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00145222, Log Avg loss: 0.00000335, Global Avg Loss: 0.00516576, Time: 0.1076 Steps: 155600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013560, Sample Num: 216960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00143080, Log Avg loss: 0.00000008, Global Avg Loss: 0.00515913, Time: 0.0474 Steps: 155800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013760, Sample Num: 220160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00141002, Log Avg loss: 0.00000115, Global Avg Loss: 0.00515251, Time: 0.0714 Steps: 156000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013960, Sample Num: 223360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00142507, Log Avg loss: 0.00246017, Global Avg Loss: 0.00514907, Time: 0.1098 Steps: 156200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014160, Sample Num: 226560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140494, Log Avg loss: 0.00000001, Global Avg Loss: 0.00514248, Time: 0.0379 Steps: 156400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014360, Sample Num: 229760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00138539, Log Avg loss: 0.00000150, Global Avg Loss: 0.00513592, Time: 0.1557 Steps: 156600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014560, Sample Num: 232960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00136637, Log Avg loss: 0.00000005, Global Avg Loss: 0.00512937, Time: 0.0612 Steps: 156800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014760, Sample Num: 236160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134835, Log Avg loss: 0.00003684, Global Avg Loss: 0.00512288, Time: 0.0667 Steps: 157000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014960, Sample Num: 239360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135955, Log Avg loss: 0.00218571, Global Avg Loss: 0.00511914, Time: 0.0456 Steps: 157200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 015160, Sample Num: 242560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134161, Log Avg loss: 0.00000002, Global Avg Loss: 0.00511264, Time: 0.0699 Steps: 157400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 015360, Sample Num: 245760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00132414, Log Avg loss: 0.00000002, Global Avg Loss: 0.00510615, Time: 0.1117 Steps: 157600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015560, Sample Num: 248960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130712, Log Avg loss: 0.00000002, Global Avg Loss: 0.00509968, Time: 0.0624 Steps: 157800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015760, Sample Num: 252160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129054, Log Avg loss: 0.00000015, Global Avg Loss: 0.00509322, Time: 0.0788 Steps: 158000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015960, Sample Num: 255360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00127437, Log Avg loss: 0.00000020, Global Avg Loss: 0.00508678, Time: 0.2213 Steps: 158200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016160, Sample Num: 258560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00125860, Log Avg loss: 0.00000016, Global Avg Loss: 0.00508036, Time: 0.1153 Steps: 158400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016360, Sample Num: 261760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124321, Log Avg loss: 0.00000001, Global Avg Loss: 0.00507395, Time: 0.0707 Steps: 158600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016560, Sample Num: 264960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122838, Log Avg loss: 0.00001502, Global Avg Loss: 0.00506758, Time: 0.1315 Steps: 158800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016760, Sample Num: 268160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121372, Log Avg loss: 0.00000000, Global Avg Loss: 0.00506121, Time: 0.1084 Steps: 159000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016960, Sample Num: 271360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00119941, Log Avg loss: 0.00000004, Global Avg Loss: 0.00505485, Time: 0.1006 Steps: 159200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 017160, Sample Num: 274560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130889, Log Avg loss: 0.01059305, Global Avg Loss: 0.00506180, Time: 0.0318 Steps: 159400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 017360, Sample Num: 277760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129381, Log Avg loss: 0.00000010, Global Avg Loss: 0.00505546, Time: 0.1487 Steps: 159600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017560, Sample Num: 280960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00127907, Log Avg loss: 0.00000002, Global Avg Loss: 0.00504913, Time: 0.0634 Steps: 159800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017760, Sample Num: 284160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126467, Log Avg loss: 0.00000003, Global Avg Loss: 0.00504282, Time: 0.0381 Steps: 160000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017960, Sample Num: 287360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00125059, Log Avg loss: 0.00000015, Global Avg Loss: 0.00503652, Time: 0.0526 Steps: 160200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018160, Sample Num: 290560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00123682, Log Avg loss: 0.00000000, Global Avg Loss: 0.00503024, Time: 0.0633 Steps: 160400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018360, Sample Num: 293760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122334, Log Avg loss: 0.00000001, Global Avg Loss: 0.00502398, Time: 0.0489 Steps: 160600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018560, Sample Num: 296960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124279, Log Avg loss: 0.00302758, Global Avg Loss: 0.00502150, Time: 0.1436 Steps: 160800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018760, Sample Num: 300160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122954, Log Avg loss: 0.00000001, Global Avg Loss: 0.00501526, Time: 0.0426 Steps: 161000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018960, Sample Num: 303360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122885, Log Avg loss: 0.00116435, Global Avg Loss: 0.00501048, Time: 0.0589 Steps: 161200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 019160, Sample Num: 306560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121602, Log Avg loss: 0.00000002, Global Avg Loss: 0.00500427, Time: 0.0892 Steps: 161400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 019360, Sample Num: 309760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00120346, Log Avg loss: 0.00000001, Global Avg Loss: 0.00499808, Time: 0.1500 Steps: 161600, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019560, Sample Num: 312960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00119115, Log Avg loss: 0.00000002, Global Avg Loss: 0.00499190, Time: 0.0432 Steps: 161800, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019760, Sample Num: 316160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118020, Log Avg loss: 0.00010915, Global Avg Loss: 0.00498587, Time: 0.0610 Steps: 162000, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019960, Sample Num: 319360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00116838, Log Avg loss: 0.00000006, Global Avg Loss: 0.00497972, Time: 0.0686 Steps: 162200, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 020160, Sample Num: 322560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115679, Log Avg loss: 0.00000001, Global Avg Loss: 0.00497359, Time: 0.0499 Steps: 162400, Updated lr: 0.000020 ***** Running evaluation checkpoint-162560 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-162560 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1927.658861, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.020433, "eval_total_loss": 55.373877, "eval_acc": 0.998108, "eval_prec": 0.997883, "eval_recall": 0.998343, "eval_f1": 0.998113, "eval_roc_auc": 0.999748, "eval_pr_auc": 0.999516, "eval_confusion_matrix": {"tn": 21581, "fp": 46, "fn": 36, "tp": 21686}, "eval_mcc2": 0.996217, "eval_mcc": 0.996217, "eval_sn": 0.998343, "eval_sp": 0.997873, "update_flag": false, "test_avg_loss": 0.019911, "test_total_loss": 80.916977, "test_acc": 0.998078, "test_prec": 0.997817, "test_recall": 0.998339, "test_f1": 0.998078, "test_roc_auc": 0.999736, "test_pr_auc": 0.999487, "test_confusion_matrix": {"tn": 32446, "fp": 71, "fn": 54, "tp": 32451}, "test_mcc2": 0.996155, "test_mcc": 0.996155, "test_sn": 0.998339, "test_sp": 0.997817, "lr": 2.001970443349754e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.004977700888840992, "train_cur_epoch_loss": 24.784727731435467, "train_cur_epoch_avg_loss": 0.0012197208529249738, "train_cur_epoch_time": 1927.658861398697, "train_cur_epoch_avg_time": 0.09486510144678627, "epoch": 8, "step": 162560} ################################################## Training, Epoch: 0009, Batch: 000040, Sample Num: 640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000005, Log Avg loss: 0.00731949, Global Avg Loss: 0.00497648, Time: 0.0687 Steps: 162600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000240, Sample Num: 3840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000007, Log Avg loss: 0.00000007, Global Avg Loss: 0.00497036, Time: 0.0284 Steps: 162800, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000440, Sample Num: 7040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000004, Log Avg loss: 0.00000001, Global Avg Loss: 0.00496426, Time: 0.0446 Steps: 163000, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000640, Sample Num: 10240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00000001, Global Avg Loss: 0.00495818, Time: 0.0456 Steps: 163200, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000840, Sample Num: 13440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00000001, Global Avg Loss: 0.00495211, Time: 0.0722 Steps: 163400, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00494606, Time: 0.0675 Steps: 163600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 001240, Sample Num: 19840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00494002, Time: 0.0454 Steps: 163800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001440, Sample Num: 23040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000001, Global Avg Loss: 0.00493399, Time: 0.0403 Steps: 164000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001640, Sample Num: 26240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000010, Log Avg loss: 0.00000065, Global Avg Loss: 0.00492799, Time: 0.0613 Steps: 164200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001840, Sample Num: 29440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000009, Log Avg loss: 0.00000000, Global Avg Loss: 0.00492199, Time: 0.0401 Steps: 164400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000070, Log Avg loss: 0.00000631, Global Avg Loss: 0.00491602, Time: 0.0648 Steps: 164600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002240, Sample Num: 35840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000063, Log Avg loss: 0.00000001, Global Avg Loss: 0.00491005, Time: 0.1152 Steps: 164800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002440, Sample Num: 39040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000058, Log Avg loss: 0.00000003, Global Avg Loss: 0.00490410, Time: 0.0899 Steps: 165000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002640, Sample Num: 42240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000075, Log Avg loss: 0.00000275, Global Avg Loss: 0.00489817, Time: 0.0435 Steps: 165200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00047850, Log Avg loss: 0.00678484, Global Avg Loss: 0.00490045, Time: 0.0298 Steps: 165400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00044704, Log Avg loss: 0.00000031, Global Avg Loss: 0.00489453, Time: 0.1076 Steps: 165600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 003240, Sample Num: 51840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00063360, Log Avg loss: 0.00346931, Global Avg Loss: 0.00489281, Time: 0.1367 Steps: 165800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003440, Sample Num: 55040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00059677, Log Avg loss: 0.00000016, Global Avg Loss: 0.00488692, Time: 0.1233 Steps: 166000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003640, Sample Num: 58240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00056398, Log Avg loss: 0.00000003, Global Avg Loss: 0.00488103, Time: 0.0618 Steps: 166200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003840, Sample Num: 61440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00065350, Log Avg loss: 0.00228277, Global Avg Loss: 0.00487791, Time: 0.0758 Steps: 166400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00062116, Log Avg loss: 0.00000019, Global Avg Loss: 0.00487206, Time: 0.0654 Steps: 166600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004240, Sample Num: 67840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00059186, Log Avg loss: 0.00000002, Global Avg Loss: 0.00486621, Time: 0.0470 Steps: 166800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004440, Sample Num: 71040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00092542, Log Avg loss: 0.00799675, Global Avg Loss: 0.00486996, Time: 0.0911 Steps: 167000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00092357, Log Avg loss: 0.00088250, Global Avg Loss: 0.00486519, Time: 0.0965 Steps: 167200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088541, Log Avg loss: 0.00000019, Global Avg Loss: 0.00485938, Time: 0.1119 Steps: 167400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 005040, Sample Num: 80640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087640, Log Avg loss: 0.00065823, Global Avg Loss: 0.00485437, Time: 0.0658 Steps: 167600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 005240, Sample Num: 83840, Cur Loss: 0.00000006, Cur Avg Loss: 0.00084302, Log Avg loss: 0.00000195, Global Avg Loss: 0.00484858, Time: 0.0285 Steps: 167800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005440, Sample Num: 87040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081205, Log Avg loss: 0.00000066, Global Avg Loss: 0.00484281, Time: 0.1870 Steps: 168000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005640, Sample Num: 90240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00078326, Log Avg loss: 0.00000006, Global Avg Loss: 0.00483705, Time: 0.0342 Steps: 168200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005840, Sample Num: 93440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00095686, Log Avg loss: 0.00585255, Global Avg Loss: 0.00483826, Time: 0.1067 Steps: 168400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006040, Sample Num: 96640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00110165, Log Avg loss: 0.00532930, Global Avg Loss: 0.00483884, Time: 0.1040 Steps: 168600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006240, Sample Num: 99840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115248, Log Avg loss: 0.00268781, Global Avg Loss: 0.00483629, Time: 0.0667 Steps: 168800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006440, Sample Num: 103040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00111693, Log Avg loss: 0.00000776, Global Avg Loss: 0.00483058, Time: 0.0391 Steps: 169000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006640, Sample Num: 106240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108330, Log Avg loss: 0.00000035, Global Avg Loss: 0.00482487, Time: 0.1907 Steps: 169200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006840, Sample Num: 109440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105163, Log Avg loss: 0.00000016, Global Avg Loss: 0.00481917, Time: 0.1129 Steps: 169400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 007040, Sample Num: 112640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102176, Log Avg loss: 0.00000001, Global Avg Loss: 0.00481349, Time: 0.2620 Steps: 169600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 007240, Sample Num: 115840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099354, Log Avg loss: 0.00000049, Global Avg Loss: 0.00480782, Time: 0.1603 Steps: 169800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007440, Sample Num: 119040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106716, Log Avg loss: 0.00373224, Global Avg Loss: 0.00480656, Time: 0.0717 Steps: 170000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007640, Sample Num: 122240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104249, Log Avg loss: 0.00012447, Global Avg Loss: 0.00480106, Time: 0.1363 Steps: 170200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007840, Sample Num: 125440, Cur Loss: 0.00000042, Cur Avg Loss: 0.00114245, Log Avg loss: 0.00496118, Global Avg Loss: 0.00480124, Time: 0.0387 Steps: 170400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008040, Sample Num: 128640, Cur Loss: 0.00000018, Cur Avg Loss: 0.00129828, Log Avg loss: 0.00740676, Global Avg Loss: 0.00480430, Time: 0.0888 Steps: 170600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008240, Sample Num: 131840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126677, Log Avg loss: 0.00000012, Global Avg Loss: 0.00479867, Time: 0.0632 Steps: 170800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008440, Sample Num: 135040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00123679, Log Avg loss: 0.00000148, Global Avg Loss: 0.00479306, Time: 0.1055 Steps: 171000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008640, Sample Num: 138240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00120816, Log Avg loss: 0.00000005, Global Avg Loss: 0.00478746, Time: 0.0896 Steps: 171200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008840, Sample Num: 141440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118083, Log Avg loss: 0.00000006, Global Avg Loss: 0.00478188, Time: 0.0513 Steps: 171400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 009040, Sample Num: 144640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00125329, Log Avg loss: 0.00445621, Global Avg Loss: 0.00478150, Time: 0.1025 Steps: 171600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 009240, Sample Num: 147840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122617, Log Avg loss: 0.00000003, Global Avg Loss: 0.00477593, Time: 0.0915 Steps: 171800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009440, Sample Num: 151040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129629, Log Avg loss: 0.00453607, Global Avg Loss: 0.00477565, Time: 0.1089 Steps: 172000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009640, Sample Num: 154240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126940, Log Avg loss: 0.00000032, Global Avg Loss: 0.00477011, Time: 0.1109 Steps: 172200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009840, Sample Num: 157440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124360, Log Avg loss: 0.00000004, Global Avg Loss: 0.00476457, Time: 0.1156 Steps: 172400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010040, Sample Num: 160640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121890, Log Avg loss: 0.00000351, Global Avg Loss: 0.00475905, Time: 0.1819 Steps: 172600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010240, Sample Num: 163840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00119510, Log Avg loss: 0.00000018, Global Avg Loss: 0.00475355, Time: 0.0828 Steps: 172800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010440, Sample Num: 167040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00117220, Log Avg loss: 0.00000002, Global Avg Loss: 0.00474805, Time: 0.1159 Steps: 173000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010640, Sample Num: 170240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121149, Log Avg loss: 0.00326209, Global Avg Loss: 0.00474634, Time: 0.0569 Steps: 173200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010840, Sample Num: 173440, Cur Loss: 0.00000042, Cur Avg Loss: 0.00118916, Log Avg loss: 0.00000133, Global Avg Loss: 0.00474086, Time: 0.0826 Steps: 173400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 011040, Sample Num: 176640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129235, Log Avg loss: 0.00688513, Global Avg Loss: 0.00474333, Time: 0.1094 Steps: 173600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 011240, Sample Num: 179840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126997, Log Avg loss: 0.00003494, Global Avg Loss: 0.00473791, Time: 0.0654 Steps: 173800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011440, Sample Num: 183040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124817, Log Avg loss: 0.00002274, Global Avg Loss: 0.00473249, Time: 0.1419 Steps: 174000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011640, Sample Num: 186240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129011, Log Avg loss: 0.00368925, Global Avg Loss: 0.00473130, Time: 0.1451 Steps: 174200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011840, Sample Num: 189440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126832, Log Avg loss: 0.00000007, Global Avg Loss: 0.00472587, Time: 0.1932 Steps: 174400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012040, Sample Num: 192640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124725, Log Avg loss: 0.00000007, Global Avg Loss: 0.00472046, Time: 0.0545 Steps: 174600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012240, Sample Num: 195840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122687, Log Avg loss: 0.00000001, Global Avg Loss: 0.00471506, Time: 0.1489 Steps: 174800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012440, Sample Num: 199040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00120715, Log Avg loss: 0.00000019, Global Avg Loss: 0.00470967, Time: 0.1573 Steps: 175000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012640, Sample Num: 202240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118805, Log Avg loss: 0.00000001, Global Avg Loss: 0.00470429, Time: 0.0553 Steps: 175200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012840, Sample Num: 205440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00117224, Log Avg loss: 0.00017317, Global Avg Loss: 0.00469913, Time: 0.1548 Steps: 175400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 013040, Sample Num: 208640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115427, Log Avg loss: 0.00000026, Global Avg Loss: 0.00469377, Time: 0.1190 Steps: 175600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 013240, Sample Num: 211840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118316, Log Avg loss: 0.00306709, Global Avg Loss: 0.00469192, Time: 0.1682 Steps: 175800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013440, Sample Num: 215040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00116556, Log Avg loss: 0.00000024, Global Avg Loss: 0.00468659, Time: 0.1538 Steps: 176000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013640, Sample Num: 218240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00114847, Log Avg loss: 0.00000001, Global Avg Loss: 0.00468127, Time: 0.1056 Steps: 176200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013840, Sample Num: 221440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00116243, Log Avg loss: 0.00211455, Global Avg Loss: 0.00467836, Time: 0.2211 Steps: 176400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014040, Sample Num: 224640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00114587, Log Avg loss: 0.00000015, Global Avg Loss: 0.00467306, Time: 0.1122 Steps: 176600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014240, Sample Num: 227840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112978, Log Avg loss: 0.00000002, Global Avg Loss: 0.00466778, Time: 0.1030 Steps: 176800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014440, Sample Num: 231040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00111413, Log Avg loss: 0.00000006, Global Avg Loss: 0.00466250, Time: 0.1308 Steps: 177000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014640, Sample Num: 234240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109892, Log Avg loss: 0.00000020, Global Avg Loss: 0.00465724, Time: 0.1573 Steps: 177200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014840, Sample Num: 237440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109786, Log Avg loss: 0.00102033, Global Avg Loss: 0.00465314, Time: 0.0335 Steps: 177400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015040, Sample Num: 240640, Cur Loss: 0.00000001, Cur Avg Loss: 0.00108326, Log Avg loss: 0.00000017, Global Avg Loss: 0.00464790, Time: 0.2722 Steps: 177600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015240, Sample Num: 243840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106904, Log Avg loss: 0.00000000, Global Avg Loss: 0.00464267, Time: 0.0405 Steps: 177800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015440, Sample Num: 247040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105520, Log Avg loss: 0.00000000, Global Avg Loss: 0.00463746, Time: 0.1820 Steps: 178000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 015640, Sample Num: 250240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104171, Log Avg loss: 0.00000081, Global Avg Loss: 0.00463225, Time: 0.1017 Steps: 178200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 015840, Sample Num: 253440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102856, Log Avg loss: 0.00000002, Global Avg Loss: 0.00462706, Time: 0.0624 Steps: 178400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016040, Sample Num: 256640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101574, Log Avg loss: 0.00000039, Global Avg Loss: 0.00462188, Time: 0.1544 Steps: 178600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016240, Sample Num: 259840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100323, Log Avg loss: 0.00000002, Global Avg Loss: 0.00461671, Time: 0.0712 Steps: 178800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016440, Sample Num: 263040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099103, Log Avg loss: 0.00000001, Global Avg Loss: 0.00461155, Time: 0.1039 Steps: 179000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016640, Sample Num: 266240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00097951, Log Avg loss: 0.00003295, Global Avg Loss: 0.00460644, Time: 0.0775 Steps: 179200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016840, Sample Num: 269440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096788, Log Avg loss: 0.00000001, Global Avg Loss: 0.00460130, Time: 0.0421 Steps: 179400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017040, Sample Num: 272640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00097851, Log Avg loss: 0.00187402, Global Avg Loss: 0.00459827, Time: 0.0564 Steps: 179600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017240, Sample Num: 275840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105330, Log Avg loss: 0.00742503, Global Avg Loss: 0.00460141, Time: 0.1471 Steps: 179800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017440, Sample Num: 279040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104122, Log Avg loss: 0.00000001, Global Avg Loss: 0.00459630, Time: 0.1577 Steps: 180000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 017640, Sample Num: 282240, Cur Loss: 0.00000817, Cur Avg Loss: 0.00102942, Log Avg loss: 0.00000006, Global Avg Loss: 0.00459120, Time: 0.1094 Steps: 180200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 017840, Sample Num: 285440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101787, Log Avg loss: 0.00000001, Global Avg Loss: 0.00458611, Time: 0.0654 Steps: 180400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018040, Sample Num: 288640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100659, Log Avg loss: 0.00000016, Global Avg Loss: 0.00458103, Time: 0.0432 Steps: 180600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018240, Sample Num: 291840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099555, Log Avg loss: 0.00000001, Global Avg Loss: 0.00457596, Time: 0.1265 Steps: 180800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018440, Sample Num: 295040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101770, Log Avg loss: 0.00303756, Global Avg Loss: 0.00457426, Time: 0.0566 Steps: 181000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018640, Sample Num: 298240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100678, Log Avg loss: 0.00000005, Global Avg Loss: 0.00456921, Time: 0.0281 Steps: 181200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018840, Sample Num: 301440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099909, Log Avg loss: 0.00028197, Global Avg Loss: 0.00456449, Time: 0.1549 Steps: 181400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019040, Sample Num: 304640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098860, Log Avg loss: 0.00000011, Global Avg Loss: 0.00455946, Time: 0.0418 Steps: 181600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019240, Sample Num: 307840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00097832, Log Avg loss: 0.00000008, Global Avg Loss: 0.00455444, Time: 0.0637 Steps: 181800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019440, Sample Num: 311040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096825, Log Avg loss: 0.00000000, Global Avg Loss: 0.00454944, Time: 0.1497 Steps: 182000, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 019640, Sample Num: 314240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00095865, Log Avg loss: 0.00002508, Global Avg Loss: 0.00454447, Time: 0.0471 Steps: 182200, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 019840, Sample Num: 317440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00094899, Log Avg loss: 0.00000001, Global Avg Loss: 0.00453949, Time: 0.1144 Steps: 182400, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 020040, Sample Num: 320640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00093952, Log Avg loss: 0.00000001, Global Avg Loss: 0.00453452, Time: 0.1126 Steps: 182600, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 020240, Sample Num: 323840, Cur Loss: 0.00000002, Cur Avg Loss: 0.00096669, Log Avg loss: 0.00368906, Global Avg Loss: 0.00453359, Time: 0.0550 Steps: 182800, Updated lr: 0.000010 ***** Running evaluation checkpoint-182880 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-182880 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1930.588210, Avg time per batch (s): 0.100000 {"eval_avg_loss": 0.021195, "eval_total_loss": 57.437317, "eval_acc": 0.998062, "eval_prec": 0.998021, "eval_recall": 0.998113, "eval_f1": 0.998067, "eval_roc_auc": 0.999748, "eval_pr_auc": 0.999517, "eval_confusion_matrix": {"tn": 21584, "fp": 43, "fn": 41, "tp": 21681}, "eval_mcc2": 0.996124, "eval_mcc": 0.996124, "eval_sn": 0.998113, "eval_sp": 0.998012, "update_flag": false, "test_avg_loss": 0.020487, "test_total_loss": 83.260388, "test_acc": 0.998108, "test_prec": 0.99797, "test_recall": 0.998246, "test_f1": 0.998108, "test_roc_auc": 0.999737, "test_pr_auc": 0.999489, "test_confusion_matrix": {"tn": 32451, "fp": 66, "fn": 57, "tp": 32448}, "test_mcc2": 0.996217, "test_mcc": 0.996217, "test_sn": 0.998246, "test_sp": 0.99797, "lr": 1.000985221674877e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.004535887290572754, "train_cur_epoch_loss": 20.348011209950805, "train_cur_epoch_avg_loss": 0.0010013785044267129, "train_cur_epoch_time": 1930.5882096290588, "train_cur_epoch_avg_time": 0.09500926228489463, "epoch": 9, "step": 182880} ################################################## Training, Epoch: 0010, Batch: 000120, Sample Num: 1920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00391155, Global Avg Loss: 0.00453291, Time: 0.1055 Steps: 183000, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000320, Sample Num: 5120, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000011, Log Avg loss: 0.00000016, Global Avg Loss: 0.00452796, Time: 0.0880 Steps: 183200, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000008, Log Avg loss: 0.00000002, Global Avg Loss: 0.00452303, Time: 0.1603 Steps: 183400, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000720, Sample Num: 11520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000006, Log Avg loss: 0.00000003, Global Avg Loss: 0.00451810, Time: 0.0867 Steps: 183600, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000920, Sample Num: 14720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000005, Log Avg loss: 0.00000001, Global Avg Loss: 0.00451318, Time: 0.0470 Steps: 183800, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 001120, Sample Num: 17920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000004, Log Avg loss: 0.00000000, Global Avg Loss: 0.00450828, Time: 0.0547 Steps: 184000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001320, Sample Num: 21120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000004, Log Avg loss: 0.00000000, Global Avg Loss: 0.00450338, Time: 0.1893 Steps: 184200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00000002, Global Avg Loss: 0.00449850, Time: 0.1100 Steps: 184400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001720, Sample Num: 27520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00000001, Global Avg Loss: 0.00449362, Time: 0.1676 Steps: 184600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00000000, Global Avg Loss: 0.00448876, Time: 0.0936 Steps: 184800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002120, Sample Num: 33920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000004, Log Avg loss: 0.00000011, Global Avg Loss: 0.00448391, Time: 0.0449 Steps: 185000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000004, Log Avg loss: 0.00000005, Global Avg Loss: 0.00447907, Time: 0.0401 Steps: 185200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002520, Sample Num: 40320, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000004, Log Avg loss: 0.00000009, Global Avg Loss: 0.00447424, Time: 0.1146 Steps: 185400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000007, Log Avg loss: 0.00000048, Global Avg Loss: 0.00446941, Time: 0.1666 Steps: 185600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002920, Sample Num: 46720, Cur Loss: 0.00025346, Cur Avg Loss: 0.00044715, Log Avg loss: 0.00652741, Global Avg Loss: 0.00447163, Time: 0.0388 Steps: 185800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 003120, Sample Num: 49920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00041849, Log Avg loss: 0.00000000, Global Avg Loss: 0.00446682, Time: 0.1651 Steps: 186000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003320, Sample Num: 53120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00059990, Log Avg loss: 0.00342999, Global Avg Loss: 0.00446571, Time: 0.2328 Steps: 186200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00056582, Log Avg loss: 0.00000001, Global Avg Loss: 0.00446092, Time: 0.0500 Steps: 186400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003720, Sample Num: 59520, Cur Loss: 0.00000006, Cur Avg Loss: 0.00053540, Log Avg loss: 0.00000001, Global Avg Loss: 0.00445613, Time: 0.1102 Steps: 186600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003920, Sample Num: 62720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00065766, Log Avg loss: 0.00293168, Global Avg Loss: 0.00445450, Time: 0.0507 Steps: 186800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00062573, Log Avg loss: 0.00000001, Global Avg Loss: 0.00444974, Time: 0.1494 Steps: 187000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004320, Sample Num: 69120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00059677, Log Avg loss: 0.00000002, Global Avg Loss: 0.00444498, Time: 0.1182 Steps: 187200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00091907, Log Avg loss: 0.00788086, Global Avg Loss: 0.00444865, Time: 0.0809 Steps: 187400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004720, Sample Num: 75520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088725, Log Avg loss: 0.00016820, Global Avg Loss: 0.00444409, Time: 0.1076 Steps: 187600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004920, Sample Num: 78720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00090017, Log Avg loss: 0.00120503, Global Avg Loss: 0.00444064, Time: 0.0513 Steps: 187800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 005120, Sample Num: 81920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00086501, Log Avg loss: 0.00000009, Global Avg Loss: 0.00443591, Time: 0.2400 Steps: 188000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005320, Sample Num: 85120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083252, Log Avg loss: 0.00000074, Global Avg Loss: 0.00443120, Time: 0.1187 Steps: 188200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005520, Sample Num: 88320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00080236, Log Avg loss: 0.00000002, Global Avg Loss: 0.00442650, Time: 0.2935 Steps: 188400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005720, Sample Num: 91520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082031, Log Avg loss: 0.00131592, Global Avg Loss: 0.00442320, Time: 0.1146 Steps: 188600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005920, Sample Num: 94720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099057, Log Avg loss: 0.00585989, Global Avg Loss: 0.00442472, Time: 0.0350 Steps: 188800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006120, Sample Num: 97920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115360, Log Avg loss: 0.00597941, Global Avg Loss: 0.00442637, Time: 0.1097 Steps: 189000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006320, Sample Num: 101120, Cur Loss: 0.00000095, Cur Avg Loss: 0.00111711, Log Avg loss: 0.00000022, Global Avg Loss: 0.00442169, Time: 0.0442 Steps: 189200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006520, Sample Num: 104320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108287, Log Avg loss: 0.00000093, Global Avg Loss: 0.00441702, Time: 0.0436 Steps: 189400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006720, Sample Num: 107520, Cur Loss: 0.00000001, Cur Avg Loss: 0.00105064, Log Avg loss: 0.00000002, Global Avg Loss: 0.00441236, Time: 0.2598 Steps: 189600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006920, Sample Num: 110720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102028, Log Avg loss: 0.00000008, Global Avg Loss: 0.00440771, Time: 0.0785 Steps: 189800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 007120, Sample Num: 113920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099162, Log Avg loss: 0.00000010, Global Avg Loss: 0.00440307, Time: 0.1349 Steps: 190000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 007320, Sample Num: 117120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096453, Log Avg loss: 0.00000015, Global Avg Loss: 0.00439844, Time: 0.0665 Steps: 190200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007520, Sample Num: 120320, Cur Loss: 0.00000163, Cur Avg Loss: 0.00102866, Log Avg loss: 0.00337566, Global Avg Loss: 0.00439737, Time: 0.0399 Steps: 190400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007720, Sample Num: 123520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100203, Log Avg loss: 0.00000095, Global Avg Loss: 0.00439275, Time: 0.0368 Steps: 190600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007920, Sample Num: 126720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126552, Log Avg loss: 0.01143609, Global Avg Loss: 0.00440014, Time: 0.2436 Steps: 190800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008120, Sample Num: 129920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00123436, Log Avg loss: 0.00000044, Global Avg Loss: 0.00439553, Time: 0.0671 Steps: 191000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008320, Sample Num: 133120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00120469, Log Avg loss: 0.00000016, Global Avg Loss: 0.00439093, Time: 0.1337 Steps: 191200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008520, Sample Num: 136320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00117641, Log Avg loss: 0.00000002, Global Avg Loss: 0.00438634, Time: 0.0427 Steps: 191400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008720, Sample Num: 139520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00114943, Log Avg loss: 0.00000001, Global Avg Loss: 0.00438176, Time: 0.1534 Steps: 191600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008920, Sample Num: 142720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112366, Log Avg loss: 0.00000001, Global Avg Loss: 0.00437720, Time: 0.0538 Steps: 191800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 009120, Sample Num: 145920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00110020, Log Avg loss: 0.00005394, Global Avg Loss: 0.00437269, Time: 0.0332 Steps: 192000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 009320, Sample Num: 149120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00107659, Log Avg loss: 0.00000004, Global Avg Loss: 0.00436814, Time: 0.1555 Steps: 192200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009520, Sample Num: 152320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112480, Log Avg loss: 0.00337143, Global Avg Loss: 0.00436711, Time: 0.0659 Steps: 192400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009720, Sample Num: 155520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00110166, Log Avg loss: 0.00000001, Global Avg Loss: 0.00436257, Time: 0.0600 Steps: 192600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009920, Sample Num: 158720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00107945, Log Avg loss: 0.00000001, Global Avg Loss: 0.00435805, Time: 0.1580 Steps: 192800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010120, Sample Num: 161920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105814, Log Avg loss: 0.00000131, Global Avg Loss: 0.00435353, Time: 0.1551 Steps: 193000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010320, Sample Num: 165120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00103763, Log Avg loss: 0.00000002, Global Avg Loss: 0.00434902, Time: 0.1413 Steps: 193200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010520, Sample Num: 168320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106965, Log Avg loss: 0.00272148, Global Avg Loss: 0.00434734, Time: 0.1115 Steps: 193400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010720, Sample Num: 171520, Cur Loss: 0.00000101, Cur Avg Loss: 0.00104975, Log Avg loss: 0.00000346, Global Avg Loss: 0.00434285, Time: 0.0853 Steps: 193600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010920, Sample Num: 174720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00103053, Log Avg loss: 0.00000015, Global Avg Loss: 0.00433837, Time: 0.1195 Steps: 193800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 011120, Sample Num: 177920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108152, Log Avg loss: 0.00386560, Global Avg Loss: 0.00433788, Time: 0.0353 Steps: 194000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 011320, Sample Num: 181120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106294, Log Avg loss: 0.00002982, Global Avg Loss: 0.00433345, Time: 0.0782 Steps: 194200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011520, Sample Num: 184320, Cur Loss: 0.00000004, Cur Avg Loss: 0.00110368, Log Avg loss: 0.00340955, Global Avg Loss: 0.00433250, Time: 0.1874 Steps: 194400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011720, Sample Num: 187520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108485, Log Avg loss: 0.00000029, Global Avg Loss: 0.00432804, Time: 0.1585 Steps: 194600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011920, Sample Num: 190720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106665, Log Avg loss: 0.00000004, Global Avg Loss: 0.00432360, Time: 0.1182 Steps: 194800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012120, Sample Num: 193920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104905, Log Avg loss: 0.00000002, Global Avg Loss: 0.00431917, Time: 0.1477 Steps: 195000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012320, Sample Num: 197120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00103202, Log Avg loss: 0.00000001, Global Avg Loss: 0.00431474, Time: 0.0648 Steps: 195200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012520, Sample Num: 200320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101553, Log Avg loss: 0.00000001, Global Avg Loss: 0.00431033, Time: 0.1825 Steps: 195400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012720, Sample Num: 203520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099959, Log Avg loss: 0.00000174, Global Avg Loss: 0.00430592, Time: 0.1190 Steps: 195600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012920, Sample Num: 206720, Cur Loss: 0.00000001, Cur Avg Loss: 0.00098412, Log Avg loss: 0.00000001, Global Avg Loss: 0.00430152, Time: 0.1055 Steps: 195800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 013120, Sample Num: 209920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101216, Log Avg loss: 0.00282375, Global Avg Loss: 0.00430001, Time: 0.2060 Steps: 196000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 013320, Sample Num: 213120, Cur Loss: 0.00000027, Cur Avg Loss: 0.00099697, Log Avg loss: 0.00000003, Global Avg Loss: 0.00429563, Time: 0.1057 Steps: 196200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013520, Sample Num: 216320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098222, Log Avg loss: 0.00000046, Global Avg Loss: 0.00429126, Time: 0.0446 Steps: 196400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013720, Sample Num: 219520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096791, Log Avg loss: 0.00000027, Global Avg Loss: 0.00428689, Time: 0.1697 Steps: 196600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013920, Sample Num: 222720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098351, Log Avg loss: 0.00205398, Global Avg Loss: 0.00428462, Time: 0.0567 Steps: 196800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014120, Sample Num: 225920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096958, Log Avg loss: 0.00000002, Global Avg Loss: 0.00428027, Time: 0.0482 Steps: 197000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014320, Sample Num: 229120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00095604, Log Avg loss: 0.00000003, Global Avg Loss: 0.00427593, Time: 0.0587 Steps: 197200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014520, Sample Num: 232320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00094287, Log Avg loss: 0.00000002, Global Avg Loss: 0.00427160, Time: 0.0253 Steps: 197400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014720, Sample Num: 235520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00093006, Log Avg loss: 0.00000009, Global Avg Loss: 0.00426728, Time: 0.2919 Steps: 197600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014920, Sample Num: 238720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00092218, Log Avg loss: 0.00034160, Global Avg Loss: 0.00426331, Time: 0.0870 Steps: 197800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 015120, Sample Num: 241920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00090998, Log Avg loss: 0.00000011, Global Avg Loss: 0.00425900, Time: 0.0619 Steps: 198000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 015320, Sample Num: 245120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00089810, Log Avg loss: 0.00000003, Global Avg Loss: 0.00425470, Time: 0.0582 Steps: 198200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015520, Sample Num: 248320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088653, Log Avg loss: 0.00000002, Global Avg Loss: 0.00425041, Time: 0.1905 Steps: 198400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015720, Sample Num: 251520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087525, Log Avg loss: 0.00000002, Global Avg Loss: 0.00424613, Time: 0.1523 Steps: 198600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015920, Sample Num: 254720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00086426, Log Avg loss: 0.00000025, Global Avg Loss: 0.00424186, Time: 0.0342 Steps: 198800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016120, Sample Num: 257920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00085354, Log Avg loss: 0.00000045, Global Avg Loss: 0.00423760, Time: 0.0522 Steps: 199000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016320, Sample Num: 261120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084308, Log Avg loss: 0.00000002, Global Avg Loss: 0.00423334, Time: 0.1094 Steps: 199200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016520, Sample Num: 264320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083326, Log Avg loss: 0.00003232, Global Avg Loss: 0.00422913, Time: 0.0676 Steps: 199400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016720, Sample Num: 267520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082330, Log Avg loss: 0.00000007, Global Avg Loss: 0.00422489, Time: 0.0618 Steps: 199600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016920, Sample Num: 270720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081357, Log Avg loss: 0.00000004, Global Avg Loss: 0.00422066, Time: 0.0569 Steps: 199800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 017120, Sample Num: 273920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088416, Log Avg loss: 0.00685624, Global Avg Loss: 0.00422330, Time: 0.0497 Steps: 200000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 017320, Sample Num: 277120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087395, Log Avg loss: 0.00000004, Global Avg Loss: 0.00421908, Time: 0.1546 Steps: 200200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017520, Sample Num: 280320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00086397, Log Avg loss: 0.00000003, Global Avg Loss: 0.00421487, Time: 0.2451 Steps: 200400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017720, Sample Num: 283520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00085422, Log Avg loss: 0.00000007, Global Avg Loss: 0.00421067, Time: 0.0460 Steps: 200600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017920, Sample Num: 286720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084469, Log Avg loss: 0.00000036, Global Avg Loss: 0.00420647, Time: 0.2629 Steps: 200800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018120, Sample Num: 289920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083537, Log Avg loss: 0.00000001, Global Avg Loss: 0.00420229, Time: 0.0651 Steps: 201000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018320, Sample Num: 293120, Cur Loss: 0.00000001, Cur Avg Loss: 0.00082625, Log Avg loss: 0.00000001, Global Avg Loss: 0.00419811, Time: 0.1353 Steps: 201200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018520, Sample Num: 296320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083035, Log Avg loss: 0.00120634, Global Avg Loss: 0.00419514, Time: 0.1451 Steps: 201400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018720, Sample Num: 299520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082148, Log Avg loss: 0.00000001, Global Avg Loss: 0.00419098, Time: 0.0664 Steps: 201600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018920, Sample Num: 302720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081284, Log Avg loss: 0.00000421, Global Avg Loss: 0.00418683, Time: 0.1080 Steps: 201800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 019120, Sample Num: 305920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00080434, Log Avg loss: 0.00000009, Global Avg Loss: 0.00418268, Time: 0.0363 Steps: 202000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 019320, Sample Num: 309120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00079602, Log Avg loss: 0.00000001, Global Avg Loss: 0.00417855, Time: 0.0666 Steps: 202200, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019520, Sample Num: 312320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00078786, Log Avg loss: 0.00000001, Global Avg Loss: 0.00417442, Time: 0.1893 Steps: 202400, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019720, Sample Num: 315520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00077991, Log Avg loss: 0.00000418, Global Avg Loss: 0.00417030, Time: 0.1098 Steps: 202600, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019920, Sample Num: 318720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00077208, Log Avg loss: 0.00000004, Global Avg Loss: 0.00416619, Time: 0.1934 Steps: 202800, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 020120, Sample Num: 321920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00076441, Log Avg loss: 0.00000001, Global Avg Loss: 0.00416208, Time: 0.0440 Steps: 203000, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 020320, Sample Num: 325113, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081405, Log Avg loss: 0.00580828, Global Avg Loss: 0.00416370, Time: 0.0287 Steps: 203200, Updated lr: 0.000000 ***** Running evaluation checkpoint-203200 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-203200 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 1922.727506, Avg time per batch (s): 0.090000 {"eval_avg_loss": 0.021557, "eval_total_loss": 58.418712, "eval_acc": 0.998039, "eval_prec": 0.998112, "eval_recall": 0.997974, "eval_f1": 0.998043, "eval_roc_auc": 0.999747, "eval_pr_auc": 0.999516, "eval_confusion_matrix": {"tn": 21586, "fp": 41, "fn": 44, "tp": 21678}, "eval_mcc2": 0.996078, "eval_mcc": 0.996078, "eval_sn": 0.997974, "eval_sp": 0.998104, "update_flag": false, "test_avg_loss": 0.0208, "test_total_loss": 84.53138, "test_acc": 0.998154, "test_prec": 0.998062, "test_recall": 0.998246, "test_f1": 0.998154, "test_roc_auc": 0.999721, "test_pr_auc": 0.999457, "test_confusion_matrix": {"tn": 32454, "fp": 63, "fn": 57, "tp": 32448}, "test_mcc2": 0.996309, "test_mcc": 0.996309, "test_sn": 0.998246, "test_sp": 0.998063, "lr": 0.0, "cur_epoch_step": 20320, "train_global_avg_loss": 0.0041637036706828594, "train_cur_epoch_loss": 16.54151818280952, "train_cur_epoch_avg_loss": 0.0008140510916736968, "train_cur_epoch_time": 1922.7275059223175, "train_cur_epoch_avg_time": 0.09462241663003532, "epoch": 10, "step": 203200} ################################################## #########################Best Metric######################### {"epoch": 5, "global_step": 101600, "eval_avg_loss": 0.017001, "eval_total_loss": 46.071856, "eval_acc": 0.998131, "eval_prec": 0.997975, "eval_recall": 0.998297, "eval_f1": 0.998136, "eval_roc_auc": 0.999759, "eval_pr_auc": 0.999543, "eval_confusion_matrix": {"tn": 21583, "fp": 44, "fn": 37, "tp": 21685}, "eval_mcc2": 0.996263, "eval_mcc": 0.996263, "eval_sn": 0.998297, "eval_sp": 0.997966, "update_flag": true, "test_avg_loss": 0.016787, "test_total_loss": 68.22411, "test_acc": 0.998031, "test_prec": 0.997817, "test_recall": 0.998246, "test_f1": 0.998031, "test_roc_auc": 0.999808, "test_pr_auc": 0.999647, "test_confusion_matrix": {"tn": 32446, "fp": 71, "fn": 57, "tp": 32448}, "test_mcc2": 0.996063, "test_mcc": 0.996063, "test_sn": 0.998246, "test_sp": 0.997817} ################################################## Total Time: 658469.035225, Avg time per epoch(10 epochs): 65846.900000 ++++++++++++Validation+++++++++++++ best acc global step: 101600 checkpoint path: ../models/ViralCapsid/protein/binary_class/luca_base/matrix/20250104031508/checkpoint-101600 ***** Running evaluation checkpoint-101600 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## {"evaluation_avg_loss_101600": 0.017001, "evaluation_total_loss_101600": 46.071856, "evaluation_acc_101600": 0.998131, "evaluation_prec_101600": 0.997975, "evaluation_recall_101600": 0.998297, "evaluation_f1_101600": 0.998136, "evaluation_roc_auc_101600": 0.999759, "evaluation_pr_auc_101600": 0.999543, "evaluation_confusion_matrix_101600": {"tn": 21583, "fp": 44, "fn": 37, "tp": 21685}, "evaluation_mcc2_101600": 0.996263, "evaluation_mcc_101600": 0.996263, "evaluation_sn_101600": 0.998297, "evaluation_sp_101600": 0.997966} ++++++++++++Testing+++++++++++++ best acc global step: 101600 checkpoint path: ../models/ViralCapsid/protein/binary_class/luca_base/matrix/20250104031508/checkpoint-101600 ***** Running testing checkpoint-101600 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## {"evaluation_avg_loss_101600": 0.016787, "evaluation_total_loss_101600": 68.22411, "evaluation_acc_101600": 0.998031, "evaluation_prec_101600": 0.997817, "evaluation_recall_101600": 0.998246, "evaluation_f1_101600": 0.998031, "evaluation_roc_auc_101600": 0.999808, "evaluation_pr_auc_101600": 0.999647, "evaluation_confusion_matrix_101600": {"tn": 32446, "fp": 71, "fn": 57, "tp": 32448}, "evaluation_mcc2_101600": 0.996063, "evaluation_mcc_101600": 0.996063, "evaluation_sn_101600": 0.998246, "evaluation_sp_101600": 0.997817}