{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "acc", "beta1": 0.9, "beta2": 0.98, "buffer_size": 1024, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "ViralCapsid", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/ViralCapsid/protein/binary_class/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 2560, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "seq_matrix", "intermediate_size": 4096, "label_filepath": "../dataset/ViralCapsid/protein/binary_class/label.txt", "label_size": 2, "label_type": "ViralCapsid", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": "../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000", "llm_step": "3800000", "llm_task_level": "token_level,span_level,seq_level", "llm_time_str": "20240815023346", "llm_type": "lucaone_virus", "llm_version": "v1.0", "local_rank": -1, "log_dir": "../logs/ViralCapsid/protein/binary_class/luca_base/seq_matrix/20250106024245", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": false, "matrix_dirpath": "../matrices/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": false, "no_token_type_embeddings": true, "non_ignore": false, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 4, "num_hidden_layers": 2, "num_train_epochs": 10, "output_dir": "../models/ViralCapsid/protein/binary_class/luca_base/seq_matrix/20250106024245", "output_mode": "binary_class", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 1.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "128", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "binary_class", "tb_log_dir": "../tb-logs/ViralCapsid/protein/binary_class/luca_base/seq_matrix/20250106024245", "test_data_dir": "../dataset/ViralCapsid/protein/binary_class/test/", "time_str": "20250106024304", "train_data_dir": "../dataset/ViralCapsid/protein/binary_class/train/", "trunc_type": "right", "vector_dirpath": "../vectors/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 200, "weight": null, "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,seq,embedding_matrix ################################################## Encoder Config: {'llm_type': 'lucaone_virus', 'llm_version': 'v1.0', 'llm_step': '3800000', 'llm_dirpath': '../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000', 'input_type': 'seq_matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'matrix_dirpath': '../matrices/ViralCapsid/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': False, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 2560, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4096, "no_position_embeddings": true, "no_token_embeddings": false, "no_token_type_embeddings": true, "num_attention_heads": 4, "num_hidden_layers": 2, "pad_token_id": 0, "pos_weight": 1.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": [ 128 ], "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.29.0", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39 } ################################################## Mode Architecture: LucaBase( (seq_encoder): LucaTransformer( (embeddings): LucaEmbeddings( (word_embeddings): Embedding(39, 1024, padding_idx=0) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): ModuleList( (0): LucaTransformerLayer( (pre_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (self_attn): LucaMultiHeadAttention( (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) (rot_emb): RotaryEmbedding() ) (post_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) ) (1): LucaTransformerLayer( (pre_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (self_attn): LucaMultiHeadAttention( (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) (rot_emb): RotaryEmbedding() ) (post_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) ) ) (last_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) ) (seq_pooler): GlobalMaskValueAttentionPooling1D (1024 -> 1024) (matrix_pooler): GlobalMaskValueAttentionPooling1D (2560 -> 2560) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=1024, out_features=128, bias=True) (1): GELU(approximate='none') ) (1): ModuleList( (0): Linear(in_features=2560, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=256, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=1, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 48532993 ################################################## {"total_num": "46.280000M", "total_size": "185.140000MB", "param_sum": "46.280000M", "param_size": "185.140000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "46.284669M", "trainable_size": "185.138676MB"} ################################################## Train dataset len: 325113, batch size: 16, batch num: 20320 Train dataset t_total: 203200, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 325113 Train Dataset Num Epochs = 10 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. parallel, distributed & accumulation) = 16 Train Dataset Gradient Accumulation steps = 1 Train Dataset Total optimization steps = 203200 ################################################## Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.60253119, Cur Avg Loss: 0.45271324, Log Avg loss: 0.45271324, Global Avg Loss: 0.45271324, Time: 0.0855 Steps: 200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.24101771, Cur Avg Loss: 0.29002469, Log Avg loss: 0.12733615, Global Avg Loss: 0.29002469, Time: 0.0861 Steps: 400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.00731993, Cur Avg Loss: 0.20918661, Log Avg loss: 0.04751044, Global Avg Loss: 0.20918661, Time: 0.1543 Steps: 600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.01534165, Cur Avg Loss: 0.17118096, Log Avg loss: 0.05716402, Global Avg Loss: 0.17118096, Time: 0.2635 Steps: 800, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.03499073, Cur Avg Loss: 0.14327463, Log Avg loss: 0.03164932, Global Avg Loss: 0.14327463, Time: 0.1456 Steps: 1000, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.00814719, Cur Avg Loss: 0.12639234, Log Avg loss: 0.04198085, Global Avg Loss: 0.12639234, Time: 0.1192 Steps: 1200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.06167984, Cur Avg Loss: 0.11482710, Log Avg loss: 0.04543568, Global Avg Loss: 0.11482710, Time: 0.0927 Steps: 1400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00140705, Cur Avg Loss: 0.10343662, Log Avg loss: 0.02370326, Global Avg Loss: 0.10343662, Time: 0.1746 Steps: 1600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.00121512, Cur Avg Loss: 0.09480149, Log Avg loss: 0.02572043, Global Avg Loss: 0.09480149, Time: 0.2613 Steps: 1800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00034212, Cur Avg Loss: 0.08754455, Log Avg loss: 0.02223214, Global Avg Loss: 0.08754455, Time: 0.0792 Steps: 2000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.00086260, Cur Avg Loss: 0.08191042, Log Avg loss: 0.02556911, Global Avg Loss: 0.08191042, Time: 0.2831 Steps: 2200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.04261180, Cur Avg Loss: 0.07685373, Log Avg loss: 0.02123008, Global Avg Loss: 0.07685373, Time: 0.2621 Steps: 2400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00091945, Cur Avg Loss: 0.07250779, Log Avg loss: 0.02035652, Global Avg Loss: 0.07250779, Time: 0.1273 Steps: 2600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00023572, Cur Avg Loss: 0.06880679, Log Avg loss: 0.02069377, Global Avg Loss: 0.06880679, Time: 0.2276 Steps: 2800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00271058, Cur Avg Loss: 0.06589848, Log Avg loss: 0.02518219, Global Avg Loss: 0.06589848, Time: 0.0956 Steps: 3000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00114973, Cur Avg Loss: 0.06266832, Log Avg loss: 0.01421595, Global Avg Loss: 0.06266832, Time: 0.5317 Steps: 3200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur Loss: 0.00019830, Cur Avg Loss: 0.06000541, Log Avg loss: 0.01739889, Global Avg Loss: 0.06000541, Time: 0.2635 Steps: 3400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.00028286, Cur Avg Loss: 0.05777045, Log Avg loss: 0.01977605, Global Avg Loss: 0.05777045, Time: 0.4290 Steps: 3600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.42263478, Cur Avg Loss: 0.05587229, Log Avg loss: 0.02170541, Global Avg Loss: 0.05587229, Time: 0.1333 Steps: 3800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00065555, Cur Avg Loss: 0.05398640, Log Avg loss: 0.01815447, Global Avg Loss: 0.05398640, Time: 0.0486 Steps: 4000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.00039909, Cur Avg Loss: 0.05225192, Log Avg loss: 0.01756226, Global Avg Loss: 0.05225192, Time: 0.1538 Steps: 4200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00050943, Cur Avg Loss: 0.05058966, Log Avg loss: 0.01568228, Global Avg Loss: 0.05058966, Time: 0.0919 Steps: 4400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00011746, Cur Avg Loss: 0.04930691, Log Avg loss: 0.02108652, Global Avg Loss: 0.04930691, Time: 0.1323 Steps: 4600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.01155919, Cur Avg Loss: 0.04765489, Log Avg loss: 0.00965831, Global Avg Loss: 0.04765489, Time: 0.3753 Steps: 4800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005000, Sample Num: 80000, Cur Loss: 0.00168547, Cur Avg Loss: 0.04650121, Log Avg loss: 0.01881300, Global Avg Loss: 0.04650121, Time: 0.2312 Steps: 5000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00014261, Cur Avg Loss: 0.04519001, Log Avg loss: 0.01241003, Global Avg Loss: 0.04519001, Time: 0.0758 Steps: 5200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00119304, Cur Avg Loss: 0.04413987, Log Avg loss: 0.01683601, Global Avg Loss: 0.04413987, Time: 0.0863 Steps: 5400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 005600, Sample Num: 89600, Cur Loss: 0.00021783, Cur Avg Loss: 0.04304868, Log Avg loss: 0.01358669, Global Avg Loss: 0.04304868, Time: 0.4881 Steps: 5600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 005800, Sample Num: 92800, Cur Loss: 0.00039254, Cur Avg Loss: 0.04197983, Log Avg loss: 0.01205203, Global Avg Loss: 0.04197983, Time: 0.0507 Steps: 5800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006000, Sample Num: 96000, Cur Loss: 0.00132784, Cur Avg Loss: 0.04107304, Log Avg loss: 0.01477603, Global Avg Loss: 0.04107304, Time: 0.2666 Steps: 6000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006200, Sample Num: 99200, Cur Loss: 0.00020467, Cur Avg Loss: 0.04025367, Log Avg loss: 0.01567259, Global Avg Loss: 0.04025367, Time: 0.1231 Steps: 6200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00199453, Cur Avg Loss: 0.03942243, Log Avg loss: 0.01365413, Global Avg Loss: 0.03942243, Time: 0.1055 Steps: 6400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006600, Sample Num: 105600, Cur Loss: 0.00011741, Cur Avg Loss: 0.03849952, Log Avg loss: 0.00896634, Global Avg Loss: 0.03849952, Time: 0.2807 Steps: 6600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 006800, Sample Num: 108800, Cur Loss: 0.00064522, Cur Avg Loss: 0.03786518, Log Avg loss: 0.01693207, Global Avg Loss: 0.03786518, Time: 0.0870 Steps: 6800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00012743, Cur Avg Loss: 0.03710654, Log Avg loss: 0.01131271, Global Avg Loss: 0.03710654, Time: 0.0869 Steps: 7000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007200, Sample Num: 115200, Cur Loss: 0.00016394, Cur Avg Loss: 0.03639714, Log Avg loss: 0.01156821, Global Avg Loss: 0.03639714, Time: 0.0800 Steps: 7200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00092670, Cur Avg Loss: 0.03585010, Log Avg loss: 0.01615642, Global Avg Loss: 0.03585010, Time: 0.0787 Steps: 7400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00035941, Cur Avg Loss: 0.03512770, Log Avg loss: 0.00839918, Global Avg Loss: 0.03512770, Time: 0.4228 Steps: 7600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 007800, Sample Num: 124800, Cur Loss: 0.00118531, Cur Avg Loss: 0.03455628, Log Avg loss: 0.01284195, Global Avg Loss: 0.03455628, Time: 0.0883 Steps: 7800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00074167, Cur Avg Loss: 0.03421929, Log Avg loss: 0.02107667, Global Avg Loss: 0.03421929, Time: 0.3806 Steps: 8000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00019331, Cur Avg Loss: 0.03366085, Log Avg loss: 0.01132357, Global Avg Loss: 0.03366085, Time: 0.1174 Steps: 8200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008400, Sample Num: 134400, Cur Loss: 0.03983903, Cur Avg Loss: 0.03317265, Log Avg loss: 0.01315645, Global Avg Loss: 0.03317265, Time: 0.1409 Steps: 8400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008600, Sample Num: 137600, Cur Loss: 0.00151353, Cur Avg Loss: 0.03277756, Log Avg loss: 0.01618371, Global Avg Loss: 0.03277756, Time: 0.1579 Steps: 8600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00073513, Cur Avg Loss: 0.03232490, Log Avg loss: 0.01286029, Global Avg Loss: 0.03232490, Time: 0.3426 Steps: 8800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00013824, Cur Avg Loss: 0.03191049, Log Avg loss: 0.01367671, Global Avg Loss: 0.03191049, Time: 0.2293 Steps: 9000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00005385, Cur Avg Loss: 0.03134833, Log Avg loss: 0.00605102, Global Avg Loss: 0.03134833, Time: 0.0878 Steps: 9200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00003796, Cur Avg Loss: 0.03085372, Log Avg loss: 0.00810170, Global Avg Loss: 0.03085372, Time: 0.1418 Steps: 9400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00084175, Cur Avg Loss: 0.03043399, Log Avg loss: 0.01070680, Global Avg Loss: 0.03043399, Time: 0.1285 Steps: 9600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 009800, Sample Num: 156800, Cur Loss: 0.00046570, Cur Avg Loss: 0.02999154, Log Avg loss: 0.00875362, Global Avg Loss: 0.02999154, Time: 0.2620 Steps: 9800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00013322, Cur Avg Loss: 0.02960066, Log Avg loss: 0.01044790, Global Avg Loss: 0.02960066, Time: 0.0717 Steps: 10000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010200, Sample Num: 163200, Cur Loss: 0.00056554, Cur Avg Loss: 0.02918336, Log Avg loss: 0.00831809, Global Avg Loss: 0.02918336, Time: 0.1296 Steps: 10200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010400, Sample Num: 166400, Cur Loss: 0.00036165, Cur Avg Loss: 0.02883984, Log Avg loss: 0.01132053, Global Avg Loss: 0.02883984, Time: 0.2676 Steps: 10400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00016827, Cur Avg Loss: 0.02859740, Log Avg loss: 0.01599019, Global Avg Loss: 0.02859740, Time: 0.0865 Steps: 10600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00033047, Cur Avg Loss: 0.02820060, Log Avg loss: 0.00717051, Global Avg Loss: 0.02820060, Time: 0.3165 Steps: 10800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00854660, Cur Avg Loss: 0.02781981, Log Avg loss: 0.00725724, Global Avg Loss: 0.02781981, Time: 0.1108 Steps: 11000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00064690, Cur Avg Loss: 0.02758220, Log Avg loss: 0.01451335, Global Avg Loss: 0.02758220, Time: 0.1844 Steps: 11200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00050971, Cur Avg Loss: 0.02729708, Log Avg loss: 0.01133034, Global Avg Loss: 0.02729708, Time: 0.1535 Steps: 11400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00015713, Cur Avg Loss: 0.02700673, Log Avg loss: 0.01045710, Global Avg Loss: 0.02700673, Time: 0.4831 Steps: 11600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 011800, Sample Num: 188800, Cur Loss: 0.00248411, Cur Avg Loss: 0.02679665, Log Avg loss: 0.01461158, Global Avg Loss: 0.02679665, Time: 0.0493 Steps: 11800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012000, Sample Num: 192000, Cur Loss: 0.00391242, Cur Avg Loss: 0.02645849, Log Avg loss: 0.00650736, Global Avg Loss: 0.02645849, Time: 0.2477 Steps: 12000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012200, Sample Num: 195200, Cur Loss: 0.00072224, Cur Avg Loss: 0.02638749, Log Avg loss: 0.02212743, Global Avg Loss: 0.02638749, Time: 0.2612 Steps: 12200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012400, Sample Num: 198400, Cur Loss: 0.00026368, Cur Avg Loss: 0.02608017, Log Avg loss: 0.00733359, Global Avg Loss: 0.02608017, Time: 0.1506 Steps: 12400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012600, Sample Num: 201600, Cur Loss: 0.00023477, Cur Avg Loss: 0.02580397, Log Avg loss: 0.00867942, Global Avg Loss: 0.02580397, Time: 0.5167 Steps: 12600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 012800, Sample Num: 204800, Cur Loss: 0.00009979, Cur Avg Loss: 0.02552037, Log Avg loss: 0.00765359, Global Avg Loss: 0.02552037, Time: 0.1985 Steps: 12800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013000, Sample Num: 208000, Cur Loss: 0.00006491, Cur Avg Loss: 0.02529014, Log Avg loss: 0.01055539, Global Avg Loss: 0.02529014, Time: 0.0748 Steps: 13000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013200, Sample Num: 211200, Cur Loss: 0.00073801, Cur Avg Loss: 0.02503436, Log Avg loss: 0.00840893, Global Avg Loss: 0.02503436, Time: 0.4912 Steps: 13200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 013400, Sample Num: 214400, Cur Loss: 0.00026532, Cur Avg Loss: 0.02480267, Log Avg loss: 0.00951132, Global Avg Loss: 0.02480267, Time: 0.0796 Steps: 13400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 013600, Sample Num: 217600, Cur Loss: 0.01695084, Cur Avg Loss: 0.02458615, Log Avg loss: 0.01007923, Global Avg Loss: 0.02458615, Time: 0.4206 Steps: 13600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 013800, Sample Num: 220800, Cur Loss: 0.00017429, Cur Avg Loss: 0.02443904, Log Avg loss: 0.01443550, Global Avg Loss: 0.02443904, Time: 0.2656 Steps: 13800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014000, Sample Num: 224000, Cur Loss: 0.00118822, Cur Avg Loss: 0.02421457, Log Avg loss: 0.00872609, Global Avg Loss: 0.02421457, Time: 0.2512 Steps: 14000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014200, Sample Num: 227200, Cur Loss: 0.00015260, Cur Avg Loss: 0.02401516, Log Avg loss: 0.01005671, Global Avg Loss: 0.02401516, Time: 0.0585 Steps: 14200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014400, Sample Num: 230400, Cur Loss: 0.00015389, Cur Avg Loss: 0.02382838, Log Avg loss: 0.01056684, Global Avg Loss: 0.02382838, Time: 0.0738 Steps: 14400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014600, Sample Num: 233600, Cur Loss: 0.17482696, Cur Avg Loss: 0.02359433, Log Avg loss: 0.00674265, Global Avg Loss: 0.02359433, Time: 0.0690 Steps: 14600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 014800, Sample Num: 236800, Cur Loss: 0.00023673, Cur Avg Loss: 0.02343869, Log Avg loss: 0.01207690, Global Avg Loss: 0.02343869, Time: 0.0947 Steps: 14800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015000, Sample Num: 240000, Cur Loss: 0.00053880, Cur Avg Loss: 0.02324286, Log Avg loss: 0.00875166, Global Avg Loss: 0.02324286, Time: 0.0530 Steps: 15000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015200, Sample Num: 243200, Cur Loss: 0.00004774, Cur Avg Loss: 0.02299928, Log Avg loss: 0.00473076, Global Avg Loss: 0.02299928, Time: 0.2125 Steps: 15200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015400, Sample Num: 246400, Cur Loss: 0.00012502, Cur Avg Loss: 0.02285805, Log Avg loss: 0.01212436, Global Avg Loss: 0.02285805, Time: 0.2608 Steps: 15400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 015600, Sample Num: 249600, Cur Loss: 0.00300368, Cur Avg Loss: 0.02270188, Log Avg loss: 0.01067650, Global Avg Loss: 0.02270188, Time: 0.0529 Steps: 15600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 015800, Sample Num: 252800, Cur Loss: 0.00008261, Cur Avg Loss: 0.02255473, Log Avg loss: 0.01107764, Global Avg Loss: 0.02255473, Time: 0.1982 Steps: 15800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016000, Sample Num: 256000, Cur Loss: 0.00018798, Cur Avg Loss: 0.02234770, Log Avg loss: 0.00599191, Global Avg Loss: 0.02234770, Time: 0.1886 Steps: 16000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016200, Sample Num: 259200, Cur Loss: 0.00000904, Cur Avg Loss: 0.02210862, Log Avg loss: 0.00298200, Global Avg Loss: 0.02210862, Time: 0.2625 Steps: 16200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016400, Sample Num: 262400, Cur Loss: 0.00024881, Cur Avg Loss: 0.02191328, Log Avg loss: 0.00609150, Global Avg Loss: 0.02191328, Time: 0.0648 Steps: 16400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016600, Sample Num: 265600, Cur Loss: 0.00014552, Cur Avg Loss: 0.02176890, Log Avg loss: 0.00992896, Global Avg Loss: 0.02176890, Time: 0.1430 Steps: 16600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 016800, Sample Num: 268800, Cur Loss: 0.00008021, Cur Avg Loss: 0.02160908, Log Avg loss: 0.00834404, Global Avg Loss: 0.02160908, Time: 0.3380 Steps: 16800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017000, Sample Num: 272000, Cur Loss: 0.00008112, Cur Avg Loss: 0.02146064, Log Avg loss: 0.00899201, Global Avg Loss: 0.02146064, Time: 0.1372 Steps: 17000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017200, Sample Num: 275200, Cur Loss: 0.00020262, Cur Avg Loss: 0.02141039, Log Avg loss: 0.01713943, Global Avg Loss: 0.02141039, Time: 0.1972 Steps: 17200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017400, Sample Num: 278400, Cur Loss: 0.00014418, Cur Avg Loss: 0.02123708, Log Avg loss: 0.00633192, Global Avg Loss: 0.02123708, Time: 0.1264 Steps: 17400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 017600, Sample Num: 281600, Cur Loss: 0.00107239, Cur Avg Loss: 0.02112894, Log Avg loss: 0.01172103, Global Avg Loss: 0.02112894, Time: 0.1131 Steps: 17600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 017800, Sample Num: 284800, Cur Loss: 0.00038361, Cur Avg Loss: 0.02102863, Log Avg loss: 0.01220104, Global Avg Loss: 0.02102863, Time: 0.0910 Steps: 17800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018000, Sample Num: 288000, Cur Loss: 0.00004856, Cur Avg Loss: 0.02084472, Log Avg loss: 0.00447736, Global Avg Loss: 0.02084472, Time: 0.1550 Steps: 18000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018200, Sample Num: 291200, Cur Loss: 0.00008111, Cur Avg Loss: 0.02076137, Log Avg loss: 0.01325928, Global Avg Loss: 0.02076137, Time: 0.1112 Steps: 18200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018400, Sample Num: 294400, Cur Loss: 0.00148737, Cur Avg Loss: 0.02066016, Log Avg loss: 0.01145012, Global Avg Loss: 0.02066016, Time: 0.1021 Steps: 18400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018600, Sample Num: 297600, Cur Loss: 0.03036477, Cur Avg Loss: 0.02049859, Log Avg loss: 0.00563476, Global Avg Loss: 0.02049859, Time: 0.3083 Steps: 18600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 018800, Sample Num: 300800, Cur Loss: 0.41717923, Cur Avg Loss: 0.02039588, Log Avg loss: 0.01084333, Global Avg Loss: 0.02039588, Time: 0.0576 Steps: 18800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019000, Sample Num: 304000, Cur Loss: 0.00036063, Cur Avg Loss: 0.02029535, Log Avg loss: 0.01084555, Global Avg Loss: 0.02029535, Time: 0.1060 Steps: 19000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019200, Sample Num: 307200, Cur Loss: 0.00009715, Cur Avg Loss: 0.02019805, Log Avg loss: 0.01095451, Global Avg Loss: 0.02019805, Time: 0.1788 Steps: 19200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019400, Sample Num: 310400, Cur Loss: 0.00220752, Cur Avg Loss: 0.02006079, Log Avg loss: 0.00688361, Global Avg Loss: 0.02006079, Time: 0.1092 Steps: 19400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 019600, Sample Num: 313600, Cur Loss: 0.00033276, Cur Avg Loss: 0.01997405, Log Avg loss: 0.01156014, Global Avg Loss: 0.01997405, Time: 0.0482 Steps: 19600, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 019800, Sample Num: 316800, Cur Loss: 0.00007044, Cur Avg Loss: 0.01983103, Log Avg loss: 0.00581596, Global Avg Loss: 0.01983103, Time: 0.0840 Steps: 19800, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 020000, Sample Num: 320000, Cur Loss: 0.10363697, Cur Avg Loss: 0.01974737, Log Avg loss: 0.01146412, Global Avg Loss: 0.01974737, Time: 0.4009 Steps: 20000, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 020200, Sample Num: 323200, Cur Loss: 0.00001511, Cur Avg Loss: 0.01961028, Log Avg loss: 0.00590220, Global Avg Loss: 0.01961028, Time: 0.2008 Steps: 20200, Updated lr: 0.000090 ***** Running evaluation checkpoint-20320 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-20320 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4202.078495, Avg time per batch (s): 0.210000 {"eval_avg_loss": 0.007574, "eval_total_loss": 20.524539, "eval_acc": 0.998016, "eval_prec": 0.998525, "eval_recall": 0.997514, "eval_f1": 0.998019, "eval_roc_auc": 0.999922, "eval_pr_auc": 0.999927, "eval_confusion_matrix": {"tn": 21595, "fp": 32, "fn": 54, "tp": 21668}, "eval_mcc2": 0.996033, "eval_mcc": 0.996033, "eval_sn": 0.997514, "eval_sp": 0.99852, "update_flag": true, "test_avg_loss": 0.006706, "test_total_loss": 27.252259, "test_acc": 0.998308, "test_prec": 0.998676, "test_recall": 0.997939, "test_f1": 0.998307, "test_roc_auc": 0.999919, "test_pr_auc": 0.999921, "test_confusion_matrix": {"tn": 32474, "fp": 43, "fn": 67, "tp": 32438}, "test_mcc2": 0.996617, "test_mcc": 0.996617, "test_sn": 0.997939, "test_sp": 0.998678, "lr": 9.008866995073892e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.019655665481372066, "train_cur_epoch_loss": 399.4031225814804, "train_cur_epoch_avg_loss": 0.019655665481372066, "train_cur_epoch_time": 4202.078495264053, "train_cur_epoch_avg_time": 0.2067952015385853, "epoch": 1, "step": 20320} ################################################## Training, Epoch: 0002, Batch: 000080, Sample Num: 1280, Cur Loss: 0.00023149, Cur Avg Loss: 0.00235311, Log Avg loss: 0.01731810, Global Avg Loss: 0.01958781, Time: 0.1324 Steps: 20400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000280, Sample Num: 4480, Cur Loss: 0.00070059, Cur Avg Loss: 0.00663758, Log Avg loss: 0.00835137, Global Avg Loss: 0.01947872, Time: 0.1260 Steps: 20600, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000480, Sample Num: 7680, Cur Loss: 0.00028896, Cur Avg Loss: 0.00573805, Log Avg loss: 0.00447870, Global Avg Loss: 0.01933449, Time: 0.3294 Steps: 20800, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000680, Sample Num: 10880, Cur Loss: 0.00004829, Cur Avg Loss: 0.00509866, Log Avg loss: 0.00356413, Global Avg Loss: 0.01918430, Time: 0.1255 Steps: 21000, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000880, Sample Num: 14080, Cur Loss: 0.00009374, Cur Avg Loss: 0.00611368, Log Avg loss: 0.00956477, Global Avg Loss: 0.01909355, Time: 0.1287 Steps: 21200, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 001080, Sample Num: 17280, Cur Loss: 0.00000949, Cur Avg Loss: 0.00607797, Log Avg loss: 0.00592084, Global Avg Loss: 0.01897044, Time: 0.2644 Steps: 21400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 001280, Sample Num: 20480, Cur Loss: 0.00004350, Cur Avg Loss: 0.00575066, Log Avg loss: 0.00398317, Global Avg Loss: 0.01883167, Time: 0.5339 Steps: 21600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001480, Sample Num: 23680, Cur Loss: 0.00019557, Cur Avg Loss: 0.00597600, Log Avg loss: 0.00741816, Global Avg Loss: 0.01872695, Time: 0.3190 Steps: 21800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001680, Sample Num: 26880, Cur Loss: 0.00000644, Cur Avg Loss: 0.00551496, Log Avg loss: 0.00210327, Global Avg Loss: 0.01857583, Time: 0.2150 Steps: 22000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001880, Sample Num: 30080, Cur Loss: 0.00000659, Cur Avg Loss: 0.00522082, Log Avg loss: 0.00275008, Global Avg Loss: 0.01843326, Time: 0.0811 Steps: 22200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002080, Sample Num: 33280, Cur Loss: 0.00001381, Cur Avg Loss: 0.00555203, Log Avg loss: 0.00866538, Global Avg Loss: 0.01834604, Time: 0.3990 Steps: 22400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002280, Sample Num: 36480, Cur Loss: 0.00004496, Cur Avg Loss: 0.00616390, Log Avg loss: 0.01252732, Global Avg Loss: 0.01829455, Time: 0.0880 Steps: 22600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002480, Sample Num: 39680, Cur Loss: 0.00012953, Cur Avg Loss: 0.00624926, Log Avg loss: 0.00722235, Global Avg Loss: 0.01819742, Time: 0.1548 Steps: 22800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002680, Sample Num: 42880, Cur Loss: 0.00004213, Cur Avg Loss: 0.00637229, Log Avg loss: 0.00789792, Global Avg Loss: 0.01810786, Time: 0.1305 Steps: 23000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002880, Sample Num: 46080, Cur Loss: 0.00617481, Cur Avg Loss: 0.00633149, Log Avg loss: 0.00578480, Global Avg Loss: 0.01800163, Time: 0.2614 Steps: 23200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 003080, Sample Num: 49280, Cur Loss: 0.00113514, Cur Avg Loss: 0.00647111, Log Avg loss: 0.00848153, Global Avg Loss: 0.01792026, Time: 0.1540 Steps: 23400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 003280, Sample Num: 52480, Cur Loss: 0.00000209, Cur Avg Loss: 0.00643069, Log Avg loss: 0.00580835, Global Avg Loss: 0.01781762, Time: 0.2937 Steps: 23600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003480, Sample Num: 55680, Cur Loss: 0.00055184, Cur Avg Loss: 0.00635888, Log Avg loss: 0.00518112, Global Avg Loss: 0.01771143, Time: 0.2647 Steps: 23800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003680, Sample Num: 58880, Cur Loss: 0.00004501, Cur Avg Loss: 0.00631145, Log Avg loss: 0.00548613, Global Avg Loss: 0.01760955, Time: 0.1339 Steps: 24000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003880, Sample Num: 62080, Cur Loss: 0.00001604, Cur Avg Loss: 0.00629341, Log Avg loss: 0.00596144, Global Avg Loss: 0.01751329, Time: 0.2481 Steps: 24200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004080, Sample Num: 65280, Cur Loss: 0.00052845, Cur Avg Loss: 0.00622310, Log Avg loss: 0.00485909, Global Avg Loss: 0.01740956, Time: 0.0758 Steps: 24400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004280, Sample Num: 68480, Cur Loss: 0.00005803, Cur Avg Loss: 0.00644136, Log Avg loss: 0.01089395, Global Avg Loss: 0.01735659, Time: 0.1133 Steps: 24600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004480, Sample Num: 71680, Cur Loss: 0.00011606, Cur Avg Loss: 0.00656595, Log Avg loss: 0.00923212, Global Avg Loss: 0.01729107, Time: 0.6422 Steps: 24800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004680, Sample Num: 74880, Cur Loss: 0.00001051, Cur Avg Loss: 0.00646741, Log Avg loss: 0.00426026, Global Avg Loss: 0.01718682, Time: 0.2838 Steps: 25000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 004880, Sample Num: 78080, Cur Loss: 0.00015413, Cur Avg Loss: 0.00639719, Log Avg loss: 0.00475385, Global Avg Loss: 0.01708815, Time: 0.3438 Steps: 25200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 005080, Sample Num: 81280, Cur Loss: 0.00028674, Cur Avg Loss: 0.00636371, Log Avg loss: 0.00554680, Global Avg Loss: 0.01699727, Time: 0.0845 Steps: 25400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 005280, Sample Num: 84480, Cur Loss: 0.00000286, Cur Avg Loss: 0.00634522, Log Avg loss: 0.00587555, Global Avg Loss: 0.01691039, Time: 0.5406 Steps: 25600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005480, Sample Num: 87680, Cur Loss: 0.00001859, Cur Avg Loss: 0.00622386, Log Avg loss: 0.00302014, Global Avg Loss: 0.01680271, Time: 0.0914 Steps: 25800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005680, Sample Num: 90880, Cur Loss: 0.00008929, Cur Avg Loss: 0.00608975, Log Avg loss: 0.00241499, Global Avg Loss: 0.01669203, Time: 0.2853 Steps: 26000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 005880, Sample Num: 94080, Cur Loss: 0.00001647, Cur Avg Loss: 0.00604873, Log Avg loss: 0.00488372, Global Avg Loss: 0.01660189, Time: 0.1309 Steps: 26200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006080, Sample Num: 97280, Cur Loss: 0.00000934, Cur Avg Loss: 0.00622250, Log Avg loss: 0.01133157, Global Avg Loss: 0.01656197, Time: 0.5219 Steps: 26400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006280, Sample Num: 100480, Cur Loss: 0.00005336, Cur Avg Loss: 0.00616812, Log Avg loss: 0.00451497, Global Avg Loss: 0.01647139, Time: 0.2630 Steps: 26600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006480, Sample Num: 103680, Cur Loss: 0.00037090, Cur Avg Loss: 0.00609311, Log Avg loss: 0.00373785, Global Avg Loss: 0.01637636, Time: 0.4207 Steps: 26800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006680, Sample Num: 106880, Cur Loss: 0.00001971, Cur Avg Loss: 0.00615089, Log Avg loss: 0.00802266, Global Avg Loss: 0.01631448, Time: 0.0807 Steps: 27000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 006880, Sample Num: 110080, Cur Loss: 0.00000874, Cur Avg Loss: 0.00605108, Log Avg loss: 0.00271767, Global Avg Loss: 0.01621451, Time: 0.1420 Steps: 27200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007080, Sample Num: 113280, Cur Loss: 0.00001083, Cur Avg Loss: 0.00593088, Log Avg loss: 0.00179590, Global Avg Loss: 0.01610926, Time: 0.2846 Steps: 27400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007280, Sample Num: 116480, Cur Loss: 0.00003930, Cur Avg Loss: 0.00596258, Log Avg loss: 0.00708471, Global Avg Loss: 0.01604387, Time: 0.1154 Steps: 27600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 007480, Sample Num: 119680, Cur Loss: 0.00001401, Cur Avg Loss: 0.00592854, Log Avg loss: 0.00468962, Global Avg Loss: 0.01596218, Time: 0.1276 Steps: 27800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 007680, Sample Num: 122880, Cur Loss: 0.00000473, Cur Avg Loss: 0.00588472, Log Avg loss: 0.00424587, Global Avg Loss: 0.01587849, Time: 0.1429 Steps: 28000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 007880, Sample Num: 126080, Cur Loss: 0.00005207, Cur Avg Loss: 0.00593481, Log Avg loss: 0.00785809, Global Avg Loss: 0.01582161, Time: 0.0771 Steps: 28200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008080, Sample Num: 129280, Cur Loss: 0.03928184, Cur Avg Loss: 0.00592814, Log Avg loss: 0.00566523, Global Avg Loss: 0.01575009, Time: 0.3969 Steps: 28400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008280, Sample Num: 132480, Cur Loss: 0.00000183, Cur Avg Loss: 0.00581898, Log Avg loss: 0.00140928, Global Avg Loss: 0.01564980, Time: 0.2675 Steps: 28600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008480, Sample Num: 135680, Cur Loss: 0.00000428, Cur Avg Loss: 0.00578374, Log Avg loss: 0.00432474, Global Avg Loss: 0.01557115, Time: 0.0523 Steps: 28800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008680, Sample Num: 138880, Cur Loss: 0.00004573, Cur Avg Loss: 0.00582922, Log Avg loss: 0.00775765, Global Avg Loss: 0.01551727, Time: 0.0486 Steps: 29000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 008880, Sample Num: 142080, Cur Loss: 0.00014612, Cur Avg Loss: 0.00580161, Log Avg loss: 0.00460331, Global Avg Loss: 0.01544252, Time: 0.2664 Steps: 29200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009080, Sample Num: 145280, Cur Loss: 0.00003599, Cur Avg Loss: 0.00583851, Log Avg loss: 0.00747660, Global Avg Loss: 0.01538833, Time: 0.3607 Steps: 29400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009280, Sample Num: 148480, Cur Loss: 0.00003539, Cur Avg Loss: 0.00571667, Log Avg loss: 0.00018535, Global Avg Loss: 0.01528560, Time: 0.6985 Steps: 29600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 009480, Sample Num: 151680, Cur Loss: 0.00808213, Cur Avg Loss: 0.00567673, Log Avg loss: 0.00382362, Global Avg Loss: 0.01520868, Time: 0.4129 Steps: 29800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 009680, Sample Num: 154880, Cur Loss: 0.00001636, Cur Avg Loss: 0.00561300, Log Avg loss: 0.00259195, Global Avg Loss: 0.01512457, Time: 0.1981 Steps: 30000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 009880, Sample Num: 158080, Cur Loss: 0.00000317, Cur Avg Loss: 0.00561221, Log Avg loss: 0.00557377, Global Avg Loss: 0.01506132, Time: 0.0903 Steps: 30200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010080, Sample Num: 161280, Cur Loss: 0.00011941, Cur Avg Loss: 0.00555612, Log Avg loss: 0.00278537, Global Avg Loss: 0.01498055, Time: 0.3820 Steps: 30400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010280, Sample Num: 164480, Cur Loss: 0.00001131, Cur Avg Loss: 0.00554441, Log Avg loss: 0.00495441, Global Avg Loss: 0.01491502, Time: 0.0759 Steps: 30600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010480, Sample Num: 167680, Cur Loss: 0.00000399, Cur Avg Loss: 0.00561036, Log Avg loss: 0.00900007, Global Avg Loss: 0.01487661, Time: 0.4238 Steps: 30800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010680, Sample Num: 170880, Cur Loss: 0.00000422, Cur Avg Loss: 0.00552591, Log Avg loss: 0.00110060, Global Avg Loss: 0.01478774, Time: 0.0798 Steps: 31000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 010880, Sample Num: 174080, Cur Loss: 0.00005443, Cur Avg Loss: 0.00546917, Log Avg loss: 0.00243925, Global Avg Loss: 0.01470858, Time: 0.3613 Steps: 31200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011080, Sample Num: 177280, Cur Loss: 0.00000204, Cur Avg Loss: 0.00555721, Log Avg loss: 0.01034675, Global Avg Loss: 0.01468080, Time: 0.0805 Steps: 31400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011280, Sample Num: 180480, Cur Loss: 0.00000572, Cur Avg Loss: 0.00560813, Log Avg loss: 0.00842927, Global Avg Loss: 0.01464123, Time: 0.4019 Steps: 31600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 011480, Sample Num: 183680, Cur Loss: 0.00010358, Cur Avg Loss: 0.00554248, Log Avg loss: 0.00183949, Global Avg Loss: 0.01456072, Time: 0.0818 Steps: 31800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 011680, Sample Num: 186880, Cur Loss: 0.00002519, Cur Avg Loss: 0.00551610, Log Avg loss: 0.00400231, Global Avg Loss: 0.01449473, Time: 0.4034 Steps: 32000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 011880, Sample Num: 190080, Cur Loss: 0.00087781, Cur Avg Loss: 0.00552659, Log Avg loss: 0.00613915, Global Avg Loss: 0.01444283, Time: 0.0710 Steps: 32200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012080, Sample Num: 193280, Cur Loss: 0.00008179, Cur Avg Loss: 0.00547614, Log Avg loss: 0.00247951, Global Avg Loss: 0.01436898, Time: 0.1307 Steps: 32400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012280, Sample Num: 196480, Cur Loss: 0.00002051, Cur Avg Loss: 0.00546312, Log Avg loss: 0.00467668, Global Avg Loss: 0.01430952, Time: 0.0497 Steps: 32600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012480, Sample Num: 199680, Cur Loss: 0.00001413, Cur Avg Loss: 0.00541980, Log Avg loss: 0.00275959, Global Avg Loss: 0.01423909, Time: 0.0744 Steps: 32800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012680, Sample Num: 202880, Cur Loss: 0.00003427, Cur Avg Loss: 0.00543340, Log Avg loss: 0.00628224, Global Avg Loss: 0.01419087, Time: 0.0964 Steps: 33000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 012880, Sample Num: 206080, Cur Loss: 0.00000039, Cur Avg Loss: 0.00537923, Log Avg loss: 0.00194477, Global Avg Loss: 0.01411710, Time: 0.1414 Steps: 33200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013080, Sample Num: 209280, Cur Loss: 0.00003634, Cur Avg Loss: 0.00537241, Log Avg loss: 0.00493308, Global Avg Loss: 0.01406210, Time: 0.0864 Steps: 33400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013280, Sample Num: 212480, Cur Loss: 0.00000300, Cur Avg Loss: 0.00529628, Log Avg loss: 0.00031722, Global Avg Loss: 0.01398029, Time: 0.0973 Steps: 33600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 013480, Sample Num: 215680, Cur Loss: 0.00004773, Cur Avg Loss: 0.00521964, Log Avg loss: 0.00013111, Global Avg Loss: 0.01389834, Time: 0.1419 Steps: 33800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 013680, Sample Num: 218880, Cur Loss: 0.00000061, Cur Avg Loss: 0.00514949, Log Avg loss: 0.00042099, Global Avg Loss: 0.01381906, Time: 0.0798 Steps: 34000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 013880, Sample Num: 222080, Cur Loss: 0.00001553, Cur Avg Loss: 0.00520056, Log Avg loss: 0.00869370, Global Avg Loss: 0.01378909, Time: 0.0900 Steps: 34200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014080, Sample Num: 225280, Cur Loss: 0.00000654, Cur Avg Loss: 0.00516461, Log Avg loss: 0.00267015, Global Avg Loss: 0.01372444, Time: 0.1801 Steps: 34400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014280, Sample Num: 228480, Cur Loss: 0.00000083, Cur Avg Loss: 0.00527932, Log Avg loss: 0.01335500, Global Avg Loss: 0.01372231, Time: 0.3992 Steps: 34600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014480, Sample Num: 231680, Cur Loss: 0.00011729, Cur Avg Loss: 0.00527760, Log Avg loss: 0.00515423, Global Avg Loss: 0.01367307, Time: 0.2081 Steps: 34800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014680, Sample Num: 234880, Cur Loss: 0.00008184, Cur Avg Loss: 0.00526494, Log Avg loss: 0.00434837, Global Avg Loss: 0.01361978, Time: 0.2655 Steps: 35000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 014880, Sample Num: 238080, Cur Loss: 0.00002787, Cur Avg Loss: 0.00528770, Log Avg loss: 0.00695879, Global Avg Loss: 0.01358194, Time: 0.2300 Steps: 35200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015080, Sample Num: 241280, Cur Loss: 0.00006626, Cur Avg Loss: 0.00522570, Log Avg loss: 0.00061266, Global Avg Loss: 0.01350866, Time: 0.0570 Steps: 35400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015280, Sample Num: 244480, Cur Loss: 0.00002015, Cur Avg Loss: 0.00516891, Log Avg loss: 0.00088711, Global Avg Loss: 0.01343776, Time: 0.1256 Steps: 35600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 015480, Sample Num: 247680, Cur Loss: 0.00002458, Cur Avg Loss: 0.00519733, Log Avg loss: 0.00736885, Global Avg Loss: 0.01340385, Time: 0.2616 Steps: 35800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 015680, Sample Num: 250880, Cur Loss: 0.00000191, Cur Avg Loss: 0.00513197, Log Avg loss: 0.00007291, Global Avg Loss: 0.01332979, Time: 0.1295 Steps: 36000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 015880, Sample Num: 254080, Cur Loss: 0.00000267, Cur Avg Loss: 0.00512810, Log Avg loss: 0.00482475, Global Avg Loss: 0.01328280, Time: 0.2684 Steps: 36200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016080, Sample Num: 257280, Cur Loss: 0.00001062, Cur Avg Loss: 0.00506562, Log Avg loss: 0.00010437, Global Avg Loss: 0.01321039, Time: 0.2567 Steps: 36400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016280, Sample Num: 260480, Cur Loss: 0.00000027, Cur Avg Loss: 0.00501314, Log Avg loss: 0.00079351, Global Avg Loss: 0.01314254, Time: 0.1436 Steps: 36600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016480, Sample Num: 263680, Cur Loss: 0.00000649, Cur Avg Loss: 0.00498906, Log Avg loss: 0.00302946, Global Avg Loss: 0.01308758, Time: 0.4233 Steps: 36800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016680, Sample Num: 266880, Cur Loss: 0.00000343, Cur Avg Loss: 0.00500419, Log Avg loss: 0.00625061, Global Avg Loss: 0.01305062, Time: 0.1671 Steps: 37000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 016880, Sample Num: 270080, Cur Loss: 0.00000129, Cur Avg Loss: 0.00497702, Log Avg loss: 0.00271132, Global Avg Loss: 0.01299503, Time: 0.5019 Steps: 37200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017080, Sample Num: 273280, Cur Loss: 0.00000318, Cur Avg Loss: 0.00496853, Log Avg loss: 0.00425162, Global Avg Loss: 0.01294828, Time: 0.1606 Steps: 37400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017280, Sample Num: 276480, Cur Loss: 0.00004682, Cur Avg Loss: 0.00501504, Log Avg loss: 0.00898751, Global Avg Loss: 0.01292721, Time: 0.3528 Steps: 37600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 017480, Sample Num: 279680, Cur Loss: 0.00000284, Cur Avg Loss: 0.00496540, Log Avg loss: 0.00067619, Global Avg Loss: 0.01286239, Time: 0.1651 Steps: 37800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 017680, Sample Num: 282880, Cur Loss: 0.00000386, Cur Avg Loss: 0.00498141, Log Avg loss: 0.00638090, Global Avg Loss: 0.01282828, Time: 0.2172 Steps: 38000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 017880, Sample Num: 286080, Cur Loss: 0.00001502, Cur Avg Loss: 0.00497104, Log Avg loss: 0.00405404, Global Avg Loss: 0.01278234, Time: 0.2204 Steps: 38200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018080, Sample Num: 289280, Cur Loss: 0.00000035, Cur Avg Loss: 0.00492713, Log Avg loss: 0.00100191, Global Avg Loss: 0.01272098, Time: 0.4055 Steps: 38400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018280, Sample Num: 292480, Cur Loss: 0.00000136, Cur Avg Loss: 0.00492030, Log Avg loss: 0.00430249, Global Avg Loss: 0.01267736, Time: 0.0907 Steps: 38600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018480, Sample Num: 295680, Cur Loss: 0.00004030, Cur Avg Loss: 0.00492489, Log Avg loss: 0.00534454, Global Avg Loss: 0.01263956, Time: 0.3238 Steps: 38800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018680, Sample Num: 298880, Cur Loss: 0.00000690, Cur Avg Loss: 0.00492174, Log Avg loss: 0.00463068, Global Avg Loss: 0.01259849, Time: 0.0815 Steps: 39000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 018880, Sample Num: 302080, Cur Loss: 0.00000501, Cur Avg Loss: 0.00490733, Log Avg loss: 0.00356122, Global Avg Loss: 0.01255238, Time: 0.4130 Steps: 39200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019080, Sample Num: 305280, Cur Loss: 0.00000204, Cur Avg Loss: 0.00490703, Log Avg loss: 0.00487922, Global Avg Loss: 0.01251343, Time: 0.1338 Steps: 39400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019280, Sample Num: 308480, Cur Loss: 0.00001936, Cur Avg Loss: 0.00488719, Log Avg loss: 0.00299385, Global Avg Loss: 0.01246536, Time: 0.0480 Steps: 39600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 019480, Sample Num: 311680, Cur Loss: 0.00000586, Cur Avg Loss: 0.00487362, Log Avg loss: 0.00356563, Global Avg Loss: 0.01242063, Time: 0.1282 Steps: 39800, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 019680, Sample Num: 314880, Cur Loss: 0.00000466, Cur Avg Loss: 0.00486269, Log Avg loss: 0.00379870, Global Avg Loss: 0.01237752, Time: 0.0894 Steps: 40000, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 019880, Sample Num: 318080, Cur Loss: 0.00001358, Cur Avg Loss: 0.00484566, Log Avg loss: 0.00316995, Global Avg Loss: 0.01233171, Time: 0.2638 Steps: 40200, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 020080, Sample Num: 321280, Cur Loss: 0.00018947, Cur Avg Loss: 0.00479907, Log Avg loss: 0.00016773, Global Avg Loss: 0.01227150, Time: 0.0839 Steps: 40400, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 020280, Sample Num: 324480, Cur Loss: 0.33099270, Cur Avg Loss: 0.00486150, Log Avg loss: 0.01112945, Global Avg Loss: 0.01226587, Time: 0.1703 Steps: 40600, Updated lr: 0.000080 ***** Running evaluation checkpoint-40640 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-40640 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4172.033409, Avg time per batch (s): 0.210000 {"eval_avg_loss": 0.009136, "eval_total_loss": 24.75942, "eval_acc": 0.998316, "eval_prec": 0.998664, "eval_recall": 0.997974, "eval_f1": 0.998319, "eval_roc_auc": 0.999918, "eval_pr_auc": 0.999922, "eval_confusion_matrix": {"tn": 21598, "fp": 29, "fn": 44, "tp": 21678}, "eval_mcc2": 0.996632, "eval_mcc": 0.996632, "eval_sn": 0.997974, "eval_sp": 0.998659, "update_flag": true, "test_avg_loss": 0.008006, "test_total_loss": 32.538096, "test_acc": 0.998431, "test_prec": 0.998584, "test_recall": 0.998277, "test_f1": 0.998431, "test_roc_auc": 0.999939, "test_pr_auc": 0.999945, "test_confusion_matrix": {"tn": 32471, "fp": 46, "fn": 56, "tp": 32449}, "test_mcc2": 0.996863, "test_mcc": 0.996863, "test_sn": 0.998277, "test_sp": 0.998585, "lr": 8.007881773399016e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.012253953982207785, "train_cur_epoch_loss": 98.59756725544385, "train_cur_epoch_avg_loss": 0.004852242483043496, "train_cur_epoch_time": 4172.033408880234, "train_cur_epoch_avg_time": 0.20531660476772803, "epoch": 2, "step": 40640} ################################################## Training, Epoch: 0003, Batch: 000160, Sample Num: 2560, Cur Loss: 0.00000093, Cur Avg Loss: 0.00010870, Log Avg loss: 0.00011856, Global Avg Loss: 0.01220633, Time: 0.1164 Steps: 40800, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000360, Sample Num: 5760, Cur Loss: 0.00000436, Cur Avg Loss: 0.00104934, Log Avg loss: 0.00180185, Global Avg Loss: 0.01215557, Time: 0.0871 Steps: 41000, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00000054, Cur Avg Loss: 0.00074942, Log Avg loss: 0.00020956, Global Avg Loss: 0.01209758, Time: 0.1451 Steps: 41200, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000760, Sample Num: 12160, Cur Loss: 0.00000259, Cur Avg Loss: 0.00107880, Log Avg loss: 0.00200105, Global Avg Loss: 0.01204881, Time: 0.2312 Steps: 41400, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000960, Sample Num: 15360, Cur Loss: 0.00000459, Cur Avg Loss: 0.00091400, Log Avg loss: 0.00028778, Global Avg Loss: 0.01199226, Time: 0.0795 Steps: 41600, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 001160, Sample Num: 18560, Cur Loss: 0.00000016, Cur Avg Loss: 0.00140147, Log Avg loss: 0.00374130, Global Avg Loss: 0.01195278, Time: 0.2597 Steps: 41800, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00000038, Cur Avg Loss: 0.00119916, Log Avg loss: 0.00002581, Global Avg Loss: 0.01189599, Time: 0.0555 Steps: 42000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001560, Sample Num: 24960, Cur Loss: 0.00000738, Cur Avg Loss: 0.00138560, Log Avg loss: 0.00265337, Global Avg Loss: 0.01185219, Time: 0.1945 Steps: 42200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001760, Sample Num: 28160, Cur Loss: 0.00000030, Cur Avg Loss: 0.00123757, Log Avg loss: 0.00008293, Global Avg Loss: 0.01179667, Time: 0.1862 Steps: 42400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001960, Sample Num: 31360, Cur Loss: 0.00000003, Cur Avg Loss: 0.00111515, Log Avg loss: 0.00003788, Global Avg Loss: 0.01174146, Time: 0.1452 Steps: 42600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002160, Sample Num: 34560, Cur Loss: 0.00000105, Cur Avg Loss: 0.00201625, Log Avg loss: 0.01084704, Global Avg Loss: 0.01173728, Time: 0.3993 Steps: 42800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00000013, Cur Avg Loss: 0.00212110, Log Avg loss: 0.00325346, Global Avg Loss: 0.01169783, Time: 0.2815 Steps: 43000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002560, Sample Num: 40960, Cur Loss: 0.00000048, Cur Avg Loss: 0.00208117, Log Avg loss: 0.00161002, Global Avg Loss: 0.01165112, Time: 0.4771 Steps: 43200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002760, Sample Num: 44160, Cur Loss: 0.00000119, Cur Avg Loss: 0.00198452, Log Avg loss: 0.00074741, Global Avg Loss: 0.01160087, Time: 0.3275 Steps: 43400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002960, Sample Num: 47360, Cur Loss: 0.00000035, Cur Avg Loss: 0.00213997, Log Avg loss: 0.00428515, Global Avg Loss: 0.01156732, Time: 0.0781 Steps: 43600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00241209, Log Avg loss: 0.00643943, Global Avg Loss: 0.01154390, Time: 0.1292 Steps: 43800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 003360, Sample Num: 53760, Cur Loss: 0.00000274, Cur Avg Loss: 0.00252502, Log Avg loss: 0.00430930, Global Avg Loss: 0.01151102, Time: 0.1467 Steps: 44000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003560, Sample Num: 56960, Cur Loss: 0.00000034, Cur Avg Loss: 0.00261186, Log Avg loss: 0.00407080, Global Avg Loss: 0.01147735, Time: 0.1594 Steps: 44200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003760, Sample Num: 60160, Cur Loss: 0.00000066, Cur Avg Loss: 0.00267703, Log Avg loss: 0.00383708, Global Avg Loss: 0.01144294, Time: 0.2566 Steps: 44400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003960, Sample Num: 63360, Cur Loss: 0.00000186, Cur Avg Loss: 0.00256630, Log Avg loss: 0.00048463, Global Avg Loss: 0.01139379, Time: 0.0523 Steps: 44600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00000084, Cur Avg Loss: 0.00245990, Log Avg loss: 0.00035321, Global Avg Loss: 0.01134451, Time: 0.1085 Steps: 44800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004360, Sample Num: 69760, Cur Loss: 0.00001802, Cur Avg Loss: 0.00243094, Log Avg loss: 0.00182853, Global Avg Loss: 0.01130221, Time: 0.4334 Steps: 45000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004560, Sample Num: 72960, Cur Loss: 0.00000055, Cur Avg Loss: 0.00271570, Log Avg loss: 0.00892349, Global Avg Loss: 0.01129169, Time: 0.1273 Steps: 45200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004760, Sample Num: 76160, Cur Loss: 0.00000034, Cur Avg Loss: 0.00275159, Log Avg loss: 0.00356970, Global Avg Loss: 0.01125767, Time: 0.1413 Steps: 45400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 004960, Sample Num: 79360, Cur Loss: 0.00001626, Cur Avg Loss: 0.00264822, Log Avg loss: 0.00018822, Global Avg Loss: 0.01120912, Time: 0.1298 Steps: 45600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 005160, Sample Num: 82560, Cur Loss: 0.00003583, Cur Avg Loss: 0.00271336, Log Avg loss: 0.00432866, Global Avg Loss: 0.01117907, Time: 0.1389 Steps: 45800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 005360, Sample Num: 85760, Cur Loss: 0.00000062, Cur Avg Loss: 0.00264060, Log Avg loss: 0.00076357, Global Avg Loss: 0.01113379, Time: 0.2574 Steps: 46000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00000741, Cur Avg Loss: 0.00254655, Log Avg loss: 0.00002581, Global Avg Loss: 0.01108570, Time: 0.1488 Steps: 46200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005760, Sample Num: 92160, Cur Loss: 0.00000031, Cur Avg Loss: 0.00251448, Log Avg loss: 0.00162301, Global Avg Loss: 0.01104492, Time: 0.3386 Steps: 46400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00001147, Cur Avg Loss: 0.00277622, Log Avg loss: 0.01031422, Global Avg Loss: 0.01104178, Time: 0.2646 Steps: 46600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006160, Sample Num: 98560, Cur Loss: 0.00011898, Cur Avg Loss: 0.00292079, Log Avg loss: 0.00722914, Global Avg Loss: 0.01102549, Time: 0.2613 Steps: 46800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006360, Sample Num: 101760, Cur Loss: 0.00018657, Cur Avg Loss: 0.00284486, Log Avg loss: 0.00050621, Global Avg Loss: 0.01098072, Time: 0.3449 Steps: 47000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006560, Sample Num: 104960, Cur Loss: 0.00000041, Cur Avg Loss: 0.00281131, Log Avg loss: 0.00174454, Global Avg Loss: 0.01094159, Time: 0.2746 Steps: 47200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006760, Sample Num: 108160, Cur Loss: 0.00000504, Cur Avg Loss: 0.00285130, Log Avg loss: 0.00416271, Global Avg Loss: 0.01091298, Time: 0.0949 Steps: 47400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 006960, Sample Num: 111360, Cur Loss: 0.00249484, Cur Avg Loss: 0.00277098, Log Avg loss: 0.00005628, Global Avg Loss: 0.01086737, Time: 0.1041 Steps: 47600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 007160, Sample Num: 114560, Cur Loss: 0.00000470, Cur Avg Loss: 0.00270049, Log Avg loss: 0.00024727, Global Avg Loss: 0.01082293, Time: 0.0663 Steps: 47800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 007360, Sample Num: 117760, Cur Loss: 0.00000004, Cur Avg Loss: 0.00275001, Log Avg loss: 0.00452300, Global Avg Loss: 0.01079668, Time: 0.5269 Steps: 48000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007560, Sample Num: 120960, Cur Loss: 0.00000485, Cur Avg Loss: 0.00294362, Log Avg loss: 0.01006864, Global Avg Loss: 0.01079366, Time: 0.2608 Steps: 48200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00000046, Cur Avg Loss: 0.00299869, Log Avg loss: 0.00508011, Global Avg Loss: 0.01077005, Time: 0.2601 Steps: 48400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 007960, Sample Num: 127360, Cur Loss: 0.00000658, Cur Avg Loss: 0.00311488, Log Avg loss: 0.00762327, Global Avg Loss: 0.01075710, Time: 0.2995 Steps: 48600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00000186, Cur Avg Loss: 0.00305823, Log Avg loss: 0.00080323, Global Avg Loss: 0.01071631, Time: 0.1408 Steps: 48800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00003409, Cur Avg Loss: 0.00305684, Log Avg loss: 0.00300026, Global Avg Loss: 0.01068481, Time: 0.0483 Steps: 49000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008560, Sample Num: 136960, Cur Loss: 0.00000104, Cur Avg Loss: 0.00301313, Log Avg loss: 0.00118590, Global Avg Loss: 0.01064620, Time: 0.1171 Steps: 49200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008760, Sample Num: 140160, Cur Loss: 0.00000045, Cur Avg Loss: 0.00294725, Log Avg loss: 0.00012762, Global Avg Loss: 0.01060361, Time: 0.1028 Steps: 49400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 008960, Sample Num: 143360, Cur Loss: 0.00000174, Cur Avg Loss: 0.00297714, Log Avg loss: 0.00428664, Global Avg Loss: 0.01057814, Time: 0.1231 Steps: 49600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 009160, Sample Num: 146560, Cur Loss: 0.00000128, Cur Avg Loss: 0.00291251, Log Avg loss: 0.00001678, Global Avg Loss: 0.01053573, Time: 0.4141 Steps: 49800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00000032, Cur Avg Loss: 0.00285058, Log Avg loss: 0.00001447, Global Avg Loss: 0.01049364, Time: 0.1353 Steps: 50000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009560, Sample Num: 152960, Cur Loss: 0.00000240, Cur Avg Loss: 0.00286250, Log Avg loss: 0.00341998, Global Avg Loss: 0.01046546, Time: 0.0433 Steps: 50200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009760, Sample Num: 156160, Cur Loss: 0.00000037, Cur Avg Loss: 0.00280691, Log Avg loss: 0.00015016, Global Avg Loss: 0.01042453, Time: 0.1281 Steps: 50400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 009960, Sample Num: 159360, Cur Loss: 0.00006060, Cur Avg Loss: 0.00288036, Log Avg loss: 0.00646434, Global Avg Loss: 0.01040887, Time: 0.1448 Steps: 50600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010160, Sample Num: 162560, Cur Loss: 0.00000403, Cur Avg Loss: 0.00282415, Log Avg loss: 0.00002489, Global Avg Loss: 0.01036799, Time: 0.0643 Steps: 50800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00000006, Cur Avg Loss: 0.00280265, Log Avg loss: 0.00171045, Global Avg Loss: 0.01033404, Time: 0.0848 Steps: 51000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010560, Sample Num: 168960, Cur Loss: 0.00001279, Cur Avg Loss: 0.00279193, Log Avg loss: 0.00223701, Global Avg Loss: 0.01030241, Time: 0.0938 Steps: 51200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010760, Sample Num: 172160, Cur Loss: 0.00000097, Cur Avg Loss: 0.00274389, Log Avg loss: 0.00020702, Global Avg Loss: 0.01026313, Time: 0.2617 Steps: 51400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 010960, Sample Num: 175360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00269414, Log Avg loss: 0.00001750, Global Avg Loss: 0.01022342, Time: 0.0845 Steps: 51600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00000022, Cur Avg Loss: 0.00287217, Log Avg loss: 0.01262825, Global Avg Loss: 0.01023270, Time: 0.2826 Steps: 51800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 011360, Sample Num: 181760, Cur Loss: 0.00000078, Cur Avg Loss: 0.00290845, Log Avg loss: 0.00493329, Global Avg Loss: 0.01021232, Time: 0.0542 Steps: 52000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00000016, Cur Avg Loss: 0.00294632, Log Avg loss: 0.00509697, Global Avg Loss: 0.01019272, Time: 0.4134 Steps: 52200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00000051, Cur Avg Loss: 0.00289751, Log Avg loss: 0.00007670, Global Avg Loss: 0.01015411, Time: 0.4661 Steps: 52400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 011960, Sample Num: 191360, Cur Loss: 0.00000006, Cur Avg Loss: 0.00291435, Log Avg loss: 0.00390407, Global Avg Loss: 0.01013035, Time: 0.4157 Steps: 52600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012160, Sample Num: 194560, Cur Loss: 0.00000001, Cur Avg Loss: 0.00287003, Log Avg loss: 0.00022015, Global Avg Loss: 0.01009281, Time: 0.1277 Steps: 52800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012360, Sample Num: 197760, Cur Loss: 0.00000001, Cur Avg Loss: 0.00283588, Log Avg loss: 0.00075916, Global Avg Loss: 0.01005759, Time: 0.1304 Steps: 53000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012560, Sample Num: 200960, Cur Loss: 0.00003064, Cur Avg Loss: 0.00284493, Log Avg loss: 0.00340450, Global Avg Loss: 0.01003258, Time: 0.4094 Steps: 53200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012760, Sample Num: 204160, Cur Loss: 0.00000012, Cur Avg Loss: 0.00287069, Log Avg loss: 0.00448836, Global Avg Loss: 0.01001181, Time: 0.1183 Steps: 53400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 012960, Sample Num: 207360, Cur Loss: 0.00000047, Cur Avg Loss: 0.00282812, Log Avg loss: 0.00011203, Global Avg Loss: 0.00997487, Time: 0.1143 Steps: 53600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 013160, Sample Num: 210560, Cur Loss: 0.00000032, Cur Avg Loss: 0.00286438, Log Avg loss: 0.00521416, Global Avg Loss: 0.00995717, Time: 0.0924 Steps: 53800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 013360, Sample Num: 213760, Cur Loss: 0.00000238, Cur Avg Loss: 0.00282832, Log Avg loss: 0.00045557, Global Avg Loss: 0.00992198, Time: 0.2375 Steps: 54000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013560, Sample Num: 216960, Cur Loss: 0.00000001, Cur Avg Loss: 0.00278680, Log Avg loss: 0.00001345, Global Avg Loss: 0.00988542, Time: 0.4644 Steps: 54200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013760, Sample Num: 220160, Cur Loss: 0.00000273, Cur Avg Loss: 0.00274932, Log Avg loss: 0.00020777, Global Avg Loss: 0.00984984, Time: 0.3423 Steps: 54400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 013960, Sample Num: 223360, Cur Loss: 0.00000091, Cur Avg Loss: 0.00276321, Log Avg loss: 0.00371895, Global Avg Loss: 0.00982738, Time: 0.2401 Steps: 54600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014160, Sample Num: 226560, Cur Loss: 0.00000013, Cur Avg Loss: 0.00272503, Log Avg loss: 0.00005997, Global Avg Loss: 0.00979174, Time: 0.4013 Steps: 54800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014360, Sample Num: 229760, Cur Loss: 0.00000028, Cur Avg Loss: 0.00281047, Log Avg loss: 0.00885972, Global Avg Loss: 0.00978835, Time: 0.1531 Steps: 55000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014560, Sample Num: 232960, Cur Loss: 0.00000019, Cur Avg Loss: 0.00277246, Log Avg loss: 0.00004318, Global Avg Loss: 0.00975304, Time: 0.1293 Steps: 55200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014760, Sample Num: 236160, Cur Loss: 0.00000050, Cur Avg Loss: 0.00273529, Log Avg loss: 0.00002963, Global Avg Loss: 0.00971793, Time: 0.0653 Steps: 55400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 014960, Sample Num: 239360, Cur Loss: 0.00000001, Cur Avg Loss: 0.00273066, Log Avg loss: 0.00238922, Global Avg Loss: 0.00969157, Time: 0.3693 Steps: 55600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015160, Sample Num: 242560, Cur Loss: 0.00000072, Cur Avg Loss: 0.00271039, Log Avg loss: 0.00119405, Global Avg Loss: 0.00966112, Time: 0.3389 Steps: 55800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015360, Sample Num: 245760, Cur Loss: 0.00000001, Cur Avg Loss: 0.00267516, Log Avg loss: 0.00000430, Global Avg Loss: 0.00962663, Time: 0.5511 Steps: 56000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 015560, Sample Num: 248960, Cur Loss: 0.00000001, Cur Avg Loss: 0.00268448, Log Avg loss: 0.00340021, Global Avg Loss: 0.00960447, Time: 0.2612 Steps: 56200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 015760, Sample Num: 252160, Cur Loss: 0.00000006, Cur Avg Loss: 0.00266462, Log Avg loss: 0.00111943, Global Avg Loss: 0.00957438, Time: 0.0562 Steps: 56400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 015960, Sample Num: 255360, Cur Loss: 0.00000003, Cur Avg Loss: 0.00263135, Log Avg loss: 0.00000965, Global Avg Loss: 0.00954058, Time: 0.1353 Steps: 56600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016160, Sample Num: 258560, Cur Loss: 0.00000001, Cur Avg Loss: 0.00259881, Log Avg loss: 0.00000264, Global Avg Loss: 0.00950700, Time: 0.1057 Steps: 56800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016360, Sample Num: 261760, Cur Loss: 0.00000127, Cur Avg Loss: 0.00256706, Log Avg loss: 0.00000151, Global Avg Loss: 0.00947365, Time: 0.2786 Steps: 57000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016560, Sample Num: 264960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00255986, Log Avg loss: 0.00197127, Global Avg Loss: 0.00944741, Time: 0.1155 Steps: 57200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016760, Sample Num: 268160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00252950, Log Avg loss: 0.00001524, Global Avg Loss: 0.00941455, Time: 0.4003 Steps: 57400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 016960, Sample Num: 271360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00249998, Log Avg loss: 0.00002645, Global Avg Loss: 0.00938195, Time: 0.1850 Steps: 57600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017160, Sample Num: 274560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00264758, Log Avg loss: 0.01516423, Global Avg Loss: 0.00940196, Time: 0.2574 Steps: 57800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017360, Sample Num: 277760, Cur Loss: 0.00264521, Cur Avg Loss: 0.00261781, Log Avg loss: 0.00006344, Global Avg Loss: 0.00936976, Time: 0.0471 Steps: 58000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 017560, Sample Num: 280960, Cur Loss: 0.00000010, Cur Avg Loss: 0.00259001, Log Avg loss: 0.00017679, Global Avg Loss: 0.00933817, Time: 0.5528 Steps: 58200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 017760, Sample Num: 284160, Cur Loss: 0.00000042, Cur Avg Loss: 0.00259386, Log Avg loss: 0.00293195, Global Avg Loss: 0.00931623, Time: 0.0785 Steps: 58400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 017960, Sample Num: 287360, Cur Loss: 0.00000003, Cur Avg Loss: 0.00256521, Log Avg loss: 0.00002102, Global Avg Loss: 0.00928450, Time: 0.1262 Steps: 58600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018160, Sample Num: 290560, Cur Loss: 0.00000001, Cur Avg Loss: 0.00253697, Log Avg loss: 0.00000101, Global Avg Loss: 0.00925293, Time: 0.2830 Steps: 58800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018360, Sample Num: 293760, Cur Loss: 0.00000072, Cur Avg Loss: 0.00255194, Log Avg loss: 0.00391092, Global Avg Loss: 0.00923482, Time: 0.1247 Steps: 59000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018560, Sample Num: 296960, Cur Loss: 0.00000048, Cur Avg Loss: 0.00252447, Log Avg loss: 0.00000325, Global Avg Loss: 0.00920363, Time: 0.2569 Steps: 59200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018760, Sample Num: 300160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00249802, Log Avg loss: 0.00004286, Global Avg Loss: 0.00917279, Time: 0.2630 Steps: 59400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 018960, Sample Num: 303360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00248549, Log Avg loss: 0.00131090, Global Avg Loss: 0.00914640, Time: 0.1307 Steps: 59600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019160, Sample Num: 306560, Cur Loss: 0.00000007, Cur Avg Loss: 0.00248169, Log Avg loss: 0.00212096, Global Avg Loss: 0.00912291, Time: 0.2616 Steps: 59800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019360, Sample Num: 309760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00250495, Log Avg loss: 0.00473329, Global Avg Loss: 0.00910828, Time: 0.1305 Steps: 60000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 019560, Sample Num: 312960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00248029, Log Avg loss: 0.00009340, Global Avg Loss: 0.00907833, Time: 0.3571 Steps: 60200, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 019760, Sample Num: 316160, Cur Loss: 0.00000376, Cur Avg Loss: 0.00245525, Log Avg loss: 0.00000612, Global Avg Loss: 0.00904829, Time: 0.4218 Steps: 60400, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 019960, Sample Num: 319360, Cur Loss: 0.00000118, Cur Avg Loss: 0.00243067, Log Avg loss: 0.00000263, Global Avg Loss: 0.00901843, Time: 0.0875 Steps: 60600, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 020160, Sample Num: 322560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00240656, Log Avg loss: 0.00000049, Global Avg Loss: 0.00898877, Time: 0.3349 Steps: 60800, Updated lr: 0.000070 ***** Running evaluation checkpoint-60960 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-60960 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4103.537411, Avg time per batch (s): 0.200000 {"eval_avg_loss": 0.016265, "eval_total_loss": 44.079425, "eval_acc": 0.998085, "eval_prec": 0.998985, "eval_recall": 0.997192, "eval_f1": 0.998088, "eval_roc_auc": 0.999924, "eval_pr_auc": 0.999926, "eval_confusion_matrix": {"tn": 21605, "fp": 22, "fn": 61, "tp": 21661}, "eval_mcc2": 0.996172, "eval_mcc": 0.996172, "eval_sn": 0.997192, "eval_sp": 0.998983, "update_flag": false, "test_avg_loss": 0.013341, "test_total_loss": 54.218997, "test_acc": 0.998278, "test_prec": 0.999199, "test_recall": 0.997354, "test_f1": 0.998276, "test_roc_auc": 0.999945, "test_pr_auc": 0.99995, "test_confusion_matrix": {"tn": 32491, "fp": 26, "fn": 86, "tp": 32419}, "test_mcc2": 0.996557, "test_mcc": 0.996557, "test_sn": 0.997354, "test_sp": 0.9992, "lr": 7.006896551724137e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.008990315658425578, "train_cur_epoch_loss": 50.04895270067402, "train_cur_epoch_avg_loss": 0.002463039010859942, "train_cur_epoch_time": 4103.53741145134, "train_cur_epoch_avg_time": 0.20194573875252655, "epoch": 3, "step": 60960} ################################################## Training, Epoch: 0004, Batch: 000040, Sample Num: 640, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000083, Log Avg loss: 0.00766318, Global Avg Loss: 0.00898442, Time: 0.2752 Steps: 61000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000240, Sample Num: 3840, Cur Loss: 0.00025456, Cur Avg Loss: 0.00005823, Log Avg loss: 0.00006972, Global Avg Loss: 0.00895529, Time: 0.1422 Steps: 61200, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000440, Sample Num: 7040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00003951, Log Avg loss: 0.00001703, Global Avg Loss: 0.00892617, Time: 0.1195 Steps: 61400, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000640, Sample Num: 10240, Cur Loss: 0.00000006, Cur Avg Loss: 0.00002804, Log Avg loss: 0.00000280, Global Avg Loss: 0.00889720, Time: 0.2448 Steps: 61600, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000840, Sample Num: 13440, Cur Loss: 0.00000001, Cur Avg Loss: 0.00002194, Log Avg loss: 0.00000245, Global Avg Loss: 0.00886842, Time: 0.1353 Steps: 61800, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001803, Log Avg loss: 0.00000160, Global Avg Loss: 0.00883981, Time: 0.0754 Steps: 62000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 001240, Sample Num: 19840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00003013, Log Avg loss: 0.00009307, Global Avg Loss: 0.00881169, Time: 0.2619 Steps: 62200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001440, Sample Num: 23040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00002608, Log Avg loss: 0.00000096, Global Avg Loss: 0.00878345, Time: 0.1369 Steps: 62400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001640, Sample Num: 26240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00002657, Log Avg loss: 0.00003007, Global Avg Loss: 0.00875548, Time: 0.0477 Steps: 62600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001840, Sample Num: 29440, Cur Loss: 0.00000001, Cur Avg Loss: 0.00002455, Log Avg loss: 0.00000797, Global Avg Loss: 0.00872762, Time: 0.1495 Steps: 62800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00014090, Log Avg loss: 0.00121132, Global Avg Loss: 0.00870376, Time: 0.0780 Steps: 63000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002240, Sample Num: 35840, Cur Loss: 0.00000042, Cur Avg Loss: 0.00016918, Log Avg loss: 0.00045769, Global Avg Loss: 0.00867767, Time: 0.1969 Steps: 63200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002440, Sample Num: 39040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00018962, Log Avg loss: 0.00041858, Global Avg Loss: 0.00865161, Time: 0.3431 Steps: 63400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002640, Sample Num: 42240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00043651, Log Avg loss: 0.00344854, Global Avg Loss: 0.00863525, Time: 0.1045 Steps: 63600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00070699, Log Avg loss: 0.00427734, Global Avg Loss: 0.00862159, Time: 0.1323 Steps: 63800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126367, Log Avg loss: 0.00916856, Global Avg Loss: 0.00862330, Time: 0.0788 Steps: 64000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 003240, Sample Num: 51840, Cur Loss: 0.00000018, Cur Avg Loss: 0.00151974, Log Avg loss: 0.00541190, Global Avg Loss: 0.00861330, Time: 0.1309 Steps: 64200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003440, Sample Num: 55040, Cur Loss: 0.00000003, Cur Avg Loss: 0.00143502, Log Avg loss: 0.00006262, Global Avg Loss: 0.00858674, Time: 0.2613 Steps: 64400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003640, Sample Num: 58240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137835, Log Avg loss: 0.00040366, Global Avg Loss: 0.00856141, Time: 0.1307 Steps: 64600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003840, Sample Num: 61440, Cur Loss: 0.00000054, Cur Avg Loss: 0.00147385, Log Avg loss: 0.00321187, Global Avg Loss: 0.00854490, Time: 0.3485 Steps: 64800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00000019, Cur Avg Loss: 0.00154167, Log Avg loss: 0.00284377, Global Avg Loss: 0.00852735, Time: 0.0676 Steps: 65000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004240, Sample Num: 67840, Cur Loss: 0.00000029, Cur Avg Loss: 0.00146900, Log Avg loss: 0.00000117, Global Avg Loss: 0.00850120, Time: 0.1381 Steps: 65200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004440, Sample Num: 71040, Cur Loss: 0.00002226, Cur Avg Loss: 0.00161770, Log Avg loss: 0.00477016, Global Avg Loss: 0.00848979, Time: 0.2668 Steps: 65400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00000006, Cur Avg Loss: 0.00202912, Log Avg loss: 0.01116257, Global Avg Loss: 0.00849794, Time: 0.1625 Steps: 65600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00000025, Cur Avg Loss: 0.00195150, Log Avg loss: 0.00015070, Global Avg Loss: 0.00847257, Time: 0.3345 Steps: 65800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 005040, Sample Num: 80640, Cur Loss: 0.00000006, Cur Avg Loss: 0.00188003, Log Avg loss: 0.00015045, Global Avg Loss: 0.00844735, Time: 0.1319 Steps: 66000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 005240, Sample Num: 83840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00180856, Log Avg loss: 0.00000745, Global Avg Loss: 0.00842185, Time: 0.2816 Steps: 66200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005440, Sample Num: 87040, Cur Loss: 0.00000012, Cur Avg Loss: 0.00174213, Log Avg loss: 0.00000183, Global Avg Loss: 0.00839649, Time: 0.2006 Steps: 66400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005640, Sample Num: 90240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00169044, Log Avg loss: 0.00028445, Global Avg Loss: 0.00837213, Time: 0.4847 Steps: 66600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 005840, Sample Num: 93440, Cur Loss: 0.00000691, Cur Avg Loss: 0.00190783, Log Avg loss: 0.00803826, Global Avg Loss: 0.00837113, Time: 0.1303 Steps: 66800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006040, Sample Num: 96640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00227904, Log Avg loss: 0.01311821, Global Avg Loss: 0.00838530, Time: 0.0855 Steps: 67000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006240, Sample Num: 99840, Cur Loss: 0.00000016, Cur Avg Loss: 0.00220696, Log Avg loss: 0.00003022, Global Avg Loss: 0.00836043, Time: 0.1456 Steps: 67200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006440, Sample Num: 103040, Cur Loss: 0.00001509, Cur Avg Loss: 0.00213974, Log Avg loss: 0.00004260, Global Avg Loss: 0.00833575, Time: 0.1259 Steps: 67400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006640, Sample Num: 106240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00207564, Log Avg loss: 0.00001138, Global Avg Loss: 0.00831112, Time: 0.2922 Steps: 67600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 006840, Sample Num: 109440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00209367, Log Avg loss: 0.00269226, Global Avg Loss: 0.00829455, Time: 0.0558 Steps: 67800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007040, Sample Num: 112640, Cur Loss: 0.00000037, Cur Avg Loss: 0.00203716, Log Avg loss: 0.00010466, Global Avg Loss: 0.00827046, Time: 0.3506 Steps: 68000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007240, Sample Num: 115840, Cur Loss: 0.00000084, Cur Avg Loss: 0.00201485, Log Avg loss: 0.00122937, Global Avg Loss: 0.00824981, Time: 0.4204 Steps: 68200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 007440, Sample Num: 119040, Cur Loss: 0.00000010, Cur Avg Loss: 0.00212346, Log Avg loss: 0.00605527, Global Avg Loss: 0.00824339, Time: 0.1397 Steps: 68400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 007640, Sample Num: 122240, Cur Loss: 0.00000010, Cur Avg Loss: 0.00221057, Log Avg loss: 0.00545092, Global Avg Loss: 0.00823525, Time: 0.4082 Steps: 68600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 007840, Sample Num: 125440, Cur Loss: 0.00000007, Cur Avg Loss: 0.00227741, Log Avg loss: 0.00483068, Global Avg Loss: 0.00822536, Time: 0.2628 Steps: 68800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008040, Sample Num: 128640, Cur Loss: 0.00006830, Cur Avg Loss: 0.00243060, Log Avg loss: 0.00843581, Global Avg Loss: 0.00822597, Time: 0.0861 Steps: 69000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008240, Sample Num: 131840, Cur Loss: 0.00000002, Cur Avg Loss: 0.00241416, Log Avg loss: 0.00175339, Global Avg Loss: 0.00820726, Time: 0.4026 Steps: 69200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008440, Sample Num: 135040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00235877, Log Avg loss: 0.00007648, Global Avg Loss: 0.00818383, Time: 0.2627 Steps: 69400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008640, Sample Num: 138240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00230419, Log Avg loss: 0.00000101, Global Avg Loss: 0.00816031, Time: 0.1975 Steps: 69600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 008840, Sample Num: 141440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00225207, Log Avg loss: 0.00000071, Global Avg Loss: 0.00813693, Time: 0.1219 Steps: 69800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009040, Sample Num: 144640, Cur Loss: 0.00000015, Cur Avg Loss: 0.00227135, Log Avg loss: 0.00312333, Global Avg Loss: 0.00812261, Time: 0.0893 Steps: 70000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009240, Sample Num: 147840, Cur Loss: 0.00000003, Cur Avg Loss: 0.00222222, Log Avg loss: 0.00000143, Global Avg Loss: 0.00809947, Time: 0.1404 Steps: 70200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 009440, Sample Num: 151040, Cur Loss: 0.00000001, Cur Avg Loss: 0.00226886, Log Avg loss: 0.00442365, Global Avg Loss: 0.00808903, Time: 0.0830 Steps: 70400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 009640, Sample Num: 154240, Cur Loss: 0.00000107, Cur Avg Loss: 0.00222189, Log Avg loss: 0.00000519, Global Avg Loss: 0.00806613, Time: 0.2607 Steps: 70600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 009840, Sample Num: 157440, Cur Loss: 0.00000003, Cur Avg Loss: 0.00219378, Log Avg loss: 0.00083877, Global Avg Loss: 0.00804571, Time: 0.4174 Steps: 70800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010040, Sample Num: 160640, Cur Loss: 0.00000001, Cur Avg Loss: 0.00215017, Log Avg loss: 0.00000468, Global Avg Loss: 0.00802306, Time: 0.1065 Steps: 71000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010240, Sample Num: 163840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00210820, Log Avg loss: 0.00000100, Global Avg Loss: 0.00800053, Time: 0.5228 Steps: 71200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010440, Sample Num: 167040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00207495, Log Avg loss: 0.00037277, Global Avg Loss: 0.00797916, Time: 0.4142 Steps: 71400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010640, Sample Num: 170240, Cur Loss: 0.00000139, Cur Avg Loss: 0.00209737, Log Avg loss: 0.00326762, Global Avg Loss: 0.00796600, Time: 0.4520 Steps: 71600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 010840, Sample Num: 173440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00205897, Log Avg loss: 0.00001578, Global Avg Loss: 0.00794386, Time: 0.2605 Steps: 71800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011040, Sample Num: 176640, Cur Loss: 0.31485409, Cur Avg Loss: 0.00210757, Log Avg loss: 0.00474167, Global Avg Loss: 0.00793496, Time: 0.1570 Steps: 72000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011240, Sample Num: 179840, Cur Loss: 0.00000641, Cur Avg Loss: 0.00222235, Log Avg loss: 0.00855861, Global Avg Loss: 0.00793669, Time: 0.2665 Steps: 72200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 011440, Sample Num: 183040, Cur Loss: 0.00000002, Cur Avg Loss: 0.00219294, Log Avg loss: 0.00053990, Global Avg Loss: 0.00791626, Time: 0.1004 Steps: 72400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 011640, Sample Num: 186240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00222918, Log Avg loss: 0.00430220, Global Avg Loss: 0.00790630, Time: 0.2734 Steps: 72600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 011840, Sample Num: 189440, Cur Loss: 0.00000036, Cur Avg Loss: 0.00219160, Log Avg loss: 0.00000413, Global Avg Loss: 0.00788459, Time: 0.1943 Steps: 72800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012040, Sample Num: 192640, Cur Loss: 0.00000030, Cur Avg Loss: 0.00215540, Log Avg loss: 0.00001284, Global Avg Loss: 0.00786302, Time: 0.2618 Steps: 73000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012240, Sample Num: 195840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00217820, Log Avg loss: 0.00355052, Global Avg Loss: 0.00785124, Time: 0.1254 Steps: 73200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012440, Sample Num: 199040, Cur Loss: 0.00000001, Cur Avg Loss: 0.00214326, Log Avg loss: 0.00000475, Global Avg Loss: 0.00782986, Time: 0.0789 Steps: 73400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012640, Sample Num: 202240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00220029, Log Avg loss: 0.00574772, Global Avg Loss: 0.00782420, Time: 0.2633 Steps: 73600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 012840, Sample Num: 205440, Cur Loss: 0.00000191, Cur Avg Loss: 0.00216603, Log Avg loss: 0.00000108, Global Avg Loss: 0.00780300, Time: 0.0684 Steps: 73800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013040, Sample Num: 208640, Cur Loss: 0.00000001, Cur Avg Loss: 0.00223538, Log Avg loss: 0.00668754, Global Avg Loss: 0.00779999, Time: 0.2844 Steps: 74000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013240, Sample Num: 211840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00220174, Log Avg loss: 0.00000815, Global Avg Loss: 0.00777898, Time: 0.1940 Steps: 74200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 013440, Sample Num: 215040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00216899, Log Avg loss: 0.00000099, Global Avg Loss: 0.00775808, Time: 0.1752 Steps: 74400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 013640, Sample Num: 218240, Cur Loss: 0.00000018, Cur Avg Loss: 0.00213721, Log Avg loss: 0.00000175, Global Avg Loss: 0.00773728, Time: 0.1779 Steps: 74600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 013840, Sample Num: 221440, Cur Loss: 0.00000012, Cur Avg Loss: 0.00216034, Log Avg loss: 0.00373774, Global Avg Loss: 0.00772659, Time: 0.1836 Steps: 74800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014040, Sample Num: 224640, Cur Loss: 0.00000018, Cur Avg Loss: 0.00213687, Log Avg loss: 0.00051300, Global Avg Loss: 0.00770735, Time: 0.1279 Steps: 75000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014240, Sample Num: 227840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00216635, Log Avg loss: 0.00423580, Global Avg Loss: 0.00769812, Time: 0.0830 Steps: 75200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014440, Sample Num: 231040, Cur Loss: 0.00000006, Cur Avg Loss: 0.00213653, Log Avg loss: 0.00001287, Global Avg Loss: 0.00767773, Time: 0.1273 Steps: 75400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014640, Sample Num: 234240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00210751, Log Avg loss: 0.00001283, Global Avg Loss: 0.00765746, Time: 0.2553 Steps: 75600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 014840, Sample Num: 237440, Cur Loss: 0.00000086, Cur Avg Loss: 0.00210590, Log Avg loss: 0.00198810, Global Avg Loss: 0.00764250, Time: 0.2105 Steps: 75800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015040, Sample Num: 240640, Cur Loss: 0.00000001, Cur Avg Loss: 0.00207807, Log Avg loss: 0.00001265, Global Avg Loss: 0.00762242, Time: 0.5272 Steps: 76000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015240, Sample Num: 243840, Cur Loss: 0.00000001, Cur Avg Loss: 0.00205080, Log Avg loss: 0.00000049, Global Avg Loss: 0.00760241, Time: 0.1493 Steps: 76200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 015440, Sample Num: 247040, Cur Loss: 0.00000007, Cur Avg Loss: 0.00202469, Log Avg loss: 0.00003513, Global Avg Loss: 0.00758260, Time: 0.2347 Steps: 76400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 015640, Sample Num: 250240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00199884, Log Avg loss: 0.00000264, Global Avg Loss: 0.00756281, Time: 0.1071 Steps: 76600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 015840, Sample Num: 253440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00197361, Log Avg loss: 0.00000091, Global Avg Loss: 0.00754312, Time: 0.1457 Steps: 76800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016040, Sample Num: 256640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00194901, Log Avg loss: 0.00000044, Global Avg Loss: 0.00752353, Time: 0.0816 Steps: 77000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016240, Sample Num: 259840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00192501, Log Avg loss: 0.00000013, Global Avg Loss: 0.00750404, Time: 0.1002 Steps: 77200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016440, Sample Num: 263040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00193531, Log Avg loss: 0.00277190, Global Avg Loss: 0.00749181, Time: 0.1284 Steps: 77400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016640, Sample Num: 266240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00191478, Log Avg loss: 0.00022714, Global Avg Loss: 0.00747309, Time: 0.1107 Steps: 77600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 016840, Sample Num: 269440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00189218, Log Avg loss: 0.00001205, Global Avg Loss: 0.00745391, Time: 0.1980 Steps: 77800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017040, Sample Num: 272640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00186997, Log Avg loss: 0.00000004, Global Avg Loss: 0.00743479, Time: 0.0829 Steps: 78000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017240, Sample Num: 275840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00205977, Log Avg loss: 0.01823033, Global Avg Loss: 0.00746240, Time: 0.2614 Steps: 78200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 017440, Sample Num: 279040, Cur Loss: 0.00000017, Cur Avg Loss: 0.00203618, Log Avg loss: 0.00000319, Global Avg Loss: 0.00744338, Time: 0.0705 Steps: 78400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 017640, Sample Num: 282240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00201317, Log Avg loss: 0.00000623, Global Avg Loss: 0.00742445, Time: 0.0927 Steps: 78600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 017840, Sample Num: 285440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00199104, Log Avg loss: 0.00003935, Global Avg Loss: 0.00740571, Time: 0.2639 Steps: 78800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018040, Sample Num: 288640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00196908, Log Avg loss: 0.00001024, Global Avg Loss: 0.00738699, Time: 0.0870 Steps: 79000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018240, Sample Num: 291840, Cur Loss: 0.00000002, Cur Avg Loss: 0.00194751, Log Avg loss: 0.00000164, Global Avg Loss: 0.00736834, Time: 0.1204 Steps: 79200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018440, Sample Num: 295040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00198678, Log Avg loss: 0.00556839, Global Avg Loss: 0.00736380, Time: 0.4342 Steps: 79400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018640, Sample Num: 298240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00196546, Log Avg loss: 0.00000024, Global Avg Loss: 0.00734530, Time: 0.5138 Steps: 79600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 018840, Sample Num: 301440, Cur Loss: 0.00000042, Cur Avg Loss: 0.00197846, Log Avg loss: 0.00318988, Global Avg Loss: 0.00733489, Time: 0.1172 Steps: 79800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019040, Sample Num: 304640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00195773, Log Avg loss: 0.00000495, Global Avg Loss: 0.00731656, Time: 0.1665 Steps: 80000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019240, Sample Num: 307840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00193742, Log Avg loss: 0.00000382, Global Avg Loss: 0.00729832, Time: 0.2487 Steps: 80200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 019440, Sample Num: 311040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00191751, Log Avg loss: 0.00000176, Global Avg Loss: 0.00728017, Time: 0.4211 Steps: 80400, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 019640, Sample Num: 314240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00189802, Log Avg loss: 0.00000346, Global Avg Loss: 0.00726212, Time: 0.0810 Steps: 80600, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 019840, Sample Num: 317440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00187889, Log Avg loss: 0.00000040, Global Avg Loss: 0.00724414, Time: 0.1159 Steps: 80800, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 020040, Sample Num: 320640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00186014, Log Avg loss: 0.00000020, Global Avg Loss: 0.00722626, Time: 0.0641 Steps: 81000, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 020240, Sample Num: 323840, Cur Loss: 0.00000001, Cur Avg Loss: 0.00190684, Log Avg loss: 0.00658640, Global Avg Loss: 0.00722468, Time: 0.4332 Steps: 81200, Updated lr: 0.000060 ***** Running evaluation checkpoint-81280 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-81280 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4137.115569, Avg time per batch (s): 0.200000 {"eval_avg_loss": 0.01993, "eval_total_loss": 54.011375, "eval_acc": 0.99797, "eval_prec": 0.998939, "eval_recall": 0.997008, "eval_f1": 0.997972, "eval_roc_auc": 0.999879, "eval_pr_auc": 0.999834, "eval_confusion_matrix": {"tn": 21604, "fp": 23, "fn": 65, "tp": 21657}, "eval_mcc2": 0.995942, "eval_mcc": 0.995942, "eval_sn": 0.997008, "eval_sp": 0.998937, "update_flag": false, "test_avg_loss": 0.015864, "test_total_loss": 64.470198, "test_acc": 0.998278, "test_prec": 0.999076, "test_recall": 0.997477, "test_f1": 0.998276, "test_roc_auc": 0.999949, "test_pr_auc": 0.999953, "test_confusion_matrix": {"tn": 32487, "fp": 30, "fn": 82, "tp": 32423}, "test_mcc2": 0.996556, "test_mcc": 0.996556, "test_sn": 0.997477, "test_sp": 0.999077, "lr": 6.005911330049261e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.007229863548323162, "train_cur_epoch_loss": 39.59366667006459, "train_cur_epoch_avg_loss": 0.0019485072180149899, "train_cur_epoch_time": 4137.115569114685, "train_cur_epoch_avg_time": 0.20359820714147073, "epoch": 4, "step": 81280} ################################################## Training, Epoch: 0005, Batch: 000120, Sample Num: 1920, Cur Loss: 0.00000002, Cur Avg Loss: 0.00000023, Log Avg loss: 0.00499638, Global Avg Loss: 0.00721921, Time: 0.5090 Steps: 81400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000320, Sample Num: 5120, Cur Loss: 0.00000023, Cur Avg Loss: 0.00025970, Log Avg loss: 0.00041538, Global Avg Loss: 0.00720253, Time: 0.5182 Steps: 81600, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00000048, Cur Avg Loss: 0.00015993, Log Avg loss: 0.00000030, Global Avg Loss: 0.00718492, Time: 0.5651 Steps: 81800, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000720, Sample Num: 11520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00011552, Log Avg loss: 0.00000004, Global Avg Loss: 0.00716740, Time: 0.0641 Steps: 82000, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000920, Sample Num: 14720, Cur Loss: 0.00001603, Cur Avg Loss: 0.00009549, Log Avg loss: 0.00002337, Global Avg Loss: 0.00715001, Time: 0.2664 Steps: 82200, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 001120, Sample Num: 17920, Cur Loss: 0.00000012, Cur Avg Loss: 0.00007869, Log Avg loss: 0.00000143, Global Avg Loss: 0.00713266, Time: 0.0636 Steps: 82400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 001320, Sample Num: 21120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00006680, Log Avg loss: 0.00000021, Global Avg Loss: 0.00711539, Time: 0.1080 Steps: 82600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00005824, Log Avg loss: 0.00000171, Global Avg Loss: 0.00709821, Time: 0.2571 Steps: 82800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001720, Sample Num: 27520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00005152, Log Avg loss: 0.00000052, Global Avg Loss: 0.00708111, Time: 0.2685 Steps: 83000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00004616, Log Avg loss: 0.00000006, Global Avg Loss: 0.00706409, Time: 0.0781 Steps: 83200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002120, Sample Num: 33920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00005641, Log Avg loss: 0.00015472, Global Avg Loss: 0.00704752, Time: 0.0848 Steps: 83400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00005176, Log Avg loss: 0.00000249, Global Avg Loss: 0.00703066, Time: 0.3831 Steps: 83600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002520, Sample Num: 40320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00004766, Log Avg loss: 0.00000008, Global Avg Loss: 0.00701388, Time: 0.2583 Steps: 83800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00003591, Cur Avg Loss: 0.00042229, Log Avg loss: 0.00514267, Global Avg Loss: 0.00700943, Time: 0.1366 Steps: 84000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002920, Sample Num: 46720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00086463, Log Avg loss: 0.00688047, Global Avg Loss: 0.00700912, Time: 0.2530 Steps: 84200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 003120, Sample Num: 49920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124937, Log Avg loss: 0.00686661, Global Avg Loss: 0.00700878, Time: 0.2308 Steps: 84400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 003320, Sample Num: 53120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148180, Log Avg loss: 0.00510768, Global Avg Loss: 0.00700429, Time: 0.0902 Steps: 84600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00000006, Cur Avg Loss: 0.00140073, Log Avg loss: 0.00005487, Global Avg Loss: 0.00698790, Time: 0.3333 Steps: 84800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003720, Sample Num: 59520, Cur Loss: 0.00000012, Cur Avg Loss: 0.00133703, Log Avg loss: 0.00021600, Global Avg Loss: 0.00697197, Time: 0.2161 Steps: 85000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003920, Sample Num: 62720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00143805, Log Avg loss: 0.00331700, Global Avg Loss: 0.00696339, Time: 0.1155 Steps: 85200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00000006, Cur Avg Loss: 0.00137139, Log Avg loss: 0.00006479, Global Avg Loss: 0.00694723, Time: 0.2783 Steps: 85400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004320, Sample Num: 69120, Cur Loss: 0.00000001, Cur Avg Loss: 0.00130806, Log Avg loss: 0.00000345, Global Avg Loss: 0.00693101, Time: 0.3526 Steps: 85600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00000006, Cur Avg Loss: 0.00149615, Log Avg loss: 0.00555906, Global Avg Loss: 0.00692781, Time: 0.0917 Steps: 85800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004720, Sample Num: 75520, Cur Loss: 0.00000075, Cur Avg Loss: 0.00172661, Log Avg loss: 0.00693486, Global Avg Loss: 0.00692782, Time: 0.0568 Steps: 86000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 004920, Sample Num: 78720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165716, Log Avg loss: 0.00001809, Global Avg Loss: 0.00691179, Time: 0.4058 Steps: 86200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 005120, Sample Num: 81920, Cur Loss: 0.00000115, Cur Avg Loss: 0.00170868, Log Avg loss: 0.00297609, Global Avg Loss: 0.00690268, Time: 0.1995 Steps: 86400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 005320, Sample Num: 85120, Cur Loss: 0.00000002, Cur Avg Loss: 0.00164475, Log Avg loss: 0.00000829, Global Avg Loss: 0.00688676, Time: 0.2899 Steps: 86600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005520, Sample Num: 88320, Cur Loss: 0.00000004, Cur Avg Loss: 0.00158518, Log Avg loss: 0.00000046, Global Avg Loss: 0.00687089, Time: 0.2742 Steps: 86800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005720, Sample Num: 91520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164460, Log Avg loss: 0.00328476, Global Avg Loss: 0.00686265, Time: 0.2937 Steps: 87000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 005920, Sample Num: 94720, Cur Loss: 0.00000006, Cur Avg Loss: 0.00187375, Log Avg loss: 0.00842741, Global Avg Loss: 0.00686624, Time: 0.1183 Steps: 87200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006120, Sample Num: 97920, Cur Loss: 0.00000081, Cur Avg Loss: 0.00214134, Log Avg loss: 0.01006206, Global Avg Loss: 0.00687355, Time: 0.1731 Steps: 87400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006320, Sample Num: 101120, Cur Loss: 0.00000012, Cur Avg Loss: 0.00207392, Log Avg loss: 0.00001070, Global Avg Loss: 0.00685788, Time: 0.4002 Steps: 87600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006520, Sample Num: 104320, Cur Loss: 0.00000003, Cur Avg Loss: 0.00201135, Log Avg loss: 0.00003412, Global Avg Loss: 0.00684234, Time: 0.4023 Steps: 87800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006720, Sample Num: 107520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00195150, Log Avg loss: 0.00000051, Global Avg Loss: 0.00682679, Time: 0.3843 Steps: 88000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 006920, Sample Num: 110720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00189510, Log Avg loss: 0.00000007, Global Avg Loss: 0.00681131, Time: 0.2629 Steps: 88200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 007120, Sample Num: 113920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00184187, Log Avg loss: 0.00000015, Global Avg Loss: 0.00679590, Time: 0.3188 Steps: 88400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 007320, Sample Num: 117120, Cur Loss: 0.00000030, Cur Avg Loss: 0.00181724, Log Avg loss: 0.00094021, Global Avg Loss: 0.00678268, Time: 0.1065 Steps: 88600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007520, Sample Num: 120320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00204129, Log Avg loss: 0.01024162, Global Avg Loss: 0.00679047, Time: 0.1122 Steps: 88800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007720, Sample Num: 123520, Cur Loss: 0.00000006, Cur Avg Loss: 0.00215941, Log Avg loss: 0.00660087, Global Avg Loss: 0.00679004, Time: 0.1218 Steps: 89000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 007920, Sample Num: 126720, Cur Loss: 0.00000081, Cur Avg Loss: 0.00231526, Log Avg loss: 0.00833097, Global Avg Loss: 0.00679350, Time: 0.1732 Steps: 89200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008120, Sample Num: 129920, Cur Loss: 0.00000026, Cur Avg Loss: 0.00234992, Log Avg loss: 0.00372261, Global Avg Loss: 0.00678663, Time: 0.2623 Steps: 89400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008320, Sample Num: 133120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00229411, Log Avg loss: 0.00002787, Global Avg Loss: 0.00677154, Time: 0.5991 Steps: 89600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008520, Sample Num: 136320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00224029, Log Avg loss: 0.00000140, Global Avg Loss: 0.00675646, Time: 0.2116 Steps: 89800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008720, Sample Num: 139520, Cur Loss: 0.00000006, Cur Avg Loss: 0.00218893, Log Avg loss: 0.00000137, Global Avg Loss: 0.00674145, Time: 0.1122 Steps: 90000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 008920, Sample Num: 142720, Cur Loss: 0.00000024, Cur Avg Loss: 0.00213986, Log Avg loss: 0.00000006, Global Avg Loss: 0.00672651, Time: 0.4043 Steps: 90200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 009120, Sample Num: 145920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00213637, Log Avg loss: 0.00198093, Global Avg Loss: 0.00671601, Time: 0.1308 Steps: 90400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 009320, Sample Num: 149120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00209053, Log Avg loss: 0.00000004, Global Avg Loss: 0.00670118, Time: 0.2606 Steps: 90600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009520, Sample Num: 152320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00213593, Log Avg loss: 0.00425157, Global Avg Loss: 0.00669579, Time: 0.4053 Steps: 90800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009720, Sample Num: 155520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00209198, Log Avg loss: 0.00000006, Global Avg Loss: 0.00668107, Time: 0.3715 Steps: 91000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 009920, Sample Num: 158720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00204980, Log Avg loss: 0.00000010, Global Avg Loss: 0.00666642, Time: 0.3507 Steps: 91200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010120, Sample Num: 161920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00200930, Log Avg loss: 0.00000036, Global Avg Loss: 0.00665183, Time: 0.4816 Steps: 91400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010320, Sample Num: 165120, Cur Loss: 0.00000018, Cur Avg Loss: 0.00197036, Log Avg loss: 0.00000002, Global Avg Loss: 0.00663731, Time: 0.1412 Steps: 91600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010520, Sample Num: 168320, Cur Loss: 0.00000013, Cur Avg Loss: 0.00196534, Log Avg loss: 0.00170606, Global Avg Loss: 0.00662656, Time: 0.1270 Steps: 91800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010720, Sample Num: 171520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00192887, Log Avg loss: 0.00001057, Global Avg Loss: 0.00661218, Time: 0.2597 Steps: 92000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 010920, Sample Num: 174720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00189355, Log Avg loss: 0.00000025, Global Avg Loss: 0.00659784, Time: 0.0821 Steps: 92200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 011120, Sample Num: 177920, Cur Loss: 0.00000001, Cur Avg Loss: 0.00210340, Log Avg loss: 0.01356130, Global Avg Loss: 0.00661291, Time: 0.0955 Steps: 92400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 011320, Sample Num: 181120, Cur Loss: 0.00002486, Cur Avg Loss: 0.00209033, Log Avg loss: 0.00136400, Global Avg Loss: 0.00660158, Time: 0.1978 Steps: 92600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011520, Sample Num: 184320, Cur Loss: 0.00000020, Cur Avg Loss: 0.00212267, Log Avg loss: 0.00395320, Global Avg Loss: 0.00659587, Time: 0.3376 Steps: 92800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011720, Sample Num: 187520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00208654, Log Avg loss: 0.00000507, Global Avg Loss: 0.00658169, Time: 0.1526 Steps: 93000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 011920, Sample Num: 190720, Cur Loss: 0.00000002, Cur Avg Loss: 0.00205170, Log Avg loss: 0.00001048, Global Avg Loss: 0.00656759, Time: 0.1387 Steps: 93200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012120, Sample Num: 193920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00202266, Log Avg loss: 0.00029178, Global Avg Loss: 0.00655415, Time: 0.0933 Steps: 93400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012320, Sample Num: 197120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00198985, Log Avg loss: 0.00000127, Global Avg Loss: 0.00654015, Time: 0.3415 Steps: 93600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012520, Sample Num: 200320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00195809, Log Avg loss: 0.00000173, Global Avg Loss: 0.00652621, Time: 0.0854 Steps: 93800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012720, Sample Num: 203520, Cur Loss: 0.00000004, Cur Avg Loss: 0.00202677, Log Avg loss: 0.00632636, Global Avg Loss: 0.00652579, Time: 0.3784 Steps: 94000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 012920, Sample Num: 206720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00199541, Log Avg loss: 0.00000064, Global Avg Loss: 0.00651193, Time: 0.4775 Steps: 94200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 013120, Sample Num: 209920, Cur Loss: 0.00000012, Cur Avg Loss: 0.00202502, Log Avg loss: 0.00393825, Global Avg Loss: 0.00650648, Time: 0.3768 Steps: 94400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 013320, Sample Num: 213120, Cur Loss: 0.00000018, Cur Avg Loss: 0.00199467, Log Avg loss: 0.00000320, Global Avg Loss: 0.00649273, Time: 0.1330 Steps: 94600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013520, Sample Num: 216320, Cur Loss: 0.00000051, Cur Avg Loss: 0.00196527, Log Avg loss: 0.00000733, Global Avg Loss: 0.00647905, Time: 0.4137 Steps: 94800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013720, Sample Num: 219520, Cur Loss: 0.00000008, Cur Avg Loss: 0.00193663, Log Avg loss: 0.00000034, Global Avg Loss: 0.00646541, Time: 0.0759 Steps: 95000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 013920, Sample Num: 222720, Cur Loss: 0.00000007, Cur Avg Loss: 0.00195986, Log Avg loss: 0.00355393, Global Avg Loss: 0.00645929, Time: 0.2532 Steps: 95200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014120, Sample Num: 225920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00193211, Log Avg loss: 0.00000066, Global Avg Loss: 0.00644575, Time: 0.2596 Steps: 95400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014320, Sample Num: 229120, Cur Loss: 0.00001576, Cur Avg Loss: 0.00192575, Log Avg loss: 0.00147657, Global Avg Loss: 0.00643536, Time: 0.2631 Steps: 95600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014520, Sample Num: 232320, Cur Loss: 0.00000006, Cur Avg Loss: 0.00189923, Log Avg loss: 0.00000058, Global Avg Loss: 0.00642192, Time: 0.4890 Steps: 95800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014720, Sample Num: 235520, Cur Loss: 0.00000012, Cur Avg Loss: 0.00187745, Log Avg loss: 0.00029626, Global Avg Loss: 0.00640916, Time: 0.3428 Steps: 96000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 014920, Sample Num: 238720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00185748, Log Avg loss: 0.00038781, Global Avg Loss: 0.00639664, Time: 0.0827 Steps: 96200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015120, Sample Num: 241920, Cur Loss: 0.00000018, Cur Avg Loss: 0.00183295, Log Avg loss: 0.00000292, Global Avg Loss: 0.00638338, Time: 0.4710 Steps: 96400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015320, Sample Num: 245120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00180904, Log Avg loss: 0.00000104, Global Avg Loss: 0.00637016, Time: 0.0682 Steps: 96600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 015520, Sample Num: 248320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00178573, Log Avg loss: 0.00000011, Global Avg Loss: 0.00635700, Time: 0.1014 Steps: 96800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 015720, Sample Num: 251520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00178285, Log Avg loss: 0.00155990, Global Avg Loss: 0.00634711, Time: 0.0782 Steps: 97000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 015920, Sample Num: 254720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00176051, Log Avg loss: 0.00000434, Global Avg Loss: 0.00633406, Time: 0.4974 Steps: 97200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016120, Sample Num: 257920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00174006, Log Avg loss: 0.00011214, Global Avg Loss: 0.00632128, Time: 0.0999 Steps: 97400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016320, Sample Num: 261120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00171874, Log Avg loss: 0.00000008, Global Avg Loss: 0.00630833, Time: 0.4259 Steps: 97600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016520, Sample Num: 264320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00169795, Log Avg loss: 0.00000144, Global Avg Loss: 0.00629543, Time: 0.2630 Steps: 97800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016720, Sample Num: 267520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167764, Log Avg loss: 0.00000004, Global Avg Loss: 0.00628259, Time: 0.3099 Steps: 98000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 016920, Sample Num: 270720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165781, Log Avg loss: 0.00000028, Global Avg Loss: 0.00626979, Time: 0.3300 Steps: 98200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017120, Sample Num: 273920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00174221, Log Avg loss: 0.00888241, Global Avg Loss: 0.00627510, Time: 0.0848 Steps: 98400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017320, Sample Num: 277120, Cur Loss: 0.00000001, Cur Avg Loss: 0.00177383, Log Avg loss: 0.00448054, Global Avg Loss: 0.00627146, Time: 0.1491 Steps: 98600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 017520, Sample Num: 280320, Cur Loss: 0.00000018, Cur Avg Loss: 0.00175358, Log Avg loss: 0.00000010, Global Avg Loss: 0.00625877, Time: 0.2595 Steps: 98800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 017720, Sample Num: 283520, Cur Loss: 0.00000004, Cur Avg Loss: 0.00173385, Log Avg loss: 0.00000518, Global Avg Loss: 0.00624613, Time: 0.1500 Steps: 99000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 017920, Sample Num: 286720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00171450, Log Avg loss: 0.00000024, Global Avg Loss: 0.00623354, Time: 0.4213 Steps: 99200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018120, Sample Num: 289920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00169560, Log Avg loss: 0.00000191, Global Avg Loss: 0.00622100, Time: 0.1011 Steps: 99400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018320, Sample Num: 293120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167709, Log Avg loss: 0.00000022, Global Avg Loss: 0.00620851, Time: 0.3048 Steps: 99600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018520, Sample Num: 296320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170457, Log Avg loss: 0.00422228, Global Avg Loss: 0.00620453, Time: 0.1295 Steps: 99800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018720, Sample Num: 299520, Cur Loss: 0.00000030, Cur Avg Loss: 0.00168636, Log Avg loss: 0.00000017, Global Avg Loss: 0.00619212, Time: 0.0886 Steps: 100000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 018920, Sample Num: 302720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00168412, Log Avg loss: 0.00147364, Global Avg Loss: 0.00618270, Time: 0.3595 Steps: 100200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019120, Sample Num: 305920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166652, Log Avg loss: 0.00000198, Global Avg Loss: 0.00617039, Time: 0.2628 Steps: 100400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019320, Sample Num: 309120, Cur Loss: 0.00000006, Cur Avg Loss: 0.00164927, Log Avg loss: 0.00000008, Global Avg Loss: 0.00615812, Time: 0.1277 Steps: 100600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 019520, Sample Num: 312320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00163237, Log Avg loss: 0.00000024, Global Avg Loss: 0.00614591, Time: 0.0823 Steps: 100800, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 019720, Sample Num: 315520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161582, Log Avg loss: 0.00000011, Global Avg Loss: 0.00613374, Time: 0.2090 Steps: 101000, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 019920, Sample Num: 318720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159960, Log Avg loss: 0.00000001, Global Avg Loss: 0.00612161, Time: 0.3920 Steps: 101200, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 020120, Sample Num: 321920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158370, Log Avg loss: 0.00000002, Global Avg Loss: 0.00610954, Time: 0.0596 Steps: 101400, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 020320, Sample Num: 325113, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167157, Log Avg loss: 0.01051160, Global Avg Loss: 0.00611820, Time: 0.1556 Steps: 101600, Updated lr: 0.000050 ***** Running evaluation checkpoint-101600 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-101600 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4146.688583, Avg time per batch (s): 0.200000 {"eval_avg_loss": 0.021627, "eval_total_loss": 58.609657, "eval_acc": 0.99797, "eval_prec": 0.999077, "eval_recall": 0.99687, "eval_f1": 0.997972, "eval_roc_auc": 0.999913, "eval_pr_auc": 0.999892, "eval_confusion_matrix": {"tn": 21607, "fp": 20, "fn": 68, "tp": 21654}, "eval_mcc2": 0.995942, "eval_mcc": 0.995942, "eval_sn": 0.99687, "eval_sp": 0.999075, "update_flag": false, "test_avg_loss": 0.017202, "test_total_loss": 69.909491, "test_acc": 0.99817, "test_prec": 0.999229, "test_recall": 0.997108, "test_f1": 0.998168, "test_roc_auc": 0.99996, "test_pr_auc": 0.999963, "test_confusion_matrix": {"tn": 32492, "fp": 25, "fn": 94, "tp": 32411}, "test_mcc2": 0.996342, "test_mcc": 0.996342, "test_sn": 0.997108, "test_sp": 0.999231, "lr": 5.0049261083743846e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.006118204525778731, "train_cur_epoch_loss": 33.96627061139995, "train_cur_epoch_avg_loss": 0.0016715684356003913, "train_cur_epoch_time": 4146.688582897186, "train_cur_epoch_avg_time": 0.20406932002446782, "epoch": 5, "step": 101600} ################################################## Training, Epoch: 0006, Batch: 000200, Sample Num: 3200, Cur Loss: 0.00000011, Cur Avg Loss: 0.00000151, Log Avg loss: 0.00000151, Global Avg Loss: 0.00610619, Time: 0.4012 Steps: 101800, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000400, Sample Num: 6400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000469, Log Avg loss: 0.00000786, Global Avg Loss: 0.00609423, Time: 0.0753 Steps: 102000, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000600, Sample Num: 9600, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000314, Log Avg loss: 0.00000005, Global Avg Loss: 0.00608230, Time: 0.0782 Steps: 102200, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000800, Sample Num: 12800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000236, Log Avg loss: 0.00000001, Global Avg Loss: 0.00607042, Time: 0.2629 Steps: 102400, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 001000, Sample Num: 16000, Cur Loss: 0.00000002, Cur Avg Loss: 0.00000189, Log Avg loss: 0.00000002, Global Avg Loss: 0.00605859, Time: 0.0831 Steps: 102600, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 001200, Sample Num: 19200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000164, Log Avg loss: 0.00000039, Global Avg Loss: 0.00604680, Time: 0.3793 Steps: 102800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001400, Sample Num: 22400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000142, Log Avg loss: 0.00000009, Global Avg Loss: 0.00603506, Time: 0.2590 Steps: 103000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000148, Log Avg loss: 0.00000187, Global Avg Loss: 0.00602337, Time: 0.1685 Steps: 103200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001800, Sample Num: 28800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000136, Log Avg loss: 0.00000044, Global Avg Loss: 0.00601172, Time: 0.2161 Steps: 103400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000123, Log Avg loss: 0.00000001, Global Avg Loss: 0.00600012, Time: 0.0685 Steps: 103600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002200, Sample Num: 35200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00059464, Log Avg loss: 0.00652883, Global Avg Loss: 0.00600113, Time: 0.1258 Steps: 103800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002400, Sample Num: 38400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00054509, Log Avg loss: 0.00000002, Global Avg Loss: 0.00598959, Time: 0.1304 Steps: 104000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00050605, Log Avg loss: 0.00003759, Global Avg Loss: 0.00597817, Time: 0.3965 Steps: 104200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00090915, Log Avg loss: 0.00614942, Global Avg Loss: 0.00597850, Time: 0.0889 Steps: 104400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102996, Log Avg loss: 0.00272131, Global Avg Loss: 0.00597227, Time: 0.0808 Steps: 104600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00000002, Cur Avg Loss: 0.00096568, Log Avg loss: 0.00000141, Global Avg Loss: 0.00596088, Time: 0.3495 Steps: 104800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003400, Sample Num: 54400, Cur Loss: 0.00000006, Cur Avg Loss: 0.00115424, Log Avg loss: 0.00417131, Global Avg Loss: 0.00595747, Time: 0.0912 Steps: 105000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003600, Sample Num: 57600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109019, Log Avg loss: 0.00000121, Global Avg Loss: 0.00594614, Time: 0.1216 Steps: 105200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003800, Sample Num: 60800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00120738, Log Avg loss: 0.00331683, Global Avg Loss: 0.00594115, Time: 0.0856 Steps: 105400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00000004, Cur Avg Loss: 0.00115331, Log Avg loss: 0.00012607, Global Avg Loss: 0.00593014, Time: 0.1019 Steps: 105600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004200, Sample Num: 67200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109842, Log Avg loss: 0.00000052, Global Avg Loss: 0.00591893, Time: 0.4144 Steps: 105800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130708, Log Avg loss: 0.00568889, Global Avg Loss: 0.00591850, Time: 0.0565 Steps: 106000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00000017, Cur Avg Loss: 0.00155122, Log Avg loss: 0.00692238, Global Avg Loss: 0.00592039, Time: 0.1243 Steps: 106200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 004800, Sample Num: 76800, Cur Loss: 0.00000051, Cur Avg Loss: 0.00148665, Log Avg loss: 0.00000159, Global Avg Loss: 0.00590926, Time: 0.3718 Steps: 106400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 005000, Sample Num: 80000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00144673, Log Avg loss: 0.00048872, Global Avg Loss: 0.00589909, Time: 0.0857 Steps: 106600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00139131, Log Avg loss: 0.00000571, Global Avg Loss: 0.00588806, Time: 0.3987 Steps: 106800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134043, Log Avg loss: 0.00001749, Global Avg Loss: 0.00587708, Time: 0.3409 Steps: 107000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005600, Sample Num: 89600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129292, Log Avg loss: 0.00001010, Global Avg Loss: 0.00586614, Time: 0.2903 Steps: 107200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 005800, Sample Num: 92800, Cur Loss: 0.00000161, Cur Avg Loss: 0.00148015, Log Avg loss: 0.00672273, Global Avg Loss: 0.00586773, Time: 0.2873 Steps: 107400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006000, Sample Num: 96000, Cur Loss: 0.00000005, Cur Avg Loss: 0.00163146, Log Avg loss: 0.00601933, Global Avg Loss: 0.00586801, Time: 0.1336 Steps: 107600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006200, Sample Num: 99200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00183571, Log Avg loss: 0.00796317, Global Avg Loss: 0.00587190, Time: 0.4546 Steps: 107800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00000010, Cur Avg Loss: 0.00178054, Log Avg loss: 0.00007037, Global Avg Loss: 0.00586116, Time: 0.1877 Steps: 108000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006600, Sample Num: 105600, Cur Loss: 0.00000005, Cur Avg Loss: 0.00172659, Log Avg loss: 0.00000013, Global Avg Loss: 0.00585032, Time: 0.2622 Steps: 108200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 006800, Sample Num: 108800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167581, Log Avg loss: 0.00000010, Global Avg Loss: 0.00583953, Time: 0.1376 Steps: 108400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162793, Log Avg loss: 0.00000001, Global Avg Loss: 0.00582878, Time: 0.1102 Steps: 108600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007200, Sample Num: 115200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158271, Log Avg loss: 0.00000001, Global Avg Loss: 0.00581806, Time: 0.0857 Steps: 108800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153997, Log Avg loss: 0.00000133, Global Avg Loss: 0.00580739, Time: 0.4804 Steps: 109000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00000006, Cur Avg Loss: 0.00177317, Log Avg loss: 0.01040158, Global Avg Loss: 0.00581580, Time: 0.3040 Steps: 109200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 007800, Sample Num: 124800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00183141, Log Avg loss: 0.00404475, Global Avg Loss: 0.00581257, Time: 0.3309 Steps: 109400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00000036, Cur Avg Loss: 0.00194399, Log Avg loss: 0.00633454, Global Avg Loss: 0.00581352, Time: 0.6454 Steps: 109600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00000001, Cur Avg Loss: 0.00189710, Log Avg loss: 0.00002121, Global Avg Loss: 0.00580297, Time: 0.4135 Steps: 109800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008400, Sample Num: 134400, Cur Loss: 0.00000022, Cur Avg Loss: 0.00185217, Log Avg loss: 0.00001034, Global Avg Loss: 0.00579243, Time: 0.2627 Steps: 110000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008600, Sample Num: 137600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00180913, Log Avg loss: 0.00000150, Global Avg Loss: 0.00578192, Time: 0.3036 Steps: 110200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00176802, Log Avg loss: 0.00000008, Global Avg Loss: 0.00577145, Time: 0.2565 Steps: 110400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00000003, Cur Avg Loss: 0.00172886, Log Avg loss: 0.00000569, Global Avg Loss: 0.00576102, Time: 0.0794 Steps: 110600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00169127, Log Avg loss: 0.00000003, Global Avg Loss: 0.00575063, Time: 0.1242 Steps: 110800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165529, Log Avg loss: 0.00000006, Global Avg Loss: 0.00574026, Time: 0.4340 Steps: 111000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00169498, Log Avg loss: 0.00356030, Global Avg Loss: 0.00573634, Time: 0.1678 Steps: 111200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 009800, Sample Num: 156800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166039, Log Avg loss: 0.00000024, Global Avg Loss: 0.00572604, Time: 0.2607 Steps: 111400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162747, Log Avg loss: 0.00001434, Global Avg Loss: 0.00571581, Time: 0.2343 Steps: 111600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010200, Sample Num: 163200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159557, Log Avg loss: 0.00000039, Global Avg Loss: 0.00570558, Time: 0.2623 Steps: 111800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010400, Sample Num: 166400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00156698, Log Avg loss: 0.00010895, Global Avg Loss: 0.00569559, Time: 0.2034 Steps: 112000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00156171, Log Avg loss: 0.00128774, Global Avg Loss: 0.00568773, Time: 0.2263 Steps: 112200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153279, Log Avg loss: 0.00000003, Global Avg Loss: 0.00567761, Time: 0.2576 Steps: 112400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00150503, Log Avg loss: 0.00000597, Global Avg Loss: 0.00566754, Time: 0.4081 Steps: 112600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00000005, Cur Avg Loss: 0.00169017, Log Avg loss: 0.01187315, Global Avg Loss: 0.00567854, Time: 0.5159 Steps: 112800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00000003, Cur Avg Loss: 0.00169147, Log Avg loss: 0.00176378, Global Avg Loss: 0.00567161, Time: 0.0736 Steps: 113000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00171683, Log Avg loss: 0.00316284, Global Avg Loss: 0.00566718, Time: 0.0812 Steps: 113200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 011800, Sample Num: 188800, Cur Loss: 0.00000008, Cur Avg Loss: 0.00168782, Log Avg loss: 0.00000481, Global Avg Loss: 0.00565719, Time: 0.0844 Steps: 113400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012000, Sample Num: 192000, Cur Loss: 0.00000001, Cur Avg Loss: 0.00165969, Log Avg loss: 0.00000008, Global Avg Loss: 0.00564723, Time: 0.0911 Steps: 113600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012200, Sample Num: 195200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00163302, Log Avg loss: 0.00003306, Global Avg Loss: 0.00563737, Time: 0.1393 Steps: 113800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012400, Sample Num: 198400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160669, Log Avg loss: 0.00000061, Global Avg Loss: 0.00562748, Time: 0.0872 Steps: 114000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012600, Sample Num: 201600, Cur Loss: 0.00000008, Cur Avg Loss: 0.00158119, Log Avg loss: 0.00000014, Global Avg Loss: 0.00561762, Time: 0.1452 Steps: 114200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 012800, Sample Num: 204800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164327, Log Avg loss: 0.00555395, Global Avg Loss: 0.00561751, Time: 0.1218 Steps: 114400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013000, Sample Num: 208000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161882, Log Avg loss: 0.00005409, Global Avg Loss: 0.00560780, Time: 0.1985 Steps: 114600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013200, Sample Num: 211200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170148, Log Avg loss: 0.00707453, Global Avg Loss: 0.00561036, Time: 0.0790 Steps: 114800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 013400, Sample Num: 214400, Cur Loss: 0.00000094, Cur Avg Loss: 0.00167609, Log Avg loss: 0.00000023, Global Avg Loss: 0.00560060, Time: 0.2618 Steps: 115000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 013600, Sample Num: 217600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165145, Log Avg loss: 0.00000041, Global Avg Loss: 0.00559088, Time: 0.2836 Steps: 115200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 013800, Sample Num: 220800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162752, Log Avg loss: 0.00000087, Global Avg Loss: 0.00558119, Time: 0.1689 Steps: 115400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014000, Sample Num: 224000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00163561, Log Avg loss: 0.00219362, Global Avg Loss: 0.00557533, Time: 0.2670 Steps: 115600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014200, Sample Num: 227200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161259, Log Avg loss: 0.00000109, Global Avg Loss: 0.00556570, Time: 0.0739 Steps: 115800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014400, Sample Num: 230400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159065, Log Avg loss: 0.00003262, Global Avg Loss: 0.00555616, Time: 0.0844 Steps: 116000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014600, Sample Num: 233600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00156886, Log Avg loss: 0.00000001, Global Avg Loss: 0.00554660, Time: 0.4278 Steps: 116200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 014800, Sample Num: 236800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154766, Log Avg loss: 0.00000000, Global Avg Loss: 0.00553707, Time: 0.0679 Steps: 116400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015000, Sample Num: 240000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00153695, Log Avg loss: 0.00074441, Global Avg Loss: 0.00552885, Time: 0.4175 Steps: 116600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015200, Sample Num: 243200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00151672, Log Avg loss: 0.00000017, Global Avg Loss: 0.00551938, Time: 0.4054 Steps: 116800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 015400, Sample Num: 246400, Cur Loss: 0.00000316, Cur Avg Loss: 0.00149703, Log Avg loss: 0.00000005, Global Avg Loss: 0.00550995, Time: 0.2616 Steps: 117000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 015600, Sample Num: 249600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147784, Log Avg loss: 0.00000004, Global Avg Loss: 0.00550054, Time: 0.4495 Steps: 117200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 015800, Sample Num: 252800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00145949, Log Avg loss: 0.00002885, Global Avg Loss: 0.00549122, Time: 0.0652 Steps: 117400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016000, Sample Num: 256000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00144125, Log Avg loss: 0.00000012, Global Avg Loss: 0.00548188, Time: 0.1958 Steps: 117600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016200, Sample Num: 259200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00142346, Log Avg loss: 0.00000000, Global Avg Loss: 0.00547258, Time: 0.0879 Steps: 117800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016400, Sample Num: 262400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140610, Log Avg loss: 0.00000000, Global Avg Loss: 0.00546330, Time: 0.3891 Steps: 118000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016600, Sample Num: 265600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00138916, Log Avg loss: 0.00000002, Global Avg Loss: 0.00545406, Time: 0.0832 Steps: 118200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 016800, Sample Num: 268800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137262, Log Avg loss: 0.00000005, Global Avg Loss: 0.00544484, Time: 0.2861 Steps: 118400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017000, Sample Num: 272000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135647, Log Avg loss: 0.00000000, Global Avg Loss: 0.00543566, Time: 0.0664 Steps: 118600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017200, Sample Num: 275200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148990, Log Avg loss: 0.01283134, Global Avg Loss: 0.00544811, Time: 0.0538 Steps: 118800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 017400, Sample Num: 278400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147278, Log Avg loss: 0.00000031, Global Avg Loss: 0.00543896, Time: 0.0915 Steps: 119000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 017600, Sample Num: 281600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00145605, Log Avg loss: 0.00000032, Global Avg Loss: 0.00542983, Time: 0.2001 Steps: 119200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 017800, Sample Num: 284800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00143972, Log Avg loss: 0.00000260, Global Avg Loss: 0.00542074, Time: 0.4197 Steps: 119400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018000, Sample Num: 288000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00142373, Log Avg loss: 0.00000113, Global Avg Loss: 0.00541168, Time: 0.2632 Steps: 119600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018200, Sample Num: 291200, Cur Loss: 0.00000012, Cur Avg Loss: 0.00140809, Log Avg loss: 0.00000002, Global Avg Loss: 0.00540264, Time: 0.2817 Steps: 119800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018400, Sample Num: 294400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00139278, Log Avg loss: 0.00000004, Global Avg Loss: 0.00539364, Time: 0.2970 Steps: 120000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018600, Sample Num: 297600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140469, Log Avg loss: 0.00249996, Global Avg Loss: 0.00538883, Time: 0.1306 Steps: 120200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 018800, Sample Num: 300800, Cur Loss: 0.00000006, Cur Avg Loss: 0.00140184, Log Avg loss: 0.00113677, Global Avg Loss: 0.00538176, Time: 0.1103 Steps: 120400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019000, Sample Num: 304000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00138708, Log Avg loss: 0.00000004, Global Avg Loss: 0.00537284, Time: 0.1174 Steps: 120600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019200, Sample Num: 307200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137263, Log Avg loss: 0.00000009, Global Avg Loss: 0.00536394, Time: 0.4056 Steps: 120800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 019400, Sample Num: 310400, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135848, Log Avg loss: 0.00000006, Global Avg Loss: 0.00535508, Time: 0.0684 Steps: 121000, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 019600, Sample Num: 313600, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135707, Log Avg loss: 0.00121975, Global Avg Loss: 0.00534825, Time: 0.1499 Steps: 121200, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 019800, Sample Num: 316800, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134336, Log Avg loss: 0.00000006, Global Avg Loss: 0.00533944, Time: 0.1473 Steps: 121400, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 020000, Sample Num: 320000, Cur Loss: 0.00000000, Cur Avg Loss: 0.00132993, Log Avg loss: 0.00000010, Global Avg Loss: 0.00533066, Time: 0.1416 Steps: 121600, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 020200, Sample Num: 323200, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135897, Log Avg loss: 0.00426291, Global Avg Loss: 0.00532891, Time: 0.2043 Steps: 121800, Updated lr: 0.000040 ***** Running evaluation checkpoint-121920 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-121920 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4126.816103, Avg time per batch (s): 0.200000 {"eval_avg_loss": 0.018596, "eval_total_loss": 50.396038, "eval_acc": 0.998085, "eval_prec": 0.998847, "eval_recall": 0.99733, "eval_f1": 0.998088, "eval_roc_auc": 0.999897, "eval_pr_auc": 0.999852, "eval_confusion_matrix": {"tn": 21602, "fp": 25, "fn": 58, "tp": 21664}, "eval_mcc2": 0.996172, "eval_mcc": 0.996172, "eval_sn": 0.99733, "eval_sp": 0.998844, "update_flag": false, "test_avg_loss": 0.01472, "test_total_loss": 59.821338, "test_acc": 0.998385, "test_prec": 0.998953, "test_recall": 0.997816, "test_f1": 0.998384, "test_roc_auc": 0.999946, "test_pr_auc": 0.999932, "test_confusion_matrix": {"tn": 32483, "fp": 34, "fn": 71, "tp": 32434}, "test_mcc2": 0.996771, "test_mcc": 0.996771, "test_sn": 0.997816, "test_sp": 0.998954, "lr": 4.003940886699508e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.005330855990732529, "train_cur_epoch_loss": 28.328382570982193, "train_cur_epoch_avg_loss": 0.0013941133155010923, "train_cur_epoch_time": 4126.8161034584045, "train_cur_epoch_avg_time": 0.2030913436741341, "epoch": 6, "step": 121920} ################################################## Training, Epoch: 0007, Batch: 000080, Sample Num: 1280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000005, Log Avg loss: 0.00438623, Global Avg Loss: 0.00532736, Time: 0.3000 Steps: 122000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000280, Sample Num: 4480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000460, Log Avg loss: 0.00000643, Global Avg Loss: 0.00531865, Time: 0.1211 Steps: 122200, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000480, Sample Num: 7680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000291, Log Avg loss: 0.00000055, Global Avg Loss: 0.00530996, Time: 0.0573 Steps: 122400, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000680, Sample Num: 10880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000207, Log Avg loss: 0.00000004, Global Avg Loss: 0.00530130, Time: 0.1163 Steps: 122600, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000880, Sample Num: 14080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000161, Log Avg loss: 0.00000004, Global Avg Loss: 0.00529267, Time: 0.4277 Steps: 122800, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 001080, Sample Num: 17280, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000131, Log Avg loss: 0.00000003, Global Avg Loss: 0.00528406, Time: 0.0786 Steps: 123000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 001280, Sample Num: 20480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000111, Log Avg loss: 0.00000002, Global Avg Loss: 0.00527548, Time: 0.4524 Steps: 123200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001480, Sample Num: 23680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000096, Log Avg loss: 0.00000002, Global Avg Loss: 0.00526693, Time: 0.1033 Steps: 123400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001680, Sample Num: 26880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000085, Log Avg loss: 0.00000004, Global Avg Loss: 0.00525841, Time: 0.4311 Steps: 123600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001880, Sample Num: 30080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000076, Log Avg loss: 0.00000000, Global Avg Loss: 0.00524991, Time: 0.2649 Steps: 123800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002080, Sample Num: 33280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00033538, Log Avg loss: 0.00348076, Global Avg Loss: 0.00524706, Time: 0.4168 Steps: 124000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002280, Sample Num: 36480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00030598, Log Avg loss: 0.00000025, Global Avg Loss: 0.00523861, Time: 0.1578 Steps: 124200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002480, Sample Num: 39680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00028131, Log Avg loss: 0.00000001, Global Avg Loss: 0.00523019, Time: 0.0781 Steps: 124400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002680, Sample Num: 42880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00026031, Log Avg loss: 0.00000002, Global Avg Loss: 0.00522179, Time: 0.3311 Steps: 124600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002880, Sample Num: 46080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00067099, Log Avg loss: 0.00617411, Global Avg Loss: 0.00522332, Time: 0.1727 Steps: 124800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 003080, Sample Num: 49280, Cur Loss: 0.00000006, Cur Avg Loss: 0.00102419, Log Avg loss: 0.00611018, Global Avg Loss: 0.00522474, Time: 0.0914 Steps: 125000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 003280, Sample Num: 52480, Cur Loss: 0.00000042, Cur Avg Loss: 0.00126105, Log Avg loss: 0.00490869, Global Avg Loss: 0.00522423, Time: 0.2262 Steps: 125200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003480, Sample Num: 55680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118860, Log Avg loss: 0.00000053, Global Avg Loss: 0.00521590, Time: 0.1204 Steps: 125400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003680, Sample Num: 58880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112406, Log Avg loss: 0.00000094, Global Avg Loss: 0.00520760, Time: 0.0795 Steps: 125600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003880, Sample Num: 62080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00125206, Log Avg loss: 0.00360721, Global Avg Loss: 0.00520506, Time: 0.2824 Steps: 125800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004080, Sample Num: 65280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00119111, Log Avg loss: 0.00000867, Global Avg Loss: 0.00519681, Time: 0.1664 Steps: 126000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004280, Sample Num: 68480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00113557, Log Avg loss: 0.00000258, Global Avg Loss: 0.00518858, Time: 0.2662 Steps: 126200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004480, Sample Num: 71680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00150327, Log Avg loss: 0.00937206, Global Avg Loss: 0.00519519, Time: 0.1383 Steps: 126400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004680, Sample Num: 74880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159344, Log Avg loss: 0.00361332, Global Avg Loss: 0.00519270, Time: 0.2636 Steps: 126600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 004880, Sample Num: 78080, Cur Loss: 0.00000066, Cur Avg Loss: 0.00152815, Log Avg loss: 0.00000038, Global Avg Loss: 0.00518451, Time: 0.0716 Steps: 126800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 005080, Sample Num: 81280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146804, Log Avg loss: 0.00000120, Global Avg Loss: 0.00517634, Time: 0.2623 Steps: 127000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 005280, Sample Num: 84480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00141243, Log Avg loss: 0.00000006, Global Avg Loss: 0.00516820, Time: 0.3373 Steps: 127200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005480, Sample Num: 87680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00136089, Log Avg loss: 0.00000015, Global Avg Loss: 0.00516009, Time: 0.2024 Steps: 127400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005680, Sample Num: 90880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00131297, Log Avg loss: 0.00000002, Global Avg Loss: 0.00515200, Time: 0.1313 Steps: 127600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 005880, Sample Num: 94080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00149409, Log Avg loss: 0.00663786, Global Avg Loss: 0.00515433, Time: 0.0648 Steps: 127800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006080, Sample Num: 97280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00183968, Log Avg loss: 0.01200017, Global Avg Loss: 0.00516503, Time: 0.3587 Steps: 128000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006280, Sample Num: 100480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00182286, Log Avg loss: 0.00131158, Global Avg Loss: 0.00515901, Time: 0.1117 Steps: 128200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006480, Sample Num: 103680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00176891, Log Avg loss: 0.00007488, Global Avg Loss: 0.00515109, Time: 0.1107 Steps: 128400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006680, Sample Num: 106880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00171595, Log Avg loss: 0.00000007, Global Avg Loss: 0.00514308, Time: 0.1112 Steps: 128600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 006880, Sample Num: 110080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166608, Log Avg loss: 0.00000012, Global Avg Loss: 0.00513510, Time: 0.1222 Steps: 128800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 007080, Sample Num: 113280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161901, Log Avg loss: 0.00000003, Global Avg Loss: 0.00512714, Time: 0.3450 Steps: 129000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 007280, Sample Num: 116480, Cur Loss: 0.00000008, Cur Avg Loss: 0.00157456, Log Avg loss: 0.00000108, Global Avg Loss: 0.00511920, Time: 0.4013 Steps: 129200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007480, Sample Num: 119680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167473, Log Avg loss: 0.00532079, Global Avg Loss: 0.00511951, Time: 0.1890 Steps: 129400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007680, Sample Num: 122880, Cur Loss: 0.00000001, Cur Avg Loss: 0.00166277, Log Avg loss: 0.00121539, Global Avg Loss: 0.00511349, Time: 0.3672 Steps: 129600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 007880, Sample Num: 126080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00177562, Log Avg loss: 0.00610905, Global Avg Loss: 0.00511502, Time: 0.2390 Steps: 129800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008080, Sample Num: 129280, Cur Loss: 0.00000007, Cur Avg Loss: 0.00190328, Log Avg loss: 0.00693299, Global Avg Loss: 0.00511782, Time: 0.4551 Steps: 130000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008280, Sample Num: 132480, Cur Loss: 0.00000081, Cur Avg Loss: 0.00185825, Log Avg loss: 0.00003917, Global Avg Loss: 0.00511002, Time: 0.2682 Steps: 130200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008480, Sample Num: 135680, Cur Loss: 0.00000003, Cur Avg Loss: 0.00181443, Log Avg loss: 0.00000022, Global Avg Loss: 0.00510218, Time: 0.3220 Steps: 130400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008680, Sample Num: 138880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00177277, Log Avg loss: 0.00000627, Global Avg Loss: 0.00509438, Time: 0.1921 Steps: 130600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 008880, Sample Num: 142080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00173284, Log Avg loss: 0.00000007, Global Avg Loss: 0.00508659, Time: 0.0868 Steps: 130800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 009080, Sample Num: 145280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00169467, Log Avg loss: 0.00000003, Global Avg Loss: 0.00507882, Time: 0.0855 Steps: 131000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 009280, Sample Num: 148480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165815, Log Avg loss: 0.00000001, Global Avg Loss: 0.00507108, Time: 0.1906 Steps: 131200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009480, Sample Num: 151680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170854, Log Avg loss: 0.00404690, Global Avg Loss: 0.00506952, Time: 0.0868 Steps: 131400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009680, Sample Num: 154880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167325, Log Avg loss: 0.00000007, Global Avg Loss: 0.00506182, Time: 0.0834 Steps: 131600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 009880, Sample Num: 158080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00163938, Log Avg loss: 0.00000004, Global Avg Loss: 0.00505413, Time: 0.0662 Steps: 131800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010080, Sample Num: 161280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160685, Log Avg loss: 0.00000012, Global Avg Loss: 0.00504648, Time: 0.0932 Steps: 132000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010280, Sample Num: 164480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157559, Log Avg loss: 0.00000002, Global Avg Loss: 0.00503884, Time: 0.2641 Steps: 132200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010480, Sample Num: 167680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154874, Log Avg loss: 0.00016857, Global Avg Loss: 0.00503149, Time: 0.4927 Steps: 132400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010680, Sample Num: 170880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00151974, Log Avg loss: 0.00000005, Global Avg Loss: 0.00502390, Time: 0.4649 Steps: 132600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 010880, Sample Num: 174080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00149180, Log Avg loss: 0.00000007, Global Avg Loss: 0.00501633, Time: 0.2009 Steps: 132800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 011080, Sample Num: 177280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164042, Log Avg loss: 0.00972538, Global Avg Loss: 0.00502341, Time: 0.1222 Steps: 133000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 011280, Sample Num: 180480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00166665, Log Avg loss: 0.00311983, Global Avg Loss: 0.00502055, Time: 0.0707 Steps: 133200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011480, Sample Num: 183680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00170715, Log Avg loss: 0.00399143, Global Avg Loss: 0.00501901, Time: 0.2580 Steps: 133400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011680, Sample Num: 186880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167792, Log Avg loss: 0.00000003, Global Avg Loss: 0.00501150, Time: 0.2643 Steps: 133600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 011880, Sample Num: 190080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164967, Log Avg loss: 0.00000005, Global Avg Loss: 0.00500401, Time: 0.1272 Steps: 133800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012080, Sample Num: 193280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162236, Log Avg loss: 0.00000001, Global Avg Loss: 0.00499654, Time: 0.1453 Steps: 134000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012280, Sample Num: 196480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00159595, Log Avg loss: 0.00000040, Global Avg Loss: 0.00498909, Time: 0.1320 Steps: 134200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012480, Sample Num: 199680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00157037, Log Avg loss: 0.00000020, Global Avg Loss: 0.00498167, Time: 0.1974 Steps: 134400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012680, Sample Num: 202880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00164400, Log Avg loss: 0.00623808, Global Avg Loss: 0.00498354, Time: 0.1315 Steps: 134600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 012880, Sample Num: 206080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161847, Log Avg loss: 0.00000001, Global Avg Loss: 0.00497614, Time: 0.3814 Steps: 134800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 013080, Sample Num: 209280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00167311, Log Avg loss: 0.00519214, Global Avg Loss: 0.00497646, Time: 0.0571 Steps: 135000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 013280, Sample Num: 212480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00165244, Log Avg loss: 0.00030066, Global Avg Loss: 0.00496954, Time: 0.1117 Steps: 135200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013480, Sample Num: 215680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00162793, Log Avg loss: 0.00000043, Global Avg Loss: 0.00496220, Time: 0.4634 Steps: 135400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013680, Sample Num: 218880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00160445, Log Avg loss: 0.00002192, Global Avg Loss: 0.00495492, Time: 0.2634 Steps: 135600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 013880, Sample Num: 222080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00161175, Log Avg loss: 0.00211121, Global Avg Loss: 0.00495073, Time: 0.3040 Steps: 135800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014080, Sample Num: 225280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00158892, Log Avg loss: 0.00000408, Global Avg Loss: 0.00494346, Time: 0.1342 Steps: 136000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014280, Sample Num: 228480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00156666, Log Avg loss: 0.00000002, Global Avg Loss: 0.00493620, Time: 0.2054 Steps: 136200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014480, Sample Num: 231680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00154511, Log Avg loss: 0.00000628, Global Avg Loss: 0.00492897, Time: 0.3715 Steps: 136400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014680, Sample Num: 234880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00152406, Log Avg loss: 0.00000009, Global Avg Loss: 0.00492175, Time: 0.2664 Steps: 136600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 014880, Sample Num: 238080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00150358, Log Avg loss: 0.00000008, Global Avg Loss: 0.00491456, Time: 0.3787 Steps: 136800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015080, Sample Num: 241280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148364, Log Avg loss: 0.00000000, Global Avg Loss: 0.00490738, Time: 0.4462 Steps: 137000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015280, Sample Num: 244480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146422, Log Avg loss: 0.00000003, Global Avg Loss: 0.00490023, Time: 0.1285 Steps: 137200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 015480, Sample Num: 247680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00144530, Log Avg loss: 0.00000000, Global Avg Loss: 0.00489309, Time: 0.4164 Steps: 137400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 015680, Sample Num: 250880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00142687, Log Avg loss: 0.00000000, Global Avg Loss: 0.00488598, Time: 0.3819 Steps: 137600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 015880, Sample Num: 254080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140890, Log Avg loss: 0.00000001, Global Avg Loss: 0.00487889, Time: 0.2642 Steps: 137800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016080, Sample Num: 257280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00139141, Log Avg loss: 0.00000264, Global Avg Loss: 0.00487182, Time: 0.1387 Steps: 138000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016280, Sample Num: 260480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137431, Log Avg loss: 0.00000000, Global Avg Loss: 0.00486477, Time: 0.1219 Steps: 138200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016480, Sample Num: 263680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135763, Log Avg loss: 0.00000006, Global Avg Loss: 0.00485774, Time: 0.0787 Steps: 138400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016680, Sample Num: 266880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134136, Log Avg loss: 0.00000000, Global Avg Loss: 0.00485073, Time: 0.1436 Steps: 138600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 016880, Sample Num: 270080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00132546, Log Avg loss: 0.00000000, Global Avg Loss: 0.00484374, Time: 0.1761 Steps: 138800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017080, Sample Num: 273280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00143939, Log Avg loss: 0.01105462, Global Avg Loss: 0.00485268, Time: 0.1106 Steps: 139000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017280, Sample Num: 276480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00150609, Log Avg loss: 0.00720283, Global Avg Loss: 0.00485606, Time: 0.4041 Steps: 139200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 017480, Sample Num: 279680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00148886, Log Avg loss: 0.00000000, Global Avg Loss: 0.00484909, Time: 0.2305 Steps: 139400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 017680, Sample Num: 282880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00147202, Log Avg loss: 0.00000000, Global Avg Loss: 0.00484214, Time: 0.1252 Steps: 139600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 017880, Sample Num: 286080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00145555, Log Avg loss: 0.00000002, Global Avg Loss: 0.00483522, Time: 0.1412 Steps: 139800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018080, Sample Num: 289280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00143959, Log Avg loss: 0.00001251, Global Avg Loss: 0.00482833, Time: 0.0952 Steps: 140000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018280, Sample Num: 292480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00142384, Log Avg loss: 0.00000000, Global Avg Loss: 0.00482144, Time: 0.4584 Steps: 140200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018480, Sample Num: 295680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00141673, Log Avg loss: 0.00076656, Global Avg Loss: 0.00481566, Time: 0.1016 Steps: 140400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018680, Sample Num: 298880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140156, Log Avg loss: 0.00000015, Global Avg Loss: 0.00480881, Time: 0.4120 Steps: 140600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 018880, Sample Num: 302080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140226, Log Avg loss: 0.00146744, Global Avg Loss: 0.00480407, Time: 0.2222 Steps: 140800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019080, Sample Num: 305280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00138756, Log Avg loss: 0.00000032, Global Avg Loss: 0.00479725, Time: 0.4149 Steps: 141000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019280, Sample Num: 308480, Cur Loss: 0.00000000, Cur Avg Loss: 0.00137317, Log Avg loss: 0.00000001, Global Avg Loss: 0.00479046, Time: 0.1415 Steps: 141200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 019480, Sample Num: 311680, Cur Loss: 0.00000000, Cur Avg Loss: 0.00135907, Log Avg loss: 0.00000006, Global Avg Loss: 0.00478368, Time: 0.2663 Steps: 141400, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 019680, Sample Num: 314880, Cur Loss: 0.00000000, Cur Avg Loss: 0.00134527, Log Avg loss: 0.00000072, Global Avg Loss: 0.00477693, Time: 0.0851 Steps: 141600, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 019880, Sample Num: 318080, Cur Loss: 0.00000000, Cur Avg Loss: 0.00133173, Log Avg loss: 0.00000001, Global Avg Loss: 0.00477019, Time: 0.2651 Steps: 141800, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 020080, Sample Num: 321280, Cur Loss: 0.00000000, Cur Avg Loss: 0.00131847, Log Avg loss: 0.00000002, Global Avg Loss: 0.00476347, Time: 0.1317 Steps: 142000, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 020280, Sample Num: 324480, Cur Loss: 0.00000001, Cur Avg Loss: 0.00135768, Log Avg loss: 0.00529412, Global Avg Loss: 0.00476422, Time: 0.1341 Steps: 142200, Updated lr: 0.000030 ***** Running evaluation checkpoint-142240 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-142240 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4186.067770, Avg time per batch (s): 0.210000 {"eval_avg_loss": 0.021539, "eval_total_loss": 58.37144, "eval_acc": 0.998293, "eval_prec": 0.998802, "eval_recall": 0.99779, "eval_f1": 0.998296, "eval_roc_auc": 0.999764, "eval_pr_auc": 0.999582, "eval_confusion_matrix": {"tn": 21601, "fp": 26, "fn": 48, "tp": 21674}, "eval_mcc2": 0.996586, "eval_mcc": 0.996586, "eval_sn": 0.99779, "eval_sp": 0.998798, "update_flag": false, "test_avg_loss": 0.016855, "test_total_loss": 68.49744, "test_acc": 0.998431, "test_prec": 0.998799, "test_recall": 0.998062, "test_f1": 0.99843, "test_roc_auc": 0.999887, "test_pr_auc": 0.999812, "test_confusion_matrix": {"tn": 32478, "fp": 39, "fn": 63, "tp": 32442}, "test_mcc2": 0.996863, "test_mcc": 0.996863, "test_sn": 0.998062, "test_sp": 0.998801, "lr": 3.0029556650246303e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.004762877192417451, "train_cur_epoch_loss": 27.53368945934355, "train_cur_epoch_avg_loss": 0.0013550044025267495, "train_cur_epoch_time": 4186.067770242691, "train_cur_epoch_avg_time": 0.20600727215761275, "epoch": 7, "step": 142240} ################################################## Training, Epoch: 0008, Batch: 000160, Sample Num: 2560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000001, Global Avg Loss: 0.00475753, Time: 0.2384 Steps: 142400, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000360, Sample Num: 5760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00000005, Global Avg Loss: 0.00475085, Time: 0.0824 Steps: 142600, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000001, Global Avg Loss: 0.00474420, Time: 0.0967 Steps: 142800, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000760, Sample Num: 12160, Cur Loss: 0.00000001, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00473756, Time: 0.0663 Steps: 143000, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000960, Sample Num: 15360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000001, Global Avg Loss: 0.00473095, Time: 0.5337 Steps: 143200, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 001160, Sample Num: 18560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00472435, Time: 0.1414 Steps: 143400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000001, Global Avg Loss: 0.00471777, Time: 0.2742 Steps: 143600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001560, Sample Num: 24960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000007, Log Avg loss: 0.00000042, Global Avg Loss: 0.00471121, Time: 0.1403 Steps: 143800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001760, Sample Num: 28160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000006, Log Avg loss: 0.00000000, Global Avg Loss: 0.00470466, Time: 0.0871 Steps: 144000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001960, Sample Num: 31360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000005, Log Avg loss: 0.00000000, Global Avg Loss: 0.00469814, Time: 0.1290 Steps: 144200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002160, Sample Num: 34560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00019980, Log Avg loss: 0.00215735, Global Avg Loss: 0.00469462, Time: 0.1837 Steps: 144400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00018288, Log Avg loss: 0.00000009, Global Avg Loss: 0.00468813, Time: 0.1559 Steps: 144600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002560, Sample Num: 40960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00016859, Log Avg loss: 0.00000000, Global Avg Loss: 0.00468165, Time: 0.0717 Steps: 144800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002760, Sample Num: 44160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00021652, Log Avg loss: 0.00082993, Global Avg Loss: 0.00467634, Time: 0.0804 Steps: 145000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 002960, Sample Num: 47360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00049721, Log Avg loss: 0.00437075, Global Avg Loss: 0.00467592, Time: 0.3416 Steps: 145200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00046575, Log Avg loss: 0.00000026, Global Avg Loss: 0.00466949, Time: 0.0897 Steps: 145400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003360, Sample Num: 53760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00065435, Log Avg loss: 0.00363421, Global Avg Loss: 0.00466807, Time: 0.4056 Steps: 145600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003560, Sample Num: 56960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00061792, Log Avg loss: 0.00000593, Global Avg Loss: 0.00466167, Time: 0.1463 Steps: 145800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003760, Sample Num: 60160, Cur Loss: 0.46288812, Cur Avg Loss: 0.00070817, Log Avg loss: 0.00231453, Global Avg Loss: 0.00465845, Time: 0.2432 Steps: 146000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003960, Sample Num: 63360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00067246, Log Avg loss: 0.00000120, Global Avg Loss: 0.00465208, Time: 0.1429 Steps: 146200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00064103, Log Avg loss: 0.00001858, Global Avg Loss: 0.00464575, Time: 0.1494 Steps: 146400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004360, Sample Num: 69760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00061163, Log Avg loss: 0.00000017, Global Avg Loss: 0.00463942, Time: 0.2627 Steps: 146600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004560, Sample Num: 72960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00092000, Log Avg loss: 0.00764252, Global Avg Loss: 0.00464351, Time: 0.0836 Steps: 146800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004760, Sample Num: 76160, Cur Loss: 0.00000006, Cur Avg Loss: 0.00100318, Log Avg loss: 0.00289955, Global Avg Loss: 0.00464113, Time: 0.1957 Steps: 147000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 004960, Sample Num: 79360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096278, Log Avg loss: 0.00000127, Global Avg Loss: 0.00463483, Time: 0.1377 Steps: 147200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 005160, Sample Num: 82560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00092547, Log Avg loss: 0.00000036, Global Avg Loss: 0.00462854, Time: 0.0992 Steps: 147400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005360, Sample Num: 85760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00089095, Log Avg loss: 0.00000036, Global Avg Loss: 0.00462227, Time: 0.2643 Steps: 147600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00085894, Log Avg loss: 0.00000088, Global Avg Loss: 0.00461602, Time: 0.3910 Steps: 147800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005760, Sample Num: 92160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082915, Log Avg loss: 0.00000111, Global Avg Loss: 0.00460978, Time: 0.6529 Steps: 148000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00113090, Log Avg loss: 0.00982117, Global Avg Loss: 0.00461681, Time: 0.0900 Steps: 148200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006160, Sample Num: 98560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130386, Log Avg loss: 0.00645800, Global Avg Loss: 0.00461930, Time: 0.1417 Steps: 148400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006360, Sample Num: 101760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126287, Log Avg loss: 0.00000044, Global Avg Loss: 0.00461308, Time: 0.1219 Steps: 148600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006560, Sample Num: 104960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122483, Log Avg loss: 0.00001535, Global Avg Loss: 0.00460690, Time: 0.0827 Steps: 148800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006760, Sample Num: 108160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118860, Log Avg loss: 0.00000003, Global Avg Loss: 0.00460072, Time: 0.1171 Steps: 149000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 006960, Sample Num: 111360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115444, Log Avg loss: 0.00000011, Global Avg Loss: 0.00459455, Time: 0.2644 Steps: 149200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 007160, Sample Num: 114560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112220, Log Avg loss: 0.00000001, Global Avg Loss: 0.00458840, Time: 0.3216 Steps: 149400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 007360, Sample Num: 117760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109171, Log Avg loss: 0.00000012, Global Avg Loss: 0.00458226, Time: 0.0730 Steps: 149600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007560, Sample Num: 120960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118565, Log Avg loss: 0.00464264, Global Avg Loss: 0.00458234, Time: 0.2623 Steps: 149800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115509, Log Avg loss: 0.00000009, Global Avg Loss: 0.00457623, Time: 0.2635 Steps: 150000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 007960, Sample Num: 127360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00146655, Log Avg loss: 0.01355100, Global Avg Loss: 0.00458818, Time: 0.2670 Steps: 150200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00000001, Cur Avg Loss: 0.00143060, Log Avg loss: 0.00000003, Global Avg Loss: 0.00458208, Time: 0.1137 Steps: 150400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00139642, Log Avg loss: 0.00000158, Global Avg Loss: 0.00457600, Time: 0.1203 Steps: 150600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008560, Sample Num: 136960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00136379, Log Avg loss: 0.00000001, Global Avg Loss: 0.00456993, Time: 0.1900 Steps: 150800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008760, Sample Num: 140160, Cur Loss: 0.00000001, Cur Avg Loss: 0.00133265, Log Avg loss: 0.00000006, Global Avg Loss: 0.00456388, Time: 0.1636 Steps: 151000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 008960, Sample Num: 143360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130291, Log Avg loss: 0.00000001, Global Avg Loss: 0.00455784, Time: 0.1874 Steps: 151200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 009160, Sample Num: 146560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00127446, Log Avg loss: 0.00000001, Global Avg Loss: 0.00455182, Time: 0.1495 Steps: 151400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124723, Log Avg loss: 0.00000000, Global Avg Loss: 0.00454582, Time: 0.2660 Steps: 151600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009560, Sample Num: 152960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00131761, Log Avg loss: 0.00461173, Global Avg Loss: 0.00454590, Time: 0.0880 Steps: 151800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009760, Sample Num: 156160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00129062, Log Avg loss: 0.00000028, Global Avg Loss: 0.00453992, Time: 0.1691 Steps: 152000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 009960, Sample Num: 159360, Cur Loss: 0.00000003, Cur Avg Loss: 0.00126470, Log Avg loss: 0.00000005, Global Avg Loss: 0.00453396, Time: 0.2094 Steps: 152200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010160, Sample Num: 162560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00123981, Log Avg loss: 0.00000026, Global Avg Loss: 0.00452801, Time: 0.1425 Steps: 152400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121588, Log Avg loss: 0.00000004, Global Avg Loss: 0.00452207, Time: 0.2011 Steps: 152600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010560, Sample Num: 168960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00119541, Log Avg loss: 0.00013485, Global Avg Loss: 0.00451633, Time: 0.3884 Steps: 152800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010760, Sample Num: 172160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00117319, Log Avg loss: 0.00000000, Global Avg Loss: 0.00451043, Time: 0.2669 Steps: 153000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 010960, Sample Num: 175360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115178, Log Avg loss: 0.00000001, Global Avg Loss: 0.00450454, Time: 0.0536 Steps: 153200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130764, Log Avg loss: 0.00984892, Global Avg Loss: 0.00451151, Time: 0.2772 Steps: 153400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 011360, Sample Num: 181760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00128465, Log Avg loss: 0.00000200, Global Avg Loss: 0.00450563, Time: 0.1207 Steps: 153600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00133388, Log Avg loss: 0.00412967, Global Avg Loss: 0.00450514, Time: 0.2596 Steps: 153800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00000018, Cur Avg Loss: 0.00131119, Log Avg loss: 0.00000003, Global Avg Loss: 0.00449929, Time: 0.4206 Steps: 154000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 011960, Sample Num: 191360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00128927, Log Avg loss: 0.00000001, Global Avg Loss: 0.00449346, Time: 0.0803 Steps: 154200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012160, Sample Num: 194560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126806, Log Avg loss: 0.00000001, Global Avg Loss: 0.00448764, Time: 0.3099 Steps: 154400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012360, Sample Num: 197760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00124755, Log Avg loss: 0.00000040, Global Avg Loss: 0.00448183, Time: 0.1442 Steps: 154600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012560, Sample Num: 200960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122768, Log Avg loss: 0.00000001, Global Avg Loss: 0.00447604, Time: 0.1648 Steps: 154800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012760, Sample Num: 204160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00128066, Log Avg loss: 0.00460761, Global Avg Loss: 0.00447621, Time: 0.1481 Steps: 155000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 012960, Sample Num: 207360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126090, Log Avg loss: 0.00000003, Global Avg Loss: 0.00447044, Time: 0.5084 Steps: 155200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 013160, Sample Num: 210560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00130008, Log Avg loss: 0.00383942, Global Avg Loss: 0.00446963, Time: 0.0805 Steps: 155400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 013360, Sample Num: 213760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00128139, Log Avg loss: 0.00005153, Global Avg Loss: 0.00446395, Time: 0.2557 Steps: 155600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013560, Sample Num: 216960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00126251, Log Avg loss: 0.00000073, Global Avg Loss: 0.00445822, Time: 0.1801 Steps: 155800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013760, Sample Num: 220160, Cur Loss: 0.00000006, Cur Avg Loss: 0.00124415, Log Avg loss: 0.00000002, Global Avg Loss: 0.00445251, Time: 0.1325 Steps: 156000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 013960, Sample Num: 223360, Cur Loss: 0.00000280, Cur Avg Loss: 0.00125436, Log Avg loss: 0.00195633, Global Avg Loss: 0.00444931, Time: 0.2813 Steps: 156200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014160, Sample Num: 226560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00123664, Log Avg loss: 0.00000002, Global Avg Loss: 0.00444362, Time: 0.0807 Steps: 156400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014360, Sample Num: 229760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00121944, Log Avg loss: 0.00000134, Global Avg Loss: 0.00443795, Time: 0.4253 Steps: 156600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014560, Sample Num: 232960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00120269, Log Avg loss: 0.00000009, Global Avg Loss: 0.00443229, Time: 0.1666 Steps: 156800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014760, Sample Num: 236160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118641, Log Avg loss: 0.00000171, Global Avg Loss: 0.00442664, Time: 0.2027 Steps: 157000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 014960, Sample Num: 239360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00117195, Log Avg loss: 0.00010488, Global Avg Loss: 0.00442115, Time: 0.0892 Steps: 157200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 015160, Sample Num: 242560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115649, Log Avg loss: 0.00000000, Global Avg Loss: 0.00441553, Time: 0.1776 Steps: 157400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 015360, Sample Num: 245760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00114144, Log Avg loss: 0.00000000, Global Avg Loss: 0.00440992, Time: 0.2331 Steps: 157600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015560, Sample Num: 248960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112676, Log Avg loss: 0.00000000, Global Avg Loss: 0.00440434, Time: 0.1330 Steps: 157800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015760, Sample Num: 252160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00111247, Log Avg loss: 0.00000001, Global Avg Loss: 0.00439876, Time: 0.2193 Steps: 158000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 015960, Sample Num: 255360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109852, Log Avg loss: 0.00000001, Global Avg Loss: 0.00439320, Time: 0.4324 Steps: 158200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016160, Sample Num: 258560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108493, Log Avg loss: 0.00000044, Global Avg Loss: 0.00438765, Time: 0.3053 Steps: 158400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016360, Sample Num: 261760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00107167, Log Avg loss: 0.00000001, Global Avg Loss: 0.00438212, Time: 0.1535 Steps: 158600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016560, Sample Num: 264960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106142, Log Avg loss: 0.00022313, Global Avg Loss: 0.00437688, Time: 0.1640 Steps: 158800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016760, Sample Num: 268160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104876, Log Avg loss: 0.00000001, Global Avg Loss: 0.00437138, Time: 0.2845 Steps: 159000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 016960, Sample Num: 271360, Cur Loss: 0.00000011, Cur Avg Loss: 0.00103639, Log Avg loss: 0.00000000, Global Avg Loss: 0.00436588, Time: 0.1929 Steps: 159200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 017160, Sample Num: 274560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00117571, Log Avg loss: 0.01299028, Global Avg Loss: 0.00437671, Time: 0.0663 Steps: 159400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 017360, Sample Num: 277760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00116223, Log Avg loss: 0.00000582, Global Avg Loss: 0.00437123, Time: 0.4267 Steps: 159600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017560, Sample Num: 280960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00114900, Log Avg loss: 0.00000016, Global Avg Loss: 0.00436576, Time: 0.1537 Steps: 159800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017760, Sample Num: 284160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00113606, Log Avg loss: 0.00000001, Global Avg Loss: 0.00436030, Time: 0.0801 Steps: 160000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 017960, Sample Num: 287360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112341, Log Avg loss: 0.00000040, Global Avg Loss: 0.00435486, Time: 0.1202 Steps: 160200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018160, Sample Num: 290560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00111104, Log Avg loss: 0.00000000, Global Avg Loss: 0.00434943, Time: 0.1472 Steps: 160400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018360, Sample Num: 293760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109894, Log Avg loss: 0.00000002, Global Avg Loss: 0.00434401, Time: 0.0941 Steps: 160600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018560, Sample Num: 296960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108779, Log Avg loss: 0.00006403, Global Avg Loss: 0.00433869, Time: 0.2693 Steps: 160800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018760, Sample Num: 300160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00107619, Log Avg loss: 0.00000003, Global Avg Loss: 0.00433330, Time: 0.0826 Steps: 161000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 018960, Sample Num: 303360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106900, Log Avg loss: 0.00039501, Global Avg Loss: 0.00432841, Time: 0.1312 Steps: 161200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 019160, Sample Num: 306560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105785, Log Avg loss: 0.00000001, Global Avg Loss: 0.00432305, Time: 0.2021 Steps: 161400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 019360, Sample Num: 309760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104692, Log Avg loss: 0.00000000, Global Avg Loss: 0.00431770, Time: 0.4472 Steps: 161600, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019560, Sample Num: 312960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00103621, Log Avg loss: 0.00000000, Global Avg Loss: 0.00431236, Time: 0.1018 Steps: 161800, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019760, Sample Num: 316160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102574, Log Avg loss: 0.00000190, Global Avg Loss: 0.00430704, Time: 0.1962 Steps: 162000, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 019960, Sample Num: 319360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101547, Log Avg loss: 0.00000000, Global Avg Loss: 0.00430173, Time: 0.1471 Steps: 162200, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 020160, Sample Num: 322560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100539, Log Avg loss: 0.00000000, Global Avg Loss: 0.00429643, Time: 0.0835 Steps: 162400, Updated lr: 0.000020 ***** Running evaluation checkpoint-162560 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-162560 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4315.955696, Avg time per batch (s): 0.210000 {"eval_avg_loss": 0.022371, "eval_total_loss": 60.624792, "eval_acc": 0.998224, "eval_prec": 0.998664, "eval_recall": 0.99779, "eval_f1": 0.998227, "eval_roc_auc": 0.999697, "eval_pr_auc": 0.999446, "eval_confusion_matrix": {"tn": 21598, "fp": 29, "fn": 48, "tp": 21674}, "eval_mcc2": 0.996448, "eval_mcc": 0.996448, "eval_sn": 0.99779, "eval_sp": 0.998659, "update_flag": false, "test_avg_loss": 0.017544, "test_total_loss": 71.298482, "test_acc": 0.998539, "test_prec": 0.998738, "test_recall": 0.998339, "test_f1": 0.998538, "test_roc_auc": 0.999841, "test_pr_auc": 0.99972, "test_confusion_matrix": {"tn": 32476, "fp": 41, "fn": 54, "tp": 32451}, "test_mcc2": 0.997078, "test_mcc": 0.997078, "test_sn": 0.998339, "test_sp": 0.998739, "lr": 2.001970443349754e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.004299540008722568, "train_cur_epoch_loss": 21.46157196848015, "train_cur_epoch_avg_loss": 0.0010561797228582752, "train_cur_epoch_time": 4315.955695867538, "train_cur_epoch_avg_time": 0.2123993944816702, "epoch": 8, "step": 162560} ################################################## Training, Epoch: 0009, Batch: 000040, Sample Num: 640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000000, Log Avg loss: 0.00596433, Global Avg Loss: 0.00429848, Time: 0.2117 Steps: 162600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000240, Sample Num: 3840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000002, Global Avg Loss: 0.00429320, Time: 0.0608 Steps: 162800, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000440, Sample Num: 7040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000001, Global Avg Loss: 0.00428793, Time: 0.0795 Steps: 163000, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000640, Sample Num: 10240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000000, Global Avg Loss: 0.00428268, Time: 0.0702 Steps: 163200, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000840, Sample Num: 13440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000000, Global Avg Loss: 0.00427744, Time: 0.1548 Steps: 163400, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 001040, Sample Num: 16640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000002, Global Avg Loss: 0.00427221, Time: 0.1574 Steps: 163600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 001240, Sample Num: 19840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000000, Global Avg Loss: 0.00426699, Time: 0.1655 Steps: 163800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001440, Sample Num: 23040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000008, Log Avg loss: 0.00000048, Global Avg Loss: 0.00426179, Time: 0.0858 Steps: 164000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001640, Sample Num: 26240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000007, Log Avg loss: 0.00000003, Global Avg Loss: 0.00425660, Time: 0.1953 Steps: 164200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001840, Sample Num: 29440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000006, Log Avg loss: 0.00000000, Global Avg Loss: 0.00425142, Time: 0.0958 Steps: 164400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002040, Sample Num: 32640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00006273, Log Avg loss: 0.00063926, Global Avg Loss: 0.00424703, Time: 0.1517 Steps: 164600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002240, Sample Num: 35840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00005713, Log Avg loss: 0.00000002, Global Avg Loss: 0.00424188, Time: 0.1404 Steps: 164800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002440, Sample Num: 39040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00005245, Log Avg loss: 0.00000000, Global Avg Loss: 0.00423673, Time: 0.1930 Steps: 165000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002640, Sample Num: 42240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00004850, Log Avg loss: 0.00000036, Global Avg Loss: 0.00423161, Time: 0.0839 Steps: 165200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 002840, Sample Num: 45440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00033841, Log Avg loss: 0.00416523, Global Avg Loss: 0.00423153, Time: 0.1018 Steps: 165400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 003040, Sample Num: 48640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00031615, Log Avg loss: 0.00000004, Global Avg Loss: 0.00422641, Time: 0.2669 Steps: 165600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 003240, Sample Num: 51840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00053418, Log Avg loss: 0.00384823, Global Avg Loss: 0.00422596, Time: 0.3499 Steps: 165800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003440, Sample Num: 55040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00050314, Log Avg loss: 0.00000028, Global Avg Loss: 0.00422087, Time: 0.1438 Steps: 166000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003640, Sample Num: 58240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00047550, Log Avg loss: 0.00000003, Global Avg Loss: 0.00421579, Time: 0.1276 Steps: 166200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003840, Sample Num: 61440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00057315, Log Avg loss: 0.00235055, Global Avg Loss: 0.00421355, Time: 0.1173 Steps: 166400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004040, Sample Num: 64640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00054482, Log Avg loss: 0.00000079, Global Avg Loss: 0.00420849, Time: 0.1442 Steps: 166600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004240, Sample Num: 67840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00051914, Log Avg loss: 0.00000041, Global Avg Loss: 0.00420344, Time: 0.1609 Steps: 166800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004440, Sample Num: 71040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083709, Log Avg loss: 0.00757765, Global Avg Loss: 0.00420748, Time: 0.1019 Steps: 167000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004640, Sample Num: 74240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084373, Log Avg loss: 0.00099103, Global Avg Loss: 0.00420364, Time: 0.2292 Steps: 167200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 004840, Sample Num: 77440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00080922, Log Avg loss: 0.00000874, Global Avg Loss: 0.00419863, Time: 0.2655 Steps: 167400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 005040, Sample Num: 80640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00077714, Log Avg loss: 0.00000070, Global Avg Loss: 0.00419362, Time: 0.1385 Steps: 167600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 005240, Sample Num: 83840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00074758, Log Avg loss: 0.00000282, Global Avg Loss: 0.00418862, Time: 0.0574 Steps: 167800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005440, Sample Num: 87040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00072010, Log Avg loss: 0.00000001, Global Avg Loss: 0.00418363, Time: 0.3674 Steps: 168000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005640, Sample Num: 90240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00069457, Log Avg loss: 0.00000012, Global Avg Loss: 0.00417866, Time: 0.0947 Steps: 168200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 005840, Sample Num: 93440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083851, Log Avg loss: 0.00489777, Global Avg Loss: 0.00417951, Time: 0.3374 Steps: 168400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006040, Sample Num: 96640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105056, Log Avg loss: 0.00724233, Global Avg Loss: 0.00418315, Time: 0.2746 Steps: 168600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006240, Sample Num: 99840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00112450, Log Avg loss: 0.00335757, Global Avg Loss: 0.00418217, Time: 0.0569 Steps: 168800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006440, Sample Num: 103040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108978, Log Avg loss: 0.00000632, Global Avg Loss: 0.00417723, Time: 0.0793 Steps: 169000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006640, Sample Num: 106240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105696, Log Avg loss: 0.00000007, Global Avg Loss: 0.00417229, Time: 0.2276 Steps: 169200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 006840, Sample Num: 109440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102606, Log Avg loss: 0.00000029, Global Avg Loss: 0.00416736, Time: 0.3426 Steps: 169400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 007040, Sample Num: 112640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099691, Log Avg loss: 0.00000007, Global Avg Loss: 0.00416245, Time: 0.6000 Steps: 169600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 007240, Sample Num: 115840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096937, Log Avg loss: 0.00000004, Global Avg Loss: 0.00415755, Time: 0.4263 Steps: 169800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007440, Sample Num: 119040, Cur Loss: 0.00000004, Cur Avg Loss: 0.00104739, Log Avg loss: 0.00387152, Global Avg Loss: 0.00415721, Time: 0.1506 Steps: 170000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007640, Sample Num: 122240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101997, Log Avg loss: 0.00000004, Global Avg Loss: 0.00415233, Time: 0.1424 Steps: 170200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 007840, Sample Num: 125440, Cur Loss: 0.00000001, Cur Avg Loss: 0.00109345, Log Avg loss: 0.00390025, Global Avg Loss: 0.00415203, Time: 0.0796 Steps: 170400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008040, Sample Num: 128640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00118798, Log Avg loss: 0.00489369, Global Avg Loss: 0.00415290, Time: 0.0489 Steps: 170600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008240, Sample Num: 131840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115915, Log Avg loss: 0.00000003, Global Avg Loss: 0.00414804, Time: 0.1237 Steps: 170800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008440, Sample Num: 135040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00113168, Log Avg loss: 0.00000002, Global Avg Loss: 0.00414318, Time: 0.3423 Steps: 171000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008640, Sample Num: 138240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00110548, Log Avg loss: 0.00000001, Global Avg Loss: 0.00413834, Time: 0.1194 Steps: 171200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 008840, Sample Num: 141440, Cur Loss: 0.00000035, Cur Avg Loss: 0.00108047, Log Avg loss: 0.00000001, Global Avg Loss: 0.00413352, Time: 0.1039 Steps: 171400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 009040, Sample Num: 144640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105659, Log Avg loss: 0.00000085, Global Avg Loss: 0.00412870, Time: 0.2627 Steps: 171600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 009240, Sample Num: 147840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00103372, Log Avg loss: 0.00000001, Global Avg Loss: 0.00412389, Time: 0.0565 Steps: 171800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009440, Sample Num: 151040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109609, Log Avg loss: 0.00397781, Global Avg Loss: 0.00412372, Time: 0.2663 Steps: 172000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009640, Sample Num: 154240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00107341, Log Avg loss: 0.00000271, Global Avg Loss: 0.00411894, Time: 0.3384 Steps: 172200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 009840, Sample Num: 157440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105159, Log Avg loss: 0.00000000, Global Avg Loss: 0.00411416, Time: 0.1287 Steps: 172400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010040, Sample Num: 160640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00103064, Log Avg loss: 0.00000006, Global Avg Loss: 0.00410939, Time: 0.5364 Steps: 172600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010240, Sample Num: 163840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101051, Log Avg loss: 0.00000002, Global Avg Loss: 0.00410463, Time: 0.1413 Steps: 172800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010440, Sample Num: 167040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099116, Log Avg loss: 0.00000002, Global Avg Loss: 0.00409989, Time: 0.2603 Steps: 173000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010640, Sample Num: 170240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00097263, Log Avg loss: 0.00000534, Global Avg Loss: 0.00409516, Time: 0.1016 Steps: 173200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 010840, Sample Num: 173440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00095469, Log Avg loss: 0.00000024, Global Avg Loss: 0.00409044, Time: 0.1807 Steps: 173400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 011040, Sample Num: 176640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100974, Log Avg loss: 0.00399375, Global Avg Loss: 0.00409033, Time: 0.2640 Steps: 173600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 011240, Sample Num: 179840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099407, Log Avg loss: 0.00012883, Global Avg Loss: 0.00408577, Time: 0.1397 Steps: 173800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011440, Sample Num: 183040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099027, Log Avg loss: 0.00077699, Global Avg Loss: 0.00408197, Time: 0.1411 Steps: 174000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011640, Sample Num: 186240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00103851, Log Avg loss: 0.00379760, Global Avg Loss: 0.00408164, Time: 0.3806 Steps: 174200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 011840, Sample Num: 189440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102097, Log Avg loss: 0.00000001, Global Avg Loss: 0.00407696, Time: 0.3823 Steps: 174400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012040, Sample Num: 192640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100401, Log Avg loss: 0.00000000, Global Avg Loss: 0.00407229, Time: 0.1124 Steps: 174600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012240, Sample Num: 195840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098760, Log Avg loss: 0.00000001, Global Avg Loss: 0.00406763, Time: 0.4029 Steps: 174800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012440, Sample Num: 199040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00097172, Log Avg loss: 0.00000002, Global Avg Loss: 0.00406298, Time: 0.4562 Steps: 175000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012640, Sample Num: 202240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00095635, Log Avg loss: 0.00000005, Global Avg Loss: 0.00405834, Time: 0.1163 Steps: 175200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 012840, Sample Num: 205440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098166, Log Avg loss: 0.00258144, Global Avg Loss: 0.00405666, Time: 0.3736 Steps: 175400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 013040, Sample Num: 208640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096661, Log Avg loss: 0.00000003, Global Avg Loss: 0.00405204, Time: 0.2974 Steps: 175600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 013240, Sample Num: 211840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101671, Log Avg loss: 0.00428349, Global Avg Loss: 0.00405230, Time: 0.4740 Steps: 175800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013440, Sample Num: 215040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100158, Log Avg loss: 0.00000010, Global Avg Loss: 0.00404770, Time: 0.2832 Steps: 176000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013640, Sample Num: 218240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098692, Log Avg loss: 0.00000163, Global Avg Loss: 0.00404310, Time: 0.2279 Steps: 176200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 013840, Sample Num: 221440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100448, Log Avg loss: 0.00220244, Global Avg Loss: 0.00404102, Time: 0.4042 Steps: 176400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014040, Sample Num: 224640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099018, Log Avg loss: 0.00000019, Global Avg Loss: 0.00403644, Time: 0.2809 Steps: 176600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014240, Sample Num: 227840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00097627, Log Avg loss: 0.00000001, Global Avg Loss: 0.00403187, Time: 0.2647 Steps: 176800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014440, Sample Num: 231040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096275, Log Avg loss: 0.00000009, Global Avg Loss: 0.00402732, Time: 0.1345 Steps: 177000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014640, Sample Num: 234240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00094960, Log Avg loss: 0.00000021, Global Avg Loss: 0.00402277, Time: 0.4373 Steps: 177200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 014840, Sample Num: 237440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00093681, Log Avg loss: 0.00000032, Global Avg Loss: 0.00401824, Time: 0.0631 Steps: 177400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015040, Sample Num: 240640, Cur Loss: 0.00000002, Cur Avg Loss: 0.00092435, Log Avg loss: 0.00000002, Global Avg Loss: 0.00401371, Time: 0.5407 Steps: 177600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015240, Sample Num: 243840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00091222, Log Avg loss: 0.00000000, Global Avg Loss: 0.00400920, Time: 0.0831 Steps: 177800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 015440, Sample Num: 247040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00090040, Log Avg loss: 0.00000000, Global Avg Loss: 0.00400469, Time: 0.4056 Steps: 178000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 015640, Sample Num: 250240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088889, Log Avg loss: 0.00000000, Global Avg Loss: 0.00400020, Time: 0.2539 Steps: 178200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 015840, Sample Num: 253440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087767, Log Avg loss: 0.00000000, Global Avg Loss: 0.00399571, Time: 0.1411 Steps: 178400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016040, Sample Num: 256640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00086674, Log Avg loss: 0.00000133, Global Avg Loss: 0.00399124, Time: 0.4210 Steps: 178600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016240, Sample Num: 259840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00085607, Log Avg loss: 0.00000000, Global Avg Loss: 0.00398678, Time: 0.1559 Steps: 178800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016440, Sample Num: 263040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084565, Log Avg loss: 0.00000001, Global Avg Loss: 0.00398232, Time: 0.2636 Steps: 179000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016640, Sample Num: 266240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083551, Log Avg loss: 0.00000181, Global Avg Loss: 0.00397788, Time: 0.1383 Steps: 179200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 016840, Sample Num: 269440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082559, Log Avg loss: 0.00000000, Global Avg Loss: 0.00397345, Time: 0.0824 Steps: 179400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017040, Sample Num: 272640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083598, Log Avg loss: 0.00171132, Global Avg Loss: 0.00397093, Time: 0.1207 Steps: 179600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017240, Sample Num: 275840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00091731, Log Avg loss: 0.00784668, Global Avg Loss: 0.00397524, Time: 0.4914 Steps: 179800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 017440, Sample Num: 279040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00090679, Log Avg loss: 0.00000001, Global Avg Loss: 0.00397082, Time: 0.5299 Steps: 180000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 017640, Sample Num: 282240, Cur Loss: 0.00000465, Cur Avg Loss: 0.00089651, Log Avg loss: 0.00000002, Global Avg Loss: 0.00396641, Time: 0.2656 Steps: 180200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 017840, Sample Num: 285440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088646, Log Avg loss: 0.00000002, Global Avg Loss: 0.00396202, Time: 0.1588 Steps: 180400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018040, Sample Num: 288640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087664, Log Avg loss: 0.00000028, Global Avg Loss: 0.00395763, Time: 0.0892 Steps: 180600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018240, Sample Num: 291840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00086702, Log Avg loss: 0.00000000, Global Avg Loss: 0.00395325, Time: 0.2147 Steps: 180800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018440, Sample Num: 295040, Cur Loss: 0.00000001, Cur Avg Loss: 0.00085763, Log Avg loss: 0.00000049, Global Avg Loss: 0.00394888, Time: 0.1150 Steps: 181000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018640, Sample Num: 298240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084842, Log Avg loss: 0.00000003, Global Avg Loss: 0.00394452, Time: 0.1047 Steps: 181200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 018840, Sample Num: 301440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084190, Log Avg loss: 0.00023412, Global Avg Loss: 0.00394043, Time: 0.3645 Steps: 181400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019040, Sample Num: 304640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083306, Log Avg loss: 0.00000013, Global Avg Loss: 0.00393609, Time: 0.0894 Steps: 181600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019240, Sample Num: 307840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082440, Log Avg loss: 0.00000000, Global Avg Loss: 0.00393176, Time: 0.1236 Steps: 181800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 019440, Sample Num: 311040, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081592, Log Avg loss: 0.00000000, Global Avg Loss: 0.00392744, Time: 0.4257 Steps: 182000, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 019640, Sample Num: 314240, Cur Loss: 0.00000000, Cur Avg Loss: 0.00080765, Log Avg loss: 0.00000391, Global Avg Loss: 0.00392314, Time: 0.0901 Steps: 182200, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 019840, Sample Num: 317440, Cur Loss: 0.00000000, Cur Avg Loss: 0.00079951, Log Avg loss: 0.00000000, Global Avg Loss: 0.00391883, Time: 0.2492 Steps: 182400, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 020040, Sample Num: 320640, Cur Loss: 0.00000000, Cur Avg Loss: 0.00079153, Log Avg loss: 0.00000000, Global Avg Loss: 0.00391454, Time: 0.2657 Steps: 182600, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 020240, Sample Num: 323840, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082699, Log Avg loss: 0.00438031, Global Avg Loss: 0.00391505, Time: 0.1152 Steps: 182800, Updated lr: 0.000010 ***** Running evaluation checkpoint-182880 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-182880 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4266.980628, Avg time per batch (s): 0.210000 {"eval_avg_loss": 0.023697, "eval_total_loss": 64.218953, "eval_acc": 0.998224, "eval_prec": 0.998664, "eval_recall": 0.99779, "eval_f1": 0.998227, "eval_roc_auc": 0.999697, "eval_pr_auc": 0.999447, "eval_confusion_matrix": {"tn": 21598, "fp": 29, "fn": 48, "tp": 21674}, "eval_mcc2": 0.996448, "eval_mcc": 0.996448, "eval_sn": 0.99779, "eval_sp": 0.998659, "update_flag": false, "test_avg_loss": 0.018509, "test_total_loss": 75.21924, "test_acc": 0.998539, "test_prec": 0.998707, "test_recall": 0.998369, "test_f1": 0.998538, "test_roc_auc": 0.99981, "test_pr_auc": 0.999658, "test_confusion_matrix": {"tn": 32475, "fp": 42, "fn": 53, "tp": 32452}, "test_mcc2": 0.997078, "test_mcc": 0.997078, "test_sn": 0.998369, "test_sp": 0.998708, "lr": 1.000985221674877e-05, "cur_epoch_step": 20320, "train_global_avg_loss": 0.0039146923350203465, "train_cur_epoch_loss": 16.985710410578342, "train_cur_epoch_avg_loss": 0.0008359109454024775, "train_cur_epoch_time": 4266.980628013611, "train_cur_epoch_avg_time": 0.2099892041345281, "epoch": 9, "step": 182880} ################################################## Training, Epoch: 0010, Batch: 000120, Sample Num: 1920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000003, Log Avg loss: 0.00123689, Global Avg Loss: 0.00391213, Time: 0.2669 Steps: 183000, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000320, Sample Num: 5120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000004, Log Avg loss: 0.00000004, Global Avg Loss: 0.00390785, Time: 0.1549 Steps: 183200, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000520, Sample Num: 8320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00390359, Time: 0.4509 Steps: 183400, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000720, Sample Num: 11520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00389934, Time: 0.0916 Steps: 183600, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000920, Sample Num: 14720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000000, Global Avg Loss: 0.00389510, Time: 0.0943 Steps: 183800, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 001120, Sample Num: 17920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000005, Global Avg Loss: 0.00389086, Time: 0.1115 Steps: 184000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001320, Sample Num: 21120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00388664, Time: 0.5116 Steps: 184200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001520, Sample Num: 24320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000002, Log Avg loss: 0.00000000, Global Avg Loss: 0.00388242, Time: 0.3634 Steps: 184400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001720, Sample Num: 27520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000001, Global Avg Loss: 0.00387822, Time: 0.4154 Steps: 184600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001920, Sample Num: 30720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000000, Global Avg Loss: 0.00387402, Time: 0.1545 Steps: 184800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002120, Sample Num: 33920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001515, Log Avg loss: 0.00016047, Global Avg Loss: 0.00387001, Time: 0.0927 Steps: 185000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002320, Sample Num: 37120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001385, Log Avg loss: 0.00000001, Global Avg Loss: 0.00386583, Time: 0.0870 Steps: 185200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002520, Sample Num: 40320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001275, Log Avg loss: 0.00000009, Global Avg Loss: 0.00386166, Time: 0.3622 Steps: 185400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002720, Sample Num: 43520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00001182, Log Avg loss: 0.00000007, Global Avg Loss: 0.00385750, Time: 0.2653 Steps: 185600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 002920, Sample Num: 46720, Cur Loss: 0.00000010, Cur Avg Loss: 0.00031210, Log Avg loss: 0.00439590, Global Avg Loss: 0.00385807, Time: 0.0841 Steps: 185800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 003120, Sample Num: 49920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00029209, Log Avg loss: 0.00000000, Global Avg Loss: 0.00385393, Time: 0.4038 Steps: 186000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003320, Sample Num: 53120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00052794, Log Avg loss: 0.00420719, Global Avg Loss: 0.00385431, Time: 0.4042 Steps: 186200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003520, Sample Num: 56320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00049795, Log Avg loss: 0.00000004, Global Avg Loss: 0.00385017, Time: 0.1390 Steps: 186400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003720, Sample Num: 59520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00047118, Log Avg loss: 0.00000001, Global Avg Loss: 0.00384604, Time: 0.0870 Steps: 186600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003920, Sample Num: 62720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00060067, Log Avg loss: 0.00300917, Global Avg Loss: 0.00384515, Time: 0.1566 Steps: 186800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004120, Sample Num: 65920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00057151, Log Avg loss: 0.00000002, Global Avg Loss: 0.00384104, Time: 0.4086 Steps: 187000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004320, Sample Num: 69120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00054505, Log Avg loss: 0.00000001, Global Avg Loss: 0.00383693, Time: 0.1372 Steps: 187200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004520, Sample Num: 72320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083654, Log Avg loss: 0.00713274, Global Avg Loss: 0.00384045, Time: 0.1595 Steps: 187400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004720, Sample Num: 75520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081323, Log Avg loss: 0.00028633, Global Avg Loss: 0.00383666, Time: 0.2647 Steps: 187600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 004920, Sample Num: 78720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00078017, Log Avg loss: 0.00000006, Global Avg Loss: 0.00383257, Time: 0.1435 Steps: 187800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 005120, Sample Num: 81920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00074971, Log Avg loss: 0.00000027, Global Avg Loss: 0.00382850, Time: 0.5125 Steps: 188000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005320, Sample Num: 85120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00072152, Log Avg loss: 0.00000004, Global Avg Loss: 0.00382443, Time: 0.0926 Steps: 188200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005520, Sample Num: 88320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00069538, Log Avg loss: 0.00000001, Global Avg Loss: 0.00382037, Time: 0.5088 Steps: 188400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005720, Sample Num: 91520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00069131, Log Avg loss: 0.00057891, Global Avg Loss: 0.00381693, Time: 0.2827 Steps: 188600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 005920, Sample Num: 94720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096456, Log Avg loss: 0.00877959, Global Avg Loss: 0.00382219, Time: 0.0727 Steps: 188800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006120, Sample Num: 97920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00111827, Log Avg loss: 0.00566793, Global Avg Loss: 0.00382414, Time: 0.2402 Steps: 189000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006320, Sample Num: 101120, Cur Loss: 0.00000042, Cur Avg Loss: 0.00108288, Log Avg loss: 0.00000012, Global Avg Loss: 0.00382010, Time: 0.0918 Steps: 189200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006520, Sample Num: 104320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104969, Log Avg loss: 0.00000087, Global Avg Loss: 0.00381607, Time: 0.0866 Steps: 189400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006720, Sample Num: 107520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101845, Log Avg loss: 0.00000005, Global Avg Loss: 0.00381204, Time: 0.6727 Steps: 189600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 006920, Sample Num: 110720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098902, Log Avg loss: 0.00000005, Global Avg Loss: 0.00380802, Time: 0.0645 Steps: 189800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 007120, Sample Num: 113920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096124, Log Avg loss: 0.00000000, Global Avg Loss: 0.00380402, Time: 0.3169 Steps: 190000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 007320, Sample Num: 117120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00093499, Log Avg loss: 0.00000053, Global Avg Loss: 0.00380002, Time: 0.1429 Steps: 190200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007520, Sample Num: 120320, Cur Loss: 0.00000010, Cur Avg Loss: 0.00100744, Log Avg loss: 0.00365912, Global Avg Loss: 0.00379987, Time: 0.1219 Steps: 190400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007720, Sample Num: 123520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098134, Log Avg loss: 0.00000007, Global Avg Loss: 0.00379588, Time: 0.0741 Steps: 190600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 007920, Sample Num: 126720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00128342, Log Avg loss: 0.01294362, Global Avg Loss: 0.00380547, Time: 0.4237 Steps: 190800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008120, Sample Num: 129920, Cur Loss: 0.00000018, Cur Avg Loss: 0.00125181, Log Avg loss: 0.00000013, Global Avg Loss: 0.00380149, Time: 0.0576 Steps: 191000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008320, Sample Num: 133120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00122172, Log Avg loss: 0.00000010, Global Avg Loss: 0.00379751, Time: 0.2522 Steps: 191200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008520, Sample Num: 136320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00119305, Log Avg loss: 0.00000011, Global Avg Loss: 0.00379354, Time: 0.1187 Steps: 191400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008720, Sample Num: 139520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00116568, Log Avg loss: 0.00000000, Global Avg Loss: 0.00378958, Time: 0.4241 Steps: 191600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 008920, Sample Num: 142720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00113955, Log Avg loss: 0.00000000, Global Avg Loss: 0.00378563, Time: 0.1614 Steps: 191800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 009120, Sample Num: 145920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00111456, Log Avg loss: 0.00000000, Global Avg Loss: 0.00378169, Time: 0.0701 Steps: 192000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 009320, Sample Num: 149120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109064, Log Avg loss: 0.00000001, Global Avg Loss: 0.00377775, Time: 0.4318 Steps: 192200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009520, Sample Num: 152320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00115764, Log Avg loss: 0.00428003, Global Avg Loss: 0.00377827, Time: 0.1431 Steps: 192400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009720, Sample Num: 155520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00113382, Log Avg loss: 0.00000000, Global Avg Loss: 0.00377435, Time: 0.1255 Steps: 192600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 009920, Sample Num: 158720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00111096, Log Avg loss: 0.00000001, Global Avg Loss: 0.00377043, Time: 0.2959 Steps: 192800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010120, Sample Num: 161920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00108901, Log Avg loss: 0.00000004, Global Avg Loss: 0.00376653, Time: 0.4235 Steps: 193000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010320, Sample Num: 165120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106790, Log Avg loss: 0.00000005, Global Avg Loss: 0.00376263, Time: 0.2614 Steps: 193200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010520, Sample Num: 168320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104761, Log Avg loss: 0.00000018, Global Avg Loss: 0.00375874, Time: 0.2628 Steps: 193400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010720, Sample Num: 171520, Cur Loss: 0.00000024, Cur Avg Loss: 0.00102806, Log Avg loss: 0.00000007, Global Avg Loss: 0.00375485, Time: 0.1415 Steps: 193600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 010920, Sample Num: 174720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100923, Log Avg loss: 0.00000003, Global Avg Loss: 0.00375098, Time: 0.2855 Steps: 193800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 011120, Sample Num: 177920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00107500, Log Avg loss: 0.00466600, Global Avg Loss: 0.00375192, Time: 0.1403 Steps: 194000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 011320, Sample Num: 181120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00105602, Log Avg loss: 0.00000050, Global Avg Loss: 0.00374806, Time: 0.2638 Steps: 194200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011520, Sample Num: 184320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00109818, Log Avg loss: 0.00348424, Global Avg Loss: 0.00374779, Time: 0.5018 Steps: 194400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011720, Sample Num: 187520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00107944, Log Avg loss: 0.00000003, Global Avg Loss: 0.00374394, Time: 0.2456 Steps: 194600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 011920, Sample Num: 190720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00106132, Log Avg loss: 0.00000000, Global Avg Loss: 0.00374009, Time: 0.3420 Steps: 194800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012120, Sample Num: 193920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00104381, Log Avg loss: 0.00000000, Global Avg Loss: 0.00373626, Time: 0.4173 Steps: 195000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012320, Sample Num: 197120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00102687, Log Avg loss: 0.00000013, Global Avg Loss: 0.00373243, Time: 0.1275 Steps: 195200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012520, Sample Num: 200320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101047, Log Avg loss: 0.00000005, Global Avg Loss: 0.00372861, Time: 0.5573 Steps: 195400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012720, Sample Num: 203520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101617, Log Avg loss: 0.00137305, Global Avg Loss: 0.00372620, Time: 0.3970 Steps: 195600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 012920, Sample Num: 206720, Cur Loss: 0.00000021, Cur Avg Loss: 0.00100044, Log Avg loss: 0.00000000, Global Avg Loss: 0.00372239, Time: 0.2402 Steps: 195800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 013120, Sample Num: 209920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00101461, Log Avg loss: 0.00192993, Global Avg Loss: 0.00372056, Time: 0.3975 Steps: 196000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 013320, Sample Num: 213120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00099937, Log Avg loss: 0.00000001, Global Avg Loss: 0.00371677, Time: 0.2479 Steps: 196200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013520, Sample Num: 216320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098460, Log Avg loss: 0.00000069, Global Avg Loss: 0.00371299, Time: 0.0911 Steps: 196400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013720, Sample Num: 219520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00097025, Log Avg loss: 0.00000001, Global Avg Loss: 0.00370921, Time: 0.3090 Steps: 196600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 013920, Sample Num: 222720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00098278, Log Avg loss: 0.00184263, Global Avg Loss: 0.00370731, Time: 0.1083 Steps: 196800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014120, Sample Num: 225920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00096886, Log Avg loss: 0.00000002, Global Avg Loss: 0.00370355, Time: 0.0932 Steps: 197000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014320, Sample Num: 229120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00095533, Log Avg loss: 0.00000006, Global Avg Loss: 0.00369979, Time: 0.1213 Steps: 197200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014520, Sample Num: 232320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00094217, Log Avg loss: 0.00000005, Global Avg Loss: 0.00369604, Time: 0.0491 Steps: 197400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014720, Sample Num: 235520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00092937, Log Avg loss: 0.00000003, Global Avg Loss: 0.00369230, Time: 0.6074 Steps: 197600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 014920, Sample Num: 238720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00091692, Log Avg loss: 0.00000028, Global Avg Loss: 0.00368857, Time: 0.2150 Steps: 197800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 015120, Sample Num: 241920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00090479, Log Avg loss: 0.00000001, Global Avg Loss: 0.00368485, Time: 0.1188 Steps: 198000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 015320, Sample Num: 245120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00089297, Log Avg loss: 0.00000000, Global Avg Loss: 0.00368113, Time: 0.1111 Steps: 198200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015520, Sample Num: 248320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00088147, Log Avg loss: 0.00000001, Global Avg Loss: 0.00367742, Time: 0.2816 Steps: 198400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015720, Sample Num: 251520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087025, Log Avg loss: 0.00000000, Global Avg Loss: 0.00367371, Time: 0.3883 Steps: 198600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 015920, Sample Num: 254720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00085932, Log Avg loss: 0.00000019, Global Avg Loss: 0.00367002, Time: 0.0634 Steps: 198800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016120, Sample Num: 257920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084867, Log Avg loss: 0.00000099, Global Avg Loss: 0.00366633, Time: 0.0857 Steps: 199000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016320, Sample Num: 261120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00083827, Log Avg loss: 0.00000003, Global Avg Loss: 0.00366265, Time: 0.1186 Steps: 199200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016520, Sample Num: 264320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082824, Log Avg loss: 0.00000964, Global Avg Loss: 0.00365898, Time: 0.1429 Steps: 199400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016720, Sample Num: 267520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081833, Log Avg loss: 0.00000003, Global Avg Loss: 0.00365532, Time: 0.1221 Steps: 199600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 016920, Sample Num: 270720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00080866, Log Avg loss: 0.00000005, Global Avg Loss: 0.00365166, Time: 0.1097 Steps: 199800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 017120, Sample Num: 273920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00085497, Log Avg loss: 0.00477247, Global Avg Loss: 0.00365278, Time: 0.1083 Steps: 200000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 017320, Sample Num: 277120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00084510, Log Avg loss: 0.00000006, Global Avg Loss: 0.00364913, Time: 0.4222 Steps: 200200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017520, Sample Num: 280320, Cur Loss: 0.00000001, Cur Avg Loss: 0.00083545, Log Avg loss: 0.00000000, Global Avg Loss: 0.00364549, Time: 0.5211 Steps: 200400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017720, Sample Num: 283520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00082602, Log Avg loss: 0.00000002, Global Avg Loss: 0.00364185, Time: 0.0903 Steps: 200600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 017920, Sample Num: 286720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00081680, Log Avg loss: 0.00000022, Global Avg Loss: 0.00363823, Time: 0.6071 Steps: 200800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018120, Sample Num: 289920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00080779, Log Avg loss: 0.00000001, Global Avg Loss: 0.00363461, Time: 0.1289 Steps: 201000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018320, Sample Num: 293120, Cur Loss: 0.00000047, Cur Avg Loss: 0.00079897, Log Avg loss: 0.00000000, Global Avg Loss: 0.00363099, Time: 0.4223 Steps: 201200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018520, Sample Num: 296320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00079034, Log Avg loss: 0.00000010, Global Avg Loss: 0.00362739, Time: 0.4014 Steps: 201400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018720, Sample Num: 299520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00078190, Log Avg loss: 0.00000002, Global Avg Loss: 0.00362379, Time: 0.1285 Steps: 201600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 018920, Sample Num: 302720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00077366, Log Avg loss: 0.00000207, Global Avg Loss: 0.00362020, Time: 0.2596 Steps: 201800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 019120, Sample Num: 305920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00076556, Log Avg loss: 0.00000001, Global Avg Loss: 0.00361662, Time: 0.0664 Steps: 202000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 019320, Sample Num: 309120, Cur Loss: 0.00000000, Cur Avg Loss: 0.00075764, Log Avg loss: 0.00000000, Global Avg Loss: 0.00361304, Time: 0.1559 Steps: 202200, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019520, Sample Num: 312320, Cur Loss: 0.00000000, Cur Avg Loss: 0.00074988, Log Avg loss: 0.00000001, Global Avg Loss: 0.00360947, Time: 0.3582 Steps: 202400, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019720, Sample Num: 315520, Cur Loss: 0.00000000, Cur Avg Loss: 0.00074229, Log Avg loss: 0.00000148, Global Avg Loss: 0.00360591, Time: 0.3458 Steps: 202600, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 019920, Sample Num: 318720, Cur Loss: 0.00000000, Cur Avg Loss: 0.00073483, Log Avg loss: 0.00000000, Global Avg Loss: 0.00360235, Time: 0.4049 Steps: 202800, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 020120, Sample Num: 321920, Cur Loss: 0.00000000, Cur Avg Loss: 0.00072753, Log Avg loss: 0.00000002, Global Avg Loss: 0.00359880, Time: 0.0860 Steps: 203000, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 020320, Sample Num: 325113, Cur Loss: 0.00000000, Cur Avg Loss: 0.00074498, Log Avg loss: 0.00250060, Global Avg Loss: 0.00359772, Time: 0.0547 Steps: 203200, Updated lr: 0.000000 ***** Running evaluation checkpoint-203200 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## ***** Running testing checkpoint-203200 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## Epoch Time: 4193.335958, Avg time per batch (s): 0.210000 {"eval_avg_loss": 0.023738, "eval_total_loss": 64.329487, "eval_acc": 0.998224, "eval_prec": 0.99871, "eval_recall": 0.997744, "eval_f1": 0.998227, "eval_roc_auc": 0.999696, "eval_pr_auc": 0.999446, "eval_confusion_matrix": {"tn": 21599, "fp": 28, "fn": 49, "tp": 21673}, "eval_mcc2": 0.996448, "eval_mcc": 0.996448, "eval_sn": 0.997744, "eval_sp": 0.998705, "update_flag": false, "test_avg_loss": 0.018293, "test_total_loss": 74.340901, "test_acc": 0.998585, "test_prec": 0.9988, "test_recall": 0.998369, "test_f1": 0.998585, "test_roc_auc": 0.99981, "test_pr_auc": 0.999658, "test_confusion_matrix": {"tn": 32478, "fp": 39, "fn": 53, "tp": 32452}, "test_mcc2": 0.99717, "test_mcc": 0.99717, "test_sn": 0.998369, "test_sp": 0.998801, "lr": 0.0, "cur_epoch_step": 20320, "train_global_avg_loss": 0.003597721067995114, "train_cur_epoch_loss": 15.137986788084394, "train_cur_epoch_avg_loss": 0.0007449796647679328, "train_cur_epoch_time": 4193.335958480835, "train_cur_epoch_avg_time": 0.20636495858665527, "epoch": 10, "step": 203200} ################################################## #########################Best Metric######################### {"epoch": 2, "global_step": 40640, "eval_avg_loss": 0.009136, "eval_total_loss": 24.75942, "eval_acc": 0.998316, "eval_prec": 0.998664, "eval_recall": 0.997974, "eval_f1": 0.998319, "eval_roc_auc": 0.999918, "eval_pr_auc": 0.999922, "eval_confusion_matrix": {"tn": 21598, "fp": 29, "fn": 44, "tp": 21678}, "eval_mcc2": 0.996632, "eval_mcc": 0.996632, "eval_sn": 0.997974, "eval_sp": 0.998659, "update_flag": true, "test_avg_loss": 0.008006, "test_total_loss": 32.538096, "test_acc": 0.998431, "test_prec": 0.998584, "test_recall": 0.998277, "test_f1": 0.998431, "test_roc_auc": 0.999939, "test_pr_auc": 0.999945, "test_confusion_matrix": {"tn": 32471, "fp": 46, "fn": 56, "tp": 32449}, "test_mcc2": 0.996863, "test_mcc": 0.996863, "test_sn": 0.998277, "test_sp": 0.998585} ################################################## Total Time: 722156.922724, Avg time per epoch(10 epochs): 72215.690000 ++++++++++++Validation+++++++++++++ best acc global step: 40640 checkpoint path: ../models/ViralCapsid/protein/binary_class/luca_base/seq_matrix/20250106024245/checkpoint-40640 ***** Running evaluation checkpoint-40640 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 43349 ################################################## {"evaluation_avg_loss_40640": 0.009136, "evaluation_total_loss_40640": 24.75942, "evaluation_acc_40640": 0.998316, "evaluation_prec_40640": 0.998664, "evaluation_recall_40640": 0.997974, "evaluation_f1_40640": 0.998319, "evaluation_roc_auc_40640": 0.999918, "evaluation_pr_auc_40640": 0.999922, "evaluation_confusion_matrix_40640": {"tn": 21598, "fp": 29, "fn": 44, "tp": 21678}, "evaluation_mcc2_40640": 0.996632, "evaluation_mcc_40640": 0.996632, "evaluation_sn_40640": 0.997974, "evaluation_sp_40640": 0.998659} ++++++++++++Testing+++++++++++++ best acc global step: 40640 checkpoint path: ../models/ViralCapsid/protein/binary_class/luca_base/seq_matrix/20250106024245/checkpoint-40640 ***** Running testing checkpoint-40640 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [65022] ################################################## {"evaluation_avg_loss_40640": 0.008006, "evaluation_total_loss_40640": 32.538096, "evaluation_acc_40640": 0.998431, "evaluation_prec_40640": 0.998584, "evaluation_recall_40640": 0.998277, "evaluation_f1_40640": 0.998431, "evaluation_roc_auc_40640": 0.999939, "evaluation_pr_auc_40640": 0.999945, "evaluation_confusion_matrix_40640": {"tn": 32471, "fp": 46, "fn": 56, "tp": 32449}, "evaluation_mcc2_40640": 0.996863, "evaluation_mcc_40640": 0.996863, "evaluation_sn_40640": 0.998277, "evaluation_sp_40640": 0.998585}