{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "f1", "beta1": 0.9, "beta2": 0.98, "buffer_size": 1024, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 256, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "RdRP", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/RdRP/protein/binary_class/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 2560, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "seq_matrix", "intermediate_size": 4096, "label_filepath": "../dataset/RdRP/protein/binary_class/label.txt", "label_size": 2, "label_type": "RdRP", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": "../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000", "llm_step": "3800000", "llm_task_level": "token_level,span_level,seq_level", "llm_time_str": "20240815023346", "llm_type": "lucaone_virus", "llm_version": "v1.0", "local_rank": -1, "log_dir": "../logs/RdRP/protein/binary_class/luca_base/seq_matrix/20241227183207", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, 
"lr_update_strategy": "step", "matrix_add_special_token": false, "matrix_dirpath": "../matrices/RdRP/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": false, "no_token_type_embeddings": true, "non_ignore": false, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": true, "not_seq_encoder_shared": false, "num_attention_heads": 4, "num_hidden_layers": 2, "num_train_epochs": 10, "output_dir": "../models/RdRP/protein/binary_class/luca_base/seq_matrix/20241227183207", "output_mode": "binary_class", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 40.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "128", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "binary_class", "tb_log_dir": "../tb-logs/RdRP/protein/binary_class/luca_base/seq_matrix/20241227183207", "test_data_dir": "../dataset/RdRP/protein/binary_class/test/", "time_str": "20241227183210", "train_data_dir": "../dataset/RdRP/protein/binary_class/train/", "trunc_type": "right", "vector_dirpath": "../vectors/RdRP/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000", "vector_fc_size": "null", 
"vocab_size": 39, "warmup_steps": 200, "weight": null, "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,seq,embedding_matrix ################################################## Encoder Config: {'llm_type': 'lucaone_virus', 'llm_version': 'v1.0', 'llm_step': '3800000', 'llm_dirpath': '../llm/models/lucaone_virus/v1.0/token_level,span_level,seq_level/lucaone_virus/20240815023346/checkpoint-step3800000', 'input_type': 'seq_matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/RdRP/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'matrix_dirpath': '../matrices/RdRP/protein/binary_class/luca_base/v1.0/lucaone_virus/20240815023346/3800000', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': False, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': False, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 256, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 2560, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4096, "no_position_embeddings": true, "no_token_embeddings": false, "no_token_type_embeddings": true, 
"num_attention_heads": 4, "num_hidden_layers": 2, "pad_token_id": 0, "pos_weight": 40.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": [ 128 ], "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.29.0", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39 } ################################################## Model Architecture: LucaBase( (seq_encoder): LucaTransformer( (embeddings): LucaEmbeddings( (word_embeddings): Embedding(39, 1024, padding_idx=0) (dropout): Dropout(p=0.1, inplace=False) ) (encoder): ModuleList( (0): LucaTransformerLayer( (pre_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (self_attn): LucaMultiHeadAttention( (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) (rot_emb): RotaryEmbedding() ) (post_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) ) (1): LucaTransformerLayer( (pre_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (self_attn): LucaMultiHeadAttention( (k_proj): Linear(in_features=1024, out_features=1024, bias=True) (v_proj): Linear(in_features=1024, out_features=1024, bias=True) (q_proj): Linear(in_features=1024, out_features=1024, bias=True) (out_proj): Linear(in_features=1024, out_features=1024, bias=True) (rot_emb): RotaryEmbedding() ) (post_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (fc1): Linear(in_features=1024, out_features=4096, bias=True) (fc2): Linear(in_features=4096, out_features=1024, bias=True) ) ) (last_layer_norm): LayerNorm((1024,), eps=1e-05, 
elementwise_affine=True) ) (seq_pooler): GlobalMaskValueAttentionPooling1D (1024 -> 1024) (matrix_pooler): GlobalMaskValueAttentionPooling1D (2560 -> 2560) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=1024, out_features=128, bias=True) (1): GELU(approximate='none') ) (1): ModuleList( (0): Linear(in_features=2560, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=256, out_features=256, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=256, out_features=1, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 48566017 ################################################## {"total_num": "46.320000M", "total_size": "185.270000MB", "param_sum": "46.320000M", "param_size": "185.260000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "46.316163M", "trainable_size": "185.264652MB"} ################################################## Train dataset len: 190846, batch size: 16, batch num: 11928 Train dataset t_total: 119280, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 190846 Train Dataset Num Epochs = 10 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. 
parallel, distributed & accumulation) = 16 Train Dataset Gradient Accumulation steps = 1 Train Dataset Total optimization steps = 119280 ################################################## Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.03179338, Cur Avg Loss: 2.06935861, Log Avg loss: 2.06935861, Global Avg Loss: 2.06935861, Time: 0.2459 Steps: 200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.00404397, Cur Avg Loss: 1.73170381, Log Avg loss: 1.39404901, Global Avg Loss: 1.73170381, Time: 0.3225 Steps: 400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.00847973, Cur Avg Loss: 1.18980521, Log Avg loss: 0.10600803, Global Avg Loss: 1.18980521, Time: 0.7788 Steps: 600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.00002867, Cur Avg Loss: 0.92019240, Log Avg loss: 0.11135397, Global Avg Loss: 0.92019240, Time: 0.7697 Steps: 800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.00442904, Cur Avg Loss: 0.81010501, Log Avg loss: 0.36975543, Global Avg Loss: 0.81010501, Time: 0.4907 Steps: 1000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.00001568, Cur Avg Loss: 0.72250030, Log Avg loss: 0.28447677, Global Avg Loss: 0.72250030, Time: 0.3216 Steps: 1200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.00010905, Cur Avg Loss: 0.64585600, Log Avg loss: 0.18599022, Global Avg Loss: 0.64585600, Time: 0.2100 Steps: 1400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00023976, Cur Avg Loss: 0.56780964, Log Avg loss: 0.02148508, Global Avg Loss: 0.56780964, Time: 0.3937 Steps: 1600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.00003552, Cur Avg Loss: 0.51849755, Log Avg loss: 0.12400080, Global Avg Loss: 0.51849755, 
Time: 0.2489 Steps: 1800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 8.42113018, Cur Avg Loss: 0.47136631, Log Avg loss: 0.04718516, Global Avg Loss: 0.47136631, Time: 0.3535 Steps: 2000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.00003040, Cur Avg Loss: 0.45058419, Log Avg loss: 0.24276303, Global Avg Loss: 0.45058419, Time: 0.4146 Steps: 2200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.00000042, Cur Avg Loss: 0.41338077, Log Avg loss: 0.00414311, Global Avg Loss: 0.41338077, Time: 0.1570 Steps: 2400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00002092, Cur Avg Loss: 0.39037835, Log Avg loss: 0.11434931, Global Avg Loss: 0.39037835, Time: 0.3909 Steps: 2600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00000370, Cur Avg Loss: 0.37047997, Log Avg loss: 0.11180107, Global Avg Loss: 0.37047997, Time: 0.3873 Steps: 2800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00000775, Cur Avg Loss: 0.35042404, Log Avg loss: 0.06964094, Global Avg Loss: 0.35042404, Time: 0.3845 Steps: 3000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00005087, Cur Avg Loss: 0.33609973, Log Avg loss: 0.12123512, Global Avg Loss: 0.33609973, Time: 1.4863 Steps: 3200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur Loss: 0.00000250, Cur Avg Loss: 0.31718994, Log Avg loss: 0.01463335, Global Avg Loss: 0.31718994, Time: 0.5586 Steps: 3400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.00000745, Cur Avg Loss: 0.31397837, Log Avg loss: 0.25938173, Global Avg Loss: 0.31397837, Time: 0.3777 Steps: 3600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.00078610, Cur Avg 
Loss: 0.29764101, Log Avg loss: 0.00356848, Global Avg Loss: 0.29764101, Time: 0.2486 Steps: 3800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00000793, Cur Avg Loss: 0.28428339, Log Avg loss: 0.03048855, Global Avg Loss: 0.28428339, Time: 0.2366 Steps: 4000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.00000107, Cur Avg Loss: 0.27161998, Log Avg loss: 0.01835173, Global Avg Loss: 0.27161998, Time: 0.3390 Steps: 4200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00034398, Cur Avg Loss: 0.26337233, Log Avg loss: 0.09017187, Global Avg Loss: 0.26337233, Time: 0.7494 Steps: 4400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00000000, Cur Avg Loss: 0.25192238, Log Avg loss: 0.00002348, Global Avg Loss: 0.25192238, Time: 0.3995 Steps: 4600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.00000000, Cur Avg Loss: 0.24200474, Log Avg loss: 0.01389903, Global Avg Loss: 0.24200474, Time: 0.2622 Steps: 4800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005000, Sample Num: 80000, Cur Loss: 0.00000155, Cur Avg Loss: 0.23868088, Log Avg loss: 0.15890812, Global Avg Loss: 0.23868088, Time: 0.7093 Steps: 5000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00000513, Cur Avg Loss: 0.23186764, Log Avg loss: 0.06153661, Global Avg Loss: 0.23186764, Time: 0.4391 Steps: 5200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00000328, Cur Avg Loss: 0.22935586, Log Avg loss: 0.16404959, Global Avg Loss: 0.22935586, Time: 0.2985 Steps: 5400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005600, Sample Num: 89600, Cur Loss: 0.00000000, Cur Avg Loss: 0.22118636, Log Avg loss: 0.00061000, Global Avg Loss: 0.22118636, Time: 0.3778 Steps: 5600, Updated lr: 0.000095 Training, Epoch: 
0001, Batch: 005800, Sample Num: 92800, Cur Loss: 0.00000000, Cur Avg Loss: 0.21367775, Log Avg loss: 0.00343650, Global Avg Loss: 0.21367775, Time: 0.6286 Steps: 5800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006000, Sample Num: 96000, Cur Loss: 0.00000006, Cur Avg Loss: 0.20773835, Log Avg loss: 0.03549593, Global Avg Loss: 0.20773835, Time: 0.6409 Steps: 6000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006200, Sample Num: 99200, Cur Loss: 0.00000834, Cur Avg Loss: 0.20719088, Log Avg loss: 0.19076673, Global Avg Loss: 0.20719088, Time: 0.2720 Steps: 6200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00000060, Cur Avg Loss: 0.20080224, Log Avg loss: 0.00275434, Global Avg Loss: 0.20080224, Time: 0.3032 Steps: 6400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006600, Sample Num: 105600, Cur Loss: 0.00000018, Cur Avg Loss: 0.19755758, Log Avg loss: 0.09372832, Global Avg Loss: 0.19755758, Time: 1.4090 Steps: 6600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006800, Sample Num: 108800, Cur Loss: 0.00000113, Cur Avg Loss: 0.19183086, Log Avg loss: 0.00284918, Global Avg Loss: 0.19183086, Time: 0.6938 Steps: 6800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00000012, Cur Avg Loss: 0.18637167, Log Avg loss: 0.00075923, Global Avg Loss: 0.18637167, Time: 0.2995 Steps: 7000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007200, Sample Num: 115200, Cur Loss: 0.00000083, Cur Avg Loss: 0.18223173, Log Avg loss: 0.03733386, Global Avg Loss: 0.18223173, Time: 0.3332 Steps: 7200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00000000, Cur Avg Loss: 0.17731120, Log Avg loss: 0.00017208, Global Avg Loss: 0.17731120, Time: 0.3235 Steps: 7400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00000000, Cur Avg Loss: 0.17270407, Log Avg loss: 0.00224046, Global Avg 
Loss: 0.17270407, Time: 0.6582 Steps: 7600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007800, Sample Num: 124800, Cur Loss: 0.00000030, Cur Avg Loss: 0.17519256, Log Avg loss: 0.26975495, Global Avg Loss: 0.17519256, Time: 0.2484 Steps: 7800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00000101, Cur Avg Loss: 0.17258709, Log Avg loss: 0.07097386, Global Avg Loss: 0.17258709, Time: 0.7934 Steps: 8000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00000542, Cur Avg Loss: 0.16853950, Log Avg loss: 0.00663604, Global Avg Loss: 0.16853950, Time: 0.7521 Steps: 8200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008400, Sample Num: 134400, Cur Loss: 0.00038770, Cur Avg Loss: 0.16819075, Log Avg loss: 0.15389170, Global Avg Loss: 0.16819075, Time: 0.7556 Steps: 8400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008600, Sample Num: 137600, Cur Loss: 0.01460145, Cur Avg Loss: 0.16839272, Log Avg loss: 0.17687554, Global Avg Loss: 0.16839272, Time: 1.1305 Steps: 8600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00001436, Cur Avg Loss: 0.16755490, Log Avg loss: 0.13152883, Global Avg Loss: 0.16755490, Time: 0.2485 Steps: 8800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00000161, Cur Avg Loss: 0.16391004, Log Avg loss: 0.00353608, Global Avg Loss: 0.16391004, Time: 0.2874 Steps: 9000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00000691, Cur Avg Loss: 0.16390583, Log Avg loss: 0.16371617, Global Avg Loss: 0.16390583, Time: 0.4225 Steps: 9200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00000393, Cur Avg Loss: 0.16237860, Log Avg loss: 0.09212640, Global Avg Loss: 0.16237860, Time: 0.3777 Steps: 9400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009600, Sample Num: 153600, Cur 
Loss: 0.00000459, Cur Avg Loss: 0.15906500, Log Avg loss: 0.00332577, Global Avg Loss: 0.15906500, Time: 0.2711 Steps: 9600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009800, Sample Num: 156800, Cur Loss: 0.00000113, Cur Avg Loss: 0.15892253, Log Avg loss: 0.15208373, Global Avg Loss: 0.15892253, Time: 0.6649 Steps: 9800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00000077, Cur Avg Loss: 0.15638746, Log Avg loss: 0.03216921, Global Avg Loss: 0.15638746, Time: 0.3205 Steps: 10000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010200, Sample Num: 163200, Cur Loss: 0.00000018, Cur Avg Loss: 0.15332624, Log Avg loss: 0.00026508, Global Avg Loss: 0.15332624, Time: 0.3328 Steps: 10200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010400, Sample Num: 166400, Cur Loss: 0.00000000, Cur Avg Loss: 0.15079278, Log Avg loss: 0.02158627, Global Avg Loss: 0.15079278, Time: 0.7528 Steps: 10400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00000304, Cur Avg Loss: 0.14798910, Log Avg loss: 0.00219789, Global Avg Loss: 0.14798910, Time: 0.2060 Steps: 10600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00000024, Cur Avg Loss: 0.14822730, Log Avg loss: 0.16085164, Global Avg Loss: 0.14822730, Time: 0.5473 Steps: 10800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00000262, Cur Avg Loss: 0.14594737, Log Avg loss: 0.02283163, Global Avg Loss: 0.14594737, Time: 0.2851 Steps: 11000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00000226, Cur Avg Loss: 0.14378276, Log Avg loss: 0.02472875, Global Avg Loss: 0.14378276, Time: 0.2983 Steps: 11200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00000000, Cur Avg Loss: 0.14127536, Log Avg loss: 0.00086124, Global Avg Loss: 0.14127536, Time: 0.5885 Steps: 
11400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00000000, Cur Avg Loss: 0.13884140, Log Avg loss: 0.00010541, Global Avg Loss: 0.13884140, Time: 0.3266 Steps: 11600, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 011800, Sample Num: 188800, Cur Loss: 0.00000340, Cur Avg Loss: 0.13761051, Log Avg loss: 0.06621903, Global Avg Loss: 0.13761051, Time: 1.1231 Steps: 11800, Updated lr: 0.000090 ***** Running evaluation checkpoint-11928 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-11928 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 6198.576749, Avg time per batch (s): 0.520000 {"eval_avg_loss": 0.121892, "eval_total_loss": 169.795551, "eval_acc": 0.999417, "eval_prec": 0.99811, "eval_recall": 0.977778, "eval_f1": 0.987839, "eval_roc_auc": 0.999943, "eval_pr_auc": 0.998818, "eval_confusion_matrix": {"tn": 21743, "fp": 1, "fn": 12, "tp": 528}, "eval_mcc2": 0.987595, "eval_mcc": 0.987595, "eval_sn": 0.977778, "eval_sp": 0.999954, "update_flag": true, "test_avg_loss": 0.17564, "test_total_loss": 244.666763, "test_acc": 0.999102, "test_prec": 1.0, "test_recall": 0.962963, "test_f1": 0.981132, "test_roc_auc": 0.999993, "test_pr_auc": 0.999729, "test_confusion_matrix": {"tn": 21744, "fp": 0, "fn": 20, "tp": 520}, "test_mcc2": 0.980856, "test_mcc": 0.980856, "test_sn": 0.962963, "test_sp": 1.0, "lr": 9.015115888478334e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.13617299084249723, "train_cur_epoch_loss": 1624.271434769307, "train_cur_epoch_avg_loss": 0.13617299084249723, "train_cur_epoch_time": 6198.576748847961, "train_cur_epoch_avg_time": 0.519666058756536, "epoch": 1, "step": 11928} ################################################## Training, Epoch: 0002, Batch: 000072, Sample 
Num: 1152, Cur Loss: 0.00010490, Cur Avg Loss: 0.00330430, Log Avg loss: 0.00352668, Global Avg Loss: 0.13537578, Time: 0.2632 Steps: 12000, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000272, Sample Num: 4352, Cur Loss: 0.00000042, Cur Avg Loss: 0.00114344, Log Avg loss: 0.00036553, Global Avg Loss: 0.13316250, Time: 0.7132 Steps: 12200, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000472, Sample Num: 7552, Cur Loss: 0.00000030, Cur Avg Loss: 0.01879556, Log Avg loss: 0.04280245, Global Avg Loss: 0.13170508, Time: 0.5861 Steps: 12400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000672, Sample Num: 10752, Cur Loss: 0.00002337, Cur Avg Loss: 0.01321022, Log Avg loss: 0.00002882, Global Avg Loss: 0.12961498, Time: 0.7212 Steps: 12600, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000872, Sample Num: 13952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01328215, Log Avg loss: 0.01352384, Global Avg Loss: 0.12780105, Time: 0.4006 Steps: 12800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001072, Sample Num: 17152, Cur Loss: 16.28421021, Cur Avg Loss: 0.06672939, Log Avg loss: 0.29975935, Global Avg Loss: 0.13044656, Time: 0.3381 Steps: 13000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001272, Sample Num: 20352, Cur Loss: 0.00000000, Cur Avg Loss: 0.06051110, Log Avg loss: 0.02718104, Global Avg Loss: 0.12888194, Time: 0.6066 Steps: 13200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001472, Sample Num: 23552, Cur Loss: 0.00000000, Cur Avg Loss: 0.05562081, Log Avg loss: 0.02451861, Global Avg Loss: 0.12732427, Time: 0.3168 Steps: 13400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001672, Sample Num: 26752, Cur Loss: 0.00000185, Cur Avg Loss: 0.05894481, Log Avg loss: 0.08340941, Global Avg Loss: 0.12667847, Time: 0.3887 Steps: 13600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001872, Sample Num: 29952, Cur Loss: 0.00000024, Cur Avg Loss: 0.05265894, Log Avg loss: 0.00010911, Global Avg Loss: 0.12484413, Time: 0.4852 
Steps: 13800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002072, Sample Num: 33152, Cur Loss: 0.00000101, Cur Avg Loss: 0.04834465, Log Avg loss: 0.00796291, Global Avg Loss: 0.12317440, Time: 0.2537 Steps: 14000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002272, Sample Num: 36352, Cur Loss: 0.00000000, Cur Avg Loss: 0.05049232, Log Avg loss: 0.07274211, Global Avg Loss: 0.12246408, Time: 0.7081 Steps: 14200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002472, Sample Num: 39552, Cur Loss: 0.00000042, Cur Avg Loss: 0.05188109, Log Avg loss: 0.06765761, Global Avg Loss: 0.12170288, Time: 0.3632 Steps: 14400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002672, Sample Num: 42752, Cur Loss: 0.00000602, Cur Avg Loss: 0.06159618, Log Avg loss: 0.18167463, Global Avg Loss: 0.12252441, Time: 0.2639 Steps: 14600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002872, Sample Num: 45952, Cur Loss: 0.00000012, Cur Avg Loss: 0.05730971, Log Avg loss: 0.00004254, Global Avg Loss: 0.12086925, Time: 0.3213 Steps: 14800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003072, Sample Num: 49152, Cur Loss: 0.00000000, Cur Avg Loss: 0.05381637, Log Avg loss: 0.00365193, Global Avg Loss: 0.11930635, Time: 0.2384 Steps: 15000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003272, Sample Num: 52352, Cur Loss: 0.00000018, Cur Avg Loss: 0.05053622, Log Avg loss: 0.00015315, Global Avg Loss: 0.11773855, Time: 0.5463 Steps: 15200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003472, Sample Num: 55552, Cur Loss: 0.00000000, Cur Avg Loss: 0.04781707, Log Avg loss: 0.00333179, Global Avg Loss: 0.11625275, Time: 0.2471 Steps: 15400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003672, Sample Num: 58752, Cur Loss: 0.00000000, Cur Avg Loss: 0.06389736, Log Avg loss: 0.34305114, Global Avg Loss: 0.11916042, Time: 0.4089 Steps: 15600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003872, Sample Num: 61952, Cur Loss: 0.00000048, Cur Avg Loss: 
0.06528490, Log Avg loss: 0.09076024, Global Avg Loss: 0.11880092, Time: 0.7980 Steps: 15800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004072, Sample Num: 65152, Cur Loss: 0.00000030, Cur Avg Loss: 0.06215188, Log Avg loss: 0.00149660, Global Avg Loss: 0.11733462, Time: 0.3219 Steps: 16000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004272, Sample Num: 68352, Cur Loss: 0.00007015, Cur Avg Loss: 0.06038553, Log Avg loss: 0.02442253, Global Avg Loss: 0.11618756, Time: 0.9896 Steps: 16200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004472, Sample Num: 71552, Cur Loss: 0.00000006, Cur Avg Loss: 0.05768823, Log Avg loss: 0.00007393, Global Avg Loss: 0.11477154, Time: 0.7822 Steps: 16400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 004672, Sample Num: 74752, Cur Loss: 0.00000000, Cur Avg Loss: 0.05521874, Log Avg loss: 0.00000088, Global Avg Loss: 0.11338876, Time: 0.2046 Steps: 16600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 004872, Sample Num: 77952, Cur Loss: 0.00000000, Cur Avg Loss: 0.05525073, Log Avg loss: 0.05599806, Global Avg Loss: 0.11270553, Time: 0.3703 Steps: 16800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005072, Sample Num: 81152, Cur Loss: 0.00000000, Cur Avg Loss: 0.05422912, Log Avg loss: 0.02934275, Global Avg Loss: 0.11172480, Time: 0.3394 Steps: 17000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005272, Sample Num: 84352, Cur Loss: 0.00000012, Cur Avg Loss: 0.05443789, Log Avg loss: 0.05973230, Global Avg Loss: 0.11112023, Time: 0.4135 Steps: 17200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005472, Sample Num: 87552, Cur Loss: 0.00000095, Cur Avg Loss: 0.05245078, Log Avg loss: 0.00007052, Global Avg Loss: 0.10984380, Time: 0.4705 Steps: 17400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005672, Sample Num: 90752, Cur Loss: 0.00000000, Cur Avg Loss: 0.05060144, Log Avg loss: 0.00000341, Global Avg Loss: 0.10859561, Time: 0.3222 Steps: 17600, Updated lr: 0.000085 Training, 
Epoch: 0002, Batch: 005872, Sample Num: 93952, Cur Loss: 0.00000012, Cur Avg Loss: 0.04934580, Log Avg loss: 0.01373585, Global Avg Loss: 0.10752977, Time: 1.5640 Steps: 17800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006072, Sample Num: 97152, Cur Loss: 0.00000334, Cur Avg Loss: 0.04772133, Log Avg loss: 0.00002712, Global Avg Loss: 0.10633530, Time: 1.5633 Steps: 18000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006272, Sample Num: 100352, Cur Loss: 0.00000066, Cur Avg Loss: 0.05241023, Log Avg loss: 0.19476500, Global Avg Loss: 0.10730705, Time: 0.6085 Steps: 18200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006472, Sample Num: 103552, Cur Loss: 0.00000006, Cur Avg Loss: 0.05084973, Log Avg loss: 0.00191270, Global Avg Loss: 0.10616146, Time: 0.7059 Steps: 18400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006672, Sample Num: 106752, Cur Loss: 0.00000000, Cur Avg Loss: 0.04945518, Log Avg loss: 0.00432736, Global Avg Loss: 0.10506647, Time: 0.3434 Steps: 18600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006872, Sample Num: 109952, Cur Loss: 0.00000000, Cur Avg Loss: 0.04802813, Log Avg loss: 0.00042180, Global Avg Loss: 0.10395323, Time: 0.3897 Steps: 18800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007072, Sample Num: 113152, Cur Loss: 0.00000006, Cur Avg Loss: 0.04773069, Log Avg loss: 0.03751054, Global Avg Loss: 0.10325383, Time: 0.5854 Steps: 19000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007272, Sample Num: 116352, Cur Loss: 0.00000000, Cur Avg Loss: 0.04641828, Log Avg loss: 0.00001173, Global Avg Loss: 0.10217840, Time: 0.4081 Steps: 19200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007472, Sample Num: 119552, Cur Loss: 0.00000000, Cur Avg Loss: 0.04518721, Log Avg loss: 0.00042539, Global Avg Loss: 0.10112940, Time: 0.1540 Steps: 19400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007672, Sample Num: 122752, Cur Loss: 0.00000000, Cur Avg Loss: 0.04401557, Log Avg loss: 
0.00024299, Global Avg Loss: 0.10009994, Time: 0.7974 Steps: 19600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007872, Sample Num: 125952, Cur Loss: 0.00000352, Cur Avg Loss: 0.04833086, Log Avg loss: 0.21386539, Global Avg Loss: 0.10124909, Time: 0.3712 Steps: 19800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 008072, Sample Num: 129152, Cur Loss: 0.00000000, Cur Avg Loss: 0.04723502, Log Avg loss: 0.00410265, Global Avg Loss: 0.10027762, Time: 0.1652 Steps: 20000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008272, Sample Num: 132352, Cur Loss: 0.00000000, Cur Avg Loss: 0.04658763, Log Avg loss: 0.02045930, Global Avg Loss: 0.09948734, Time: 0.2619 Steps: 20200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008472, Sample Num: 135552, Cur Loss: 0.00000113, Cur Avg Loss: 0.04990573, Log Avg loss: 0.18714210, Global Avg Loss: 0.10034670, Time: 0.5980 Steps: 20400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008672, Sample Num: 138752, Cur Loss: 0.00000036, Cur Avg Loss: 0.04880365, Log Avg loss: 0.00211952, Global Avg Loss: 0.09939304, Time: 0.2843 Steps: 20600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008872, Sample Num: 141952, Cur Loss: 0.00000119, Cur Avg Loss: 0.04772408, Log Avg loss: 0.00091408, Global Avg Loss: 0.09844613, Time: 0.2832 Steps: 20800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 009072, Sample Num: 145152, Cur Loss: 0.00000018, Cur Avg Loss: 0.04667320, Log Avg loss: 0.00005631, Global Avg Loss: 0.09750908, Time: 0.3425 Steps: 21000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 009272, Sample Num: 148352, Cur Loss: 0.00000000, Cur Avg Loss: 0.04637573, Log Avg loss: 0.03288240, Global Avg Loss: 0.09689940, Time: 0.4004 Steps: 21200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009472, Sample Num: 151552, Cur Loss: 0.00000042, Cur Avg Loss: 0.04578925, Log Avg loss: 0.01859972, Global Avg Loss: 0.09616762, Time: 0.7953 Steps: 21400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 
009672, Sample Num: 154752, Cur Loss: 0.00000006, Cur Avg Loss: 0.04486672, Log Avg loss: 0.00117609, Global Avg Loss: 0.09528807, Time: 0.3896 Steps: 21600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009872, Sample Num: 157952, Cur Loss: 0.00000042, Cur Avg Loss: 0.04630243, Log Avg loss: 0.11573321, Global Avg Loss: 0.09547564, Time: 0.7898 Steps: 21800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010072, Sample Num: 161152, Cur Loss: 0.00000000, Cur Avg Loss: 0.04538474, Log Avg loss: 0.00008748, Global Avg Loss: 0.09460848, Time: 0.4305 Steps: 22000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010272, Sample Num: 164352, Cur Loss: 0.00000030, Cur Avg Loss: 0.04450201, Log Avg loss: 0.00004781, Global Avg Loss: 0.09375658, Time: 1.5628 Steps: 22200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010472, Sample Num: 167552, Cur Loss: 0.00003225, Cur Avg Loss: 0.04368785, Log Avg loss: 0.00187268, Global Avg Loss: 0.09293619, Time: 0.2998 Steps: 22400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 010672, Sample Num: 170752, Cur Loss: 0.00000000, Cur Avg Loss: 0.04291872, Log Avg loss: 0.00264682, Global Avg Loss: 0.09213717, Time: 0.6942 Steps: 22600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 010872, Sample Num: 173952, Cur Loss: 0.00000000, Cur Avg Loss: 0.04212931, Log Avg loss: 0.00000639, Global Avg Loss: 0.09132900, Time: 0.3334 Steps: 22800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011072, Sample Num: 177152, Cur Loss: 0.00000006, Cur Avg Loss: 0.04144395, Log Avg loss: 0.00418777, Global Avg Loss: 0.09057125, Time: 0.4994 Steps: 23000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011272, Sample Num: 180352, Cur Loss: 0.00000000, Cur Avg Loss: 0.04072818, Log Avg loss: 0.00110323, Global Avg Loss: 0.08979998, Time: 0.7385 Steps: 23200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011472, Sample Num: 183552, Cur Loss: 0.00000000, Cur Avg Loss: 0.04001815, Log Avg loss: 0.00000089, Global Avg Loss: 
0.08903246, Time: 0.7889 Steps: 23400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011672, Sample Num: 186752, Cur Loss: 0.00000000, Cur Avg Loss: 0.03933245, Log Avg loss: 0.00000076, Global Avg Loss: 0.08827796, Time: 0.3067 Steps: 23600, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 011872, Sample Num: 189952, Cur Loss: 0.00000000, Cur Avg Loss: 0.03876550, Log Avg loss: 0.00567839, Global Avg Loss: 0.08758384, Time: 0.9779 Steps: 23800, Updated lr: 0.000080 ***** Running evaluation checkpoint-23856 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-23856 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 6180.525057, Avg time per batch (s): 0.520000 {"eval_avg_loss": 0.098599, "eval_total_loss": 137.347768, "eval_acc": 0.999372, "eval_prec": 0.992509, "eval_recall": 0.981481, "eval_f1": 0.986965, "eval_roc_auc": 0.999989, "eval_pr_auc": 0.999572, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 10, "tp": 530}, "eval_mcc2": 0.986659, "eval_mcc": 0.986659, "eval_sn": 0.981481, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.11032, "test_total_loss": 153.675297, "test_acc": 0.999506, "test_prec": 1.0, "test_recall": 0.97963, "test_f1": 0.98971, "test_roc_auc": 0.999995, "test_pr_auc": 0.999791, "test_confusion_matrix": {"tn": 21744, "fp": 0, "fn": 11, "tp": 529}, "test_mcc2": 0.989512, "test_mcc": 0.989512, "test_sn": 0.97963, "test_sp": 1.0, "lr": 8.013436345314075e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.08737824896377087, "train_cur_epoch_loss": 460.22407251040215, "train_cur_epoch_avg_loss": 0.038583507085043774, "train_cur_epoch_time": 6180.525056600571, "train_cur_epoch_avg_time": 0.5181526707411612, "epoch": 2, "step": 23856} ################################################## Training, 
Epoch: 0003, Batch: 000144, Sample Num: 2304, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000016, Log Avg loss: 0.00000031, Global Avg Loss: 0.08685398, Time: 0.2334 Steps: 24000, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000344, Sample Num: 5504, Cur Loss: 0.00000077, Cur Avg Loss: 0.00681962, Log Avg loss: 0.01172964, Global Avg Loss: 0.08623312, Time: 0.8178 Steps: 24200, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000544, Sample Num: 8704, Cur Loss: 0.00000000, Cur Avg Loss: 0.00431578, Log Avg loss: 0.00000917, Global Avg Loss: 0.08552636, Time: 0.3196 Steps: 24400, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000744, Sample Num: 11904, Cur Loss: 0.00000000, Cur Avg Loss: 0.00319768, Log Avg loss: 0.00015644, Global Avg Loss: 0.08483230, Time: 0.2919 Steps: 24600, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000944, Sample Num: 15104, Cur Loss: 0.00000006, Cur Avg Loss: 0.04585961, Log Avg loss: 0.20456197, Global Avg Loss: 0.08579786, Time: 0.4712 Steps: 24800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001144, Sample Num: 18304, Cur Loss: 0.00000018, Cur Avg Loss: 0.04549504, Log Avg loss: 0.04377428, Global Avg Loss: 0.08546167, Time: 0.3047 Steps: 25000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001344, Sample Num: 21504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03872659, Log Avg loss: 0.00001105, Global Avg Loss: 0.08478349, Time: 0.5513 Steps: 25200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001544, Sample Num: 24704, Cur Loss: 0.00000000, Cur Avg Loss: 0.03371061, Log Avg loss: 0.00000321, Global Avg Loss: 0.08411593, Time: 0.3277 Steps: 25400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001744, Sample Num: 27904, Cur Loss: 0.00000024, Cur Avg Loss: 0.03388864, Log Avg loss: 0.03526302, Global Avg Loss: 0.08373427, Time: 0.3755 Steps: 25600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001944, Sample Num: 31104, Cur Loss: 0.00000000, Cur Avg Loss: 0.03040225, Log Avg loss: 0.00000094, Global 
Avg Loss: 0.08308517, Time: 0.3356 Steps: 25800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002144, Sample Num: 34304, Cur Loss: 0.00000185, Cur Avg Loss: 0.03822399, Log Avg loss: 0.11425130, Global Avg Loss: 0.08332491, Time: 0.2060 Steps: 26000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002344, Sample Num: 37504, Cur Loss: 0.00000006, Cur Avg Loss: 0.03496298, Log Avg loss: 0.00000493, Global Avg Loss: 0.08268888, Time: 0.3487 Steps: 26200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002544, Sample Num: 40704, Cur Loss: 0.00000024, Cur Avg Loss: 0.03235947, Log Avg loss: 0.00184642, Global Avg Loss: 0.08207644, Time: 0.3269 Steps: 26400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002744, Sample Num: 43904, Cur Loss: 0.00001729, Cur Avg Loss: 0.04437823, Log Avg loss: 0.19725687, Global Avg Loss: 0.08294246, Time: 0.7518 Steps: 26600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002944, Sample Num: 47104, Cur Loss: 0.00000006, Cur Avg Loss: 0.04141009, Log Avg loss: 0.00068715, Global Avg Loss: 0.08232861, Time: 0.2748 Steps: 26800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003144, Sample Num: 50304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03877604, Log Avg loss: 0.00000288, Global Avg Loss: 0.08171879, Time: 0.7141 Steps: 27000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003344, Sample Num: 53504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03656506, Log Avg loss: 0.00180835, Global Avg Loss: 0.08113122, Time: 0.5186 Steps: 27200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003544, Sample Num: 56704, Cur Loss: 0.00000030, Cur Avg Loss: 0.05177453, Log Avg loss: 0.30607688, Global Avg Loss: 0.08277315, Time: 0.5664 Steps: 27400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003744, Sample Num: 59904, Cur Loss: 0.00000012, Cur Avg Loss: 0.04913166, Log Avg loss: 0.00230004, Global Avg Loss: 0.08219002, Time: 0.3855 Steps: 27600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003944, Sample Num: 63104, 
Cur Loss: 0.00000018, Cur Avg Loss: 0.04664108, Log Avg loss: 0.00001752, Global Avg Loss: 0.08159885, Time: 0.3398 Steps: 27800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 004144, Sample Num: 66304, Cur Loss: 0.00000000, Cur Avg Loss: 0.04439021, Log Avg loss: 0.00000295, Global Avg Loss: 0.08101602, Time: 0.2805 Steps: 28000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 004344, Sample Num: 69504, Cur Loss: 0.00000000, Cur Avg Loss: 0.04234687, Log Avg loss: 0.00000898, Global Avg Loss: 0.08044150, Time: 0.8081 Steps: 28200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004544, Sample Num: 72704, Cur Loss: 0.00000000, Cur Avg Loss: 0.04048305, Log Avg loss: 0.00000071, Global Avg Loss: 0.07987502, Time: 0.3268 Steps: 28400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004744, Sample Num: 75904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03877636, Log Avg loss: 0.00000049, Global Avg Loss: 0.07931645, Time: 0.5512 Steps: 28600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004944, Sample Num: 79104, Cur Loss: 0.00000012, Cur Avg Loss: 0.03721070, Log Avg loss: 0.00007315, Global Avg Loss: 0.07876615, Time: 0.6603 Steps: 28800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005144, Sample Num: 82304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03576427, Log Avg loss: 0.00000857, Global Avg Loss: 0.07822300, Time: 0.1833 Steps: 29000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005344, Sample Num: 85504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03483647, Log Avg loss: 0.01097357, Global Avg Loss: 0.07776238, Time: 1.3099 Steps: 29200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005544, Sample Num: 88704, Cur Loss: 0.00000000, Cur Avg Loss: 0.03358004, Log Avg loss: 0.00000801, Global Avg Loss: 0.07723344, Time: 0.7454 Steps: 29400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 005744, Sample Num: 91904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03241082, Log Avg loss: 0.00000007, Global Avg Loss: 0.07671160, Time: 0.4652 Steps: 
29600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 005944, Sample Num: 95104, Cur Loss: 0.00000602, Cur Avg Loss: 0.03132030, Log Avg loss: 0.00000059, Global Avg Loss: 0.07619676, Time: 0.2280 Steps: 29800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006144, Sample Num: 98304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03651928, Log Avg loss: 0.19103311, Global Avg Loss: 0.07696233, Time: 0.3349 Steps: 30000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006344, Sample Num: 101504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03536798, Log Avg loss: 0.00000007, Global Avg Loss: 0.07645265, Time: 0.2218 Steps: 30200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006544, Sample Num: 104704, Cur Loss: 0.00000000, Cur Avg Loss: 0.03428710, Log Avg loss: 0.00000135, Global Avg Loss: 0.07594968, Time: 0.3851 Steps: 30400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006744, Sample Num: 107904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03327028, Log Avg loss: 0.00000016, Global Avg Loss: 0.07545328, Time: 0.3337 Steps: 30600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 006944, Sample Num: 111104, Cur Loss: 0.00000000, Cur Avg Loss: 0.03231204, Log Avg loss: 0.00000002, Global Avg Loss: 0.07496332, Time: 0.3509 Steps: 30800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007144, Sample Num: 114304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03140747, Log Avg loss: 0.00000097, Global Avg Loss: 0.07447969, Time: 0.8419 Steps: 31000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007344, Sample Num: 117504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03055215, Log Avg loss: 0.00000009, Global Avg Loss: 0.07400226, Time: 0.3375 Steps: 31200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007544, Sample Num: 120704, Cur Loss: 0.00000000, Cur Avg Loss: 0.02974218, Log Avg loss: 0.00000000, Global Avg Loss: 0.07353091, Time: 0.3036 Steps: 31400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007744, Sample Num: 123904, Cur Loss: 0.00000000, Cur Avg 
Loss: 0.02897405, Log Avg loss: 0.00000002, Global Avg Loss: 0.07306552, Time: 0.2189 Steps: 31600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007944, Sample Num: 127104, Cur Loss: 0.00000000, Cur Avg Loss: 0.03387998, Log Avg loss: 0.22383782, Global Avg Loss: 0.07401378, Time: 0.3995 Steps: 31800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008144, Sample Num: 130304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03304796, Log Avg loss: 0.00000006, Global Avg Loss: 0.07355119, Time: 0.3856 Steps: 32000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008344, Sample Num: 133504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03234726, Log Avg loss: 0.00381456, Global Avg Loss: 0.07311804, Time: 1.5633 Steps: 32200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008544, Sample Num: 136704, Cur Loss: 0.00000000, Cur Avg Loss: 0.03758943, Log Avg loss: 0.25629288, Global Avg Loss: 0.07424875, Time: 0.3713 Steps: 32400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008744, Sample Num: 139904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03673057, Log Avg loss: 0.00004007, Global Avg Loss: 0.07379348, Time: 0.5488 Steps: 32600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008944, Sample Num: 143104, Cur Loss: 0.00000000, Cur Avg Loss: 0.03590941, Log Avg loss: 0.00000818, Global Avg Loss: 0.07334357, Time: 0.8539 Steps: 32800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 009144, Sample Num: 146304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03512399, Log Avg loss: 0.00000000, Global Avg Loss: 0.07289907, Time: 0.2488 Steps: 33000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009344, Sample Num: 149504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03437231, Log Avg loss: 0.00000587, Global Avg Loss: 0.07245995, Time: 0.3358 Steps: 33200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009544, Sample Num: 152704, Cur Loss: 0.00000000, Cur Avg Loss: 0.03365248, Log Avg loss: 0.00002166, Global Avg Loss: 0.07202619, Time: 0.9818 Steps: 33400, Updated lr: 
0.000072 Training, Epoch: 0003, Batch: 009744, Sample Num: 155904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03296176, Log Avg loss: 0.00000055, Global Avg Loss: 0.07159747, Time: 0.2066 Steps: 33600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009944, Sample Num: 159104, Cur Loss: 0.00000000, Cur Avg Loss: 0.03248325, Log Avg loss: 0.00917041, Global Avg Loss: 0.07122807, Time: 0.7555 Steps: 33800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 010144, Sample Num: 162304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03184284, Log Avg loss: 0.00000161, Global Avg Loss: 0.07080910, Time: 0.8592 Steps: 34000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 010344, Sample Num: 165504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03122956, Log Avg loss: 0.00012426, Global Avg Loss: 0.07039573, Time: 0.3627 Steps: 34200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010544, Sample Num: 168704, Cur Loss: 0.00000000, Cur Avg Loss: 0.03063879, Log Avg loss: 0.00008409, Global Avg Loss: 0.06998695, Time: 1.0615 Steps: 34400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010744, Sample Num: 171904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03006845, Log Avg loss: 0.00000011, Global Avg Loss: 0.06958240, Time: 0.4181 Steps: 34600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010944, Sample Num: 175104, Cur Loss: 0.00000000, Cur Avg Loss: 0.02951896, Log Avg loss: 0.00000004, Global Avg Loss: 0.06918250, Time: 0.4012 Steps: 34800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011144, Sample Num: 178304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02899022, Log Avg loss: 0.00005787, Global Avg Loss: 0.06878750, Time: 0.3702 Steps: 35000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011344, Sample Num: 181504, Cur Loss: 0.00000000, Cur Avg Loss: 0.02847916, Log Avg loss: 0.00000306, Global Avg Loss: 0.06839668, Time: 0.3903 Steps: 35200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011544, Sample Num: 184704, Cur Loss: 0.00000000, Cur Avg Loss: 0.02798576, Log 
Avg loss: 0.00000005, Global Avg Loss: 0.06801026, Time: 0.3264 Steps: 35400, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 011744, Sample Num: 187904, Cur Loss: 0.00000000, Cur Avg Loss: 0.02752957, Log Avg loss: 0.00119806, Global Avg Loss: 0.06763491, Time: 0.2631 Steps: 35600, Updated lr: 0.000070 ***** Running evaluation checkpoint-35784 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-35784 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 6187.566251, Avg time per batch (s): 0.520000 {"eval_avg_loss": 0.031275, "eval_total_loss": 43.566157, "eval_acc": 0.999551, "eval_prec": 0.987132, "eval_recall": 0.994444, "eval_f1": 0.990775, "eval_roc_auc": 0.999947, "eval_pr_auc": 0.995978, "eval_confusion_matrix": {"tn": 21737, "fp": 7, "fn": 3, "tp": 537}, "eval_mcc2": 0.990552, "eval_mcc": 0.990552, "eval_sn": 0.994444, "eval_sp": 0.999678, "update_flag": true, "test_avg_loss": 0.032117, "test_total_loss": 44.738852, "test_acc": 0.999686, "test_prec": 0.992606, "test_recall": 0.994444, "test_f1": 0.993525, "test_roc_auc": 0.999996, "test_pr_auc": 0.99986, "test_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 3, "tp": 537}, "test_mcc2": 0.993364, "test_mcc": 0.993364, "test_sn": 0.994444, "test_sp": 0.999816, "lr": 7.011756802149816e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.06728713359855988, "train_cur_epoch_loss": 323.30728141114355, "train_cur_epoch_avg_loss": 0.027104902868137454, "train_cur_epoch_time": 6187.566251039505, "train_cur_epoch_avg_time": 0.518742978792715, "epoch": 3, "step": 35784} ################################################## Training, Epoch: 0004, Batch: 000016, Sample Num: 256, Cur Loss: 0.00001067, Cur Avg Loss: 0.00000123, Log Avg loss: 0.00000021, Global Avg Loss: 0.06725706, Time: 0.8304 
Steps: 35800, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000216, Sample Num: 3456, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000010, Log Avg loss: 0.00000001, Global Avg Loss: 0.06688341, Time: 0.1910 Steps: 36000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000416, Sample Num: 6656, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000038, Log Avg loss: 0.00000068, Global Avg Loss: 0.06651389, Time: 0.4259 Steps: 36200, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000616, Sample Num: 9856, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000026, Log Avg loss: 0.00000001, Global Avg Loss: 0.06614843, Time: 0.3839 Steps: 36400, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000816, Sample Num: 13056, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000102, Log Avg loss: 0.00000337, Global Avg Loss: 0.06578698, Time: 0.5089 Steps: 36600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001016, Sample Num: 16256, Cur Loss: 0.00000596, Cur Avg Loss: 0.02438449, Log Avg loss: 0.12386902, Global Avg Loss: 0.06610265, Time: 0.9409 Steps: 36800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001216, Sample Num: 19456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02037393, Log Avg loss: 0.00000029, Global Avg Loss: 0.06574534, Time: 0.8882 Steps: 37000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001416, Sample Num: 22656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01749626, Log Avg loss: 0.00000001, Global Avg Loss: 0.06539187, Time: 0.2675 Steps: 37200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001616, Sample Num: 25856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02485405, Log Avg loss: 0.07694725, Global Avg Loss: 0.06545366, Time: 0.3071 Steps: 37400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001816, Sample Num: 29056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02211682, Log Avg loss: 0.00000002, Global Avg Loss: 0.06510550, Time: 0.3046 Steps: 37600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002016, Sample Num: 32256, Cur Loss: 0.00000000, Cur Avg Loss: 
0.02077536, Log Avg loss: 0.00859490, Global Avg Loss: 0.06480651, Time: 0.3845 Steps: 37800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002216, Sample Num: 35456, Cur Loss: 0.00000030, Cur Avg Loss: 0.03172140, Log Avg loss: 0.14205749, Global Avg Loss: 0.06521309, Time: 1.0216 Steps: 38000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002416, Sample Num: 38656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02909549, Log Avg loss: 0.00000032, Global Avg Loss: 0.06487166, Time: 0.4325 Steps: 38200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002616, Sample Num: 41856, Cur Loss: 0.00000000, Cur Avg Loss: 0.03836260, Log Avg loss: 0.15030930, Global Avg Loss: 0.06531665, Time: 0.3905 Steps: 38400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002816, Sample Num: 45056, Cur Loss: 0.00000000, Cur Avg Loss: 0.03571901, Log Avg loss: 0.00114090, Global Avg Loss: 0.06498413, Time: 0.5439 Steps: 38600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003016, Sample Num: 48256, Cur Loss: 0.00000000, Cur Avg Loss: 0.03335582, Log Avg loss: 0.00008204, Global Avg Loss: 0.06464959, Time: 0.8581 Steps: 38800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003216, Sample Num: 51456, Cur Loss: 0.00000000, Cur Avg Loss: 0.03128145, Log Avg loss: 0.00000003, Global Avg Loss: 0.06431805, Time: 0.4522 Steps: 39000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003416, Sample Num: 54656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02952896, Log Avg loss: 0.00134881, Global Avg Loss: 0.06399678, Time: 0.4119 Steps: 39200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003616, Sample Num: 57856, Cur Loss: 0.00000000, Cur Avg Loss: 0.04328323, Log Avg loss: 0.27820631, Global Avg Loss: 0.06508414, Time: 0.3605 Steps: 39400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003816, Sample Num: 61056, Cur Loss: 0.00033140, Cur Avg Loss: 0.04101489, Log Avg loss: 0.00000330, Global Avg Loss: 0.06475544, Time: 0.4689 Steps: 39600, Updated lr: 0.000067 Training, 
Epoch: 0004, Batch: 004016, Sample Num: 64256, Cur Loss: 0.00000012, Cur Avg Loss: 0.03897235, Log Avg loss: 0.00000056, Global Avg Loss: 0.06443004, Time: 0.1375 Steps: 39800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004216, Sample Num: 67456, Cur Loss: 0.00000000, Cur Avg Loss: 0.03712357, Log Avg loss: 0.00000009, Global Avg Loss: 0.06410789, Time: 0.3013 Steps: 40000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004416, Sample Num: 70656, Cur Loss: 0.00000000, Cur Avg Loss: 0.03544225, Log Avg loss: 0.00000002, Global Avg Loss: 0.06378895, Time: 1.4706 Steps: 40200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 004616, Sample Num: 73856, Cur Loss: 0.00000000, Cur Avg Loss: 0.03390662, Log Avg loss: 0.00000000, Global Avg Loss: 0.06347316, Time: 0.7516 Steps: 40400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 004816, Sample Num: 77056, Cur Loss: 0.00000000, Cur Avg Loss: 0.03249854, Log Avg loss: 0.00000009, Global Avg Loss: 0.06316049, Time: 0.5970 Steps: 40600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005016, Sample Num: 80256, Cur Loss: 0.00000226, Cur Avg Loss: 0.03120277, Log Avg loss: 0.00000054, Global Avg Loss: 0.06285088, Time: 0.6933 Steps: 40800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005216, Sample Num: 83456, Cur Loss: 0.00000000, Cur Avg Loss: 0.03000638, Log Avg loss: 0.00000090, Global Avg Loss: 0.06254429, Time: 0.2277 Steps: 41000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005416, Sample Num: 86656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02889832, Log Avg loss: 0.00000001, Global Avg Loss: 0.06224068, Time: 0.3214 Steps: 41200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005616, Sample Num: 89856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02786917, Log Avg loss: 0.00000002, Global Avg Loss: 0.06194000, Time: 0.5737 Steps: 41400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 005816, Sample Num: 93056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02691081, Log Avg loss: 0.00000000, Global 
Avg Loss: 0.06164221, Time: 1.2458 Steps: 41600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006016, Sample Num: 96256, Cur Loss: 0.00000000, Cur Avg Loss: 0.02601617, Log Avg loss: 0.00000003, Global Avg Loss: 0.06134727, Time: 0.2626 Steps: 41800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006216, Sample Num: 99456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02825270, Log Avg loss: 0.09552747, Global Avg Loss: 0.06151004, Time: 0.2681 Steps: 42000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006416, Sample Num: 102656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02737200, Log Avg loss: 0.00000001, Global Avg Loss: 0.06121852, Time: 0.2293 Steps: 42200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006616, Sample Num: 105856, Cur Loss: 0.00000036, Cur Avg Loss: 0.02654456, Log Avg loss: 0.00000000, Global Avg Loss: 0.06092975, Time: 1.5645 Steps: 42400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006816, Sample Num: 109056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02576567, Log Avg loss: 0.00000000, Global Avg Loss: 0.06064370, Time: 0.2102 Steps: 42600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007016, Sample Num: 112256, Cur Loss: 0.00000000, Cur Avg Loss: 0.02503118, Log Avg loss: 0.00000000, Global Avg Loss: 0.06036032, Time: 0.1858 Steps: 42800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007216, Sample Num: 115456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02433743, Log Avg loss: 0.00000050, Global Avg Loss: 0.06007957, Time: 0.2898 Steps: 43000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007416, Sample Num: 118656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02368108, Log Avg loss: 0.00000013, Global Avg Loss: 0.05980143, Time: 0.6582 Steps: 43200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007616, Sample Num: 121856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02305921, Log Avg loss: 0.00000000, Global Avg Loss: 0.05952585, Time: 0.6090 Steps: 43400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007816, Sample Num: 
125056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02246974, Log Avg loss: 0.00002299, Global Avg Loss: 0.05925290, Time: 0.3783 Steps: 43600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 008016, Sample Num: 128256, Cur Loss: 0.00000000, Cur Avg Loss: 0.02190912, Log Avg loss: 0.00000001, Global Avg Loss: 0.05898234, Time: 0.4797 Steps: 43800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008216, Sample Num: 131456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02137580, Log Avg loss: 0.00000026, Global Avg Loss: 0.05871424, Time: 0.3786 Steps: 44000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008416, Sample Num: 134656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02491121, Log Avg loss: 0.17014596, Global Avg Loss: 0.05921845, Time: 0.4801 Steps: 44200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008616, Sample Num: 137856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02433296, Log Avg loss: 0.00000002, Global Avg Loss: 0.05895170, Time: 0.7322 Steps: 44400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008816, Sample Num: 141056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02378094, Log Avg loss: 0.00000001, Global Avg Loss: 0.05868734, Time: 0.3629 Steps: 44600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 009016, Sample Num: 144256, Cur Loss: 0.00000000, Cur Avg Loss: 0.02325341, Log Avg loss: 0.00000002, Global Avg Loss: 0.05842535, Time: 0.2635 Steps: 44800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 009216, Sample Num: 147456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02274878, Log Avg loss: 0.00000000, Global Avg Loss: 0.05816568, Time: 0.3193 Steps: 45000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009416, Sample Num: 150656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02226559, Log Avg loss: 0.00000016, Global Avg Loss: 0.05790831, Time: 0.2034 Steps: 45200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009616, Sample Num: 153856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02180250, Log Avg loss: 0.00000003, Global Avg Loss: 0.05765321, Time: 
0.4120 Steps: 45400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009816, Sample Num: 157056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02139171, Log Avg loss: 0.00164115, Global Avg Loss: 0.05740754, Time: 0.4166 Steps: 45600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010016, Sample Num: 160256, Cur Loss: 0.00000018, Cur Avg Loss: 0.02096458, Log Avg loss: 0.00000104, Global Avg Loss: 0.05715686, Time: 0.5854 Steps: 45800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010216, Sample Num: 163456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02055415, Log Avg loss: 0.00000003, Global Avg Loss: 0.05690835, Time: 0.2164 Steps: 46000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010416, Sample Num: 166656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02015949, Log Avg loss: 0.00000003, Global Avg Loss: 0.05666199, Time: 0.3608 Steps: 46200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 010616, Sample Num: 169856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01977977, Log Avg loss: 0.00000373, Global Avg Loss: 0.05641778, Time: 0.2228 Steps: 46400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 010816, Sample Num: 173056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01941402, Log Avg loss: 0.00000000, Global Avg Loss: 0.05617564, Time: 0.1730 Steps: 46600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011016, Sample Num: 176256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01906155, Log Avg loss: 0.00000002, Global Avg Loss: 0.05593557, Time: 0.5597 Steps: 46800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011216, Sample Num: 179456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01872165, Log Avg loss: 0.00000020, Global Avg Loss: 0.05569755, Time: 0.5038 Steps: 47000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011416, Sample Num: 182656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01839366, Log Avg loss: 0.00000000, Global Avg Loss: 0.05546154, Time: 0.2071 Steps: 47200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011616, Sample Num: 185856, Cur Loss: 
0.00000000, Cur Avg Loss: 0.01807697, Log Avg loss: 0.00000001, Global Avg Loss: 0.05522753, Time: 0.6438 Steps: 47400, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 011816, Sample Num: 189056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01777099, Log Avg loss: 0.00000002, Global Avg Loss: 0.05499548, Time: 0.3624 Steps: 47600, Updated lr: 0.000060 ***** Running evaluation checkpoint-47712 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-47712 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 6194.885201, Avg time per batch (s): 0.520000 {"eval_avg_loss": 0.023424, "eval_total_loss": 32.6299, "eval_acc": 0.999641, "eval_prec": 0.988971, "eval_recall": 0.996296, "eval_f1": 0.99262, "eval_roc_auc": 0.999926, "eval_pr_auc": 0.994185, "eval_confusion_matrix": {"tn": 21738, "fp": 6, "fn": 2, "tp": 538}, "eval_mcc2": 0.992443, "eval_mcc": 0.992443, "eval_sn": 0.996296, "eval_sp": 0.999724, "update_flag": true, "test_avg_loss": 0.033651, "test_total_loss": 46.875549, "test_acc": 0.999641, "test_prec": 0.990775, "test_recall": 0.994444, "test_f1": 0.992606, "test_roc_auc": 0.999996, "test_pr_auc": 0.999858, "test_confusion_matrix": {"tn": 21739, "fp": 5, "fn": 3, "tp": 537}, "test_mcc2": 0.992424, "test_mcc": 0.992424, "test_sn": 0.994444, "test_sp": 0.99977, "lr": 6.010077258985556e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.05486638310508151, "train_cur_epoch_loss": 209.98208201878063, "train_cur_epoch_avg_loss": 0.017604131624646264, "train_cur_epoch_time": 6194.885201215744, "train_cur_epoch_avg_time": 0.5193565728718766, "epoch": 4, "step": 47712} ################################################## Training, Epoch: 0005, Batch: 000088, Sample Num: 1408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000018, 
Global Avg Loss: 0.05476537, Time: 0.4403 Steps: 47800, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000288, Sample Num: 4608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000001, Global Avg Loss: 0.05453718, Time: 0.3038 Steps: 48000, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000488, Sample Num: 7808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00000000, Global Avg Loss: 0.05431089, Time: 0.2901 Steps: 48200, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000688, Sample Num: 11008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000000, Log Avg loss: 0.00000000, Global Avg Loss: 0.05408646, Time: 0.2875 Steps: 48400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000888, Sample Num: 14208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000000, Log Avg loss: 0.00000000, Global Avg Loss: 0.05386389, Time: 0.2465 Steps: 48600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001088, Sample Num: 17408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00119015, Log Avg loss: 0.00647441, Global Avg Loss: 0.05366967, Time: 0.1688 Steps: 48800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001288, Sample Num: 20608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00100535, Log Avg loss: 0.00000000, Global Avg Loss: 0.05345061, Time: 0.3157 Steps: 49000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001488, Sample Num: 23808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00087024, Log Avg loss: 0.00000014, Global Avg Loss: 0.05323333, Time: 0.8376 Steps: 49200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001688, Sample Num: 27008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00076713, Log Avg loss: 0.00000001, Global Avg Loss: 0.05301781, Time: 0.3037 Steps: 49400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001888, Sample Num: 30208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00068587, Log Avg loss: 0.00000000, Global Avg Loss: 0.05280403, Time: 1.0625 Steps: 49600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002088, Sample Num: 
33408, Cur Loss: 0.00000024, Cur Avg Loss: 0.01169170, Log Avg loss: 0.11558677, Global Avg Loss: 0.05305617, Time: 0.6962 Steps: 49800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002288, Sample Num: 36608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01066972, Log Avg loss: 0.00000024, Global Avg Loss: 0.05284394, Time: 0.7107 Steps: 50000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002488, Sample Num: 39808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00981204, Log Avg loss: 0.00000022, Global Avg Loss: 0.05263341, Time: 1.5624 Steps: 50200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002688, Sample Num: 43008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00937332, Log Avg loss: 0.00391558, Global Avg Loss: 0.05244009, Time: 0.2911 Steps: 50400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002888, Sample Num: 46208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00872420, Log Avg loss: 0.00000001, Global Avg Loss: 0.05223281, Time: 0.3704 Steps: 50600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003088, Sample Num: 49408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00815918, Log Avg loss: 0.00000040, Global Avg Loss: 0.05202717, Time: 0.3252 Steps: 50800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003288, Sample Num: 52608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00921276, Log Avg loss: 0.02548005, Global Avg Loss: 0.05192307, Time: 0.4677 Steps: 51000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003488, Sample Num: 55808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00868573, Log Avg loss: 0.00002121, Global Avg Loss: 0.05172033, Time: 0.6871 Steps: 51200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003688, Sample Num: 59008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00939323, Log Avg loss: 0.02173216, Global Avg Loss: 0.05160364, Time: 0.7393 Steps: 51400, Updated lr: 0.000057