{"adam_epsilon": 1e-08, "alphabet": "gene_prot", "append_eos": true, "asl_gamma_neg": 4.0, "asl_gamma_pos": 1.0, "best_metric_type": "f1", "beta1": 0.9, "beta2": 0.98, "buffer_size": 1024, "cache_dir": null, "classifier_activate_func": "gelu", "classifier_size": 128, "codes_file": null, "config_path": "../config/luca_base/luca_base_config.json", "cross_atten": false, "dataset_name": "RdRP", "dataset_type": "protein", "delete_old": false, "dev_data_dir": "../dataset/RdRP/protein/binary_class/dev/", "device": "cuda", "do_eval": true, "do_lower_case": false, "do_metrics": true, "do_predict": true, "do_train": true, "dropout_prob": 0.1, "early_stop_epoch": -1, "emb_activate_func": "gelu", "embedding_complete": true, "embedding_complete_seg_overlap": true, "embedding_fixed_len_a_time": 3072, "embedding_input_size": 2560, "embedding_input_size_a": null, "embedding_input_size_b": null, "eval_all_checkpoints": false, "evaluate_during_training": true, "evaluate_steps": -1, "evaluate_strategy": "epoch", "fc_activate_func": "gelu", "focal_loss_alpha": 0.7, "focal_loss_gamma": 2.0, "focal_loss_reduce": false, "fp16": false, "fp16_embedding": false, "fp16_opt_level": "O1", "fusion_type": "concat", "gradient_accumulation_steps": 1, "hidden_size": 1024, "ignore_index": -100, "input_mode": "single", "input_type": "matrix", "intermediate_size": 4096, "label_filepath": "../dataset/RdRP/protein/binary_class/label.txt", "label_size": 2, "label_type": "RdRP", "learning_rate": 0.0001, "llm_dir": "..", "llm_dirpath": null, "llm_step": "3B", "llm_task_level": "token_level,span_level,seq_level,structure_level", "llm_time_str": null, "llm_type": "esm", "llm_version": "esm2", "lmdb_path": null, "local_rank": -1, "log_dir": "../logs/RdRP/protein/binary_class/luca_base/matrix/20250402062649", "logging_steps": 200, "loss_reduction": "mean", "loss_type": "bce", "lr_decay_rate": 0.9, "lr_update_strategy": "step", "matrix_add_special_token": true, "matrix_dirpath": "../matrices/RdRP/protein/binary_class/luca_base/esm2/esm//3B", "matrix_embedding_exists": false, "matrix_encoder": false, "matrix_encoder_act": false, "matrix_fc_size": "128", "matrix_max_length": 4096, "matrix_max_length_a": null, "matrix_max_length_b": null, "matrix_pooling_type": "value_attention", "max_grad_norm": 1.0, "max_sentence_length": null, "max_sentences": null, "max_steps": -1, "model_dirpath": null, "model_type": "luca_base", "n_gpu": 1, "no_cuda": false, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "non_ignore": false, "not_append_eos": false, "not_matrix_encoder_shared": false, "not_prepend_bos": false, "not_save_emb_to_disk": false, "not_seq_encoder_shared": false, "num_attention_heads": 0, "num_hidden_layers": 0, "num_train_epochs": 10, "output_dir": "../models/RdRP/protein/binary_class/luca_base/matrix/20250402062649", "output_mode": "binary_class", "overwrite_cache": false, "overwrite_output_dir": true, "per_gpu_eval_batch_size": 16, "per_gpu_train_batch_size": 16, "pos_weight": 40.0, "position_embedding_type": "absolute", "prepend_bos": true, "save_all": true, "save_steps": -1, "seed": 1221, "self_atten": false, "seq_fc_size": "null", "seq_max_length": 4096, "seq_max_length_a": null, "seq_max_length_b": null, "seq_pooling_type": "value_attention", "seq_subword": false, "seq_vocab_path": "gene_prot", "sigmoid": true, "task_level_type": "seq_level", "task_type": "binary_class", "tb_log_dir": "../tb-logs/RdRP/protein/binary_class/luca_base/matrix/20250402062649", "test_data_dir": "../dataset/RdRP/protein/binary_class/test/", "time_str": "20250402062656", "train_data_dir": "../dataset/RdRP/protein/binary_class/train/", "trunc_type": "right", "vector_dirpath": "../vectors/RdRP/protein/binary_class/luca_base/esm2/esm//3B", "vector_fc_size": "null", "vocab_size": 39, "warmup_steps": 200, "weight": null, "weight_decay": 0.01, "worker_num": 0} ################################################## n_gpu: 1 ################################################## Inputs: Input Name List: protein,embedding_matrix ################################################## Encoder Config: {'llm_type': 'esm', 'llm_version': 'esm2', 'llm_step': '3B', 'llm_dirpath': None, 'input_type': 'matrix', 'trunc_type': 'right', 'seq_max_length': 4096, 'atom_seq_max_length': None, 'vector_dirpath': '../vectors/RdRP/protein/binary_class/luca_base/esm2/esm//3B', 'matrix_dirpath': '../matrices/RdRP/protein/binary_class/luca_base/esm2/esm//3B', 'local_rank': -1, 'max_sentence_length': None, 'max_sentences': None, 'matrix_add_special_token': True, 'embedding_complete': True, 'embedding_complete_seg_overlap': True, 'embedding_fixed_len_a_time': 3072, 'matrix_embedding_exists': False, 'save_emb_to_disk': True, 'fp16_embedding': False} ################################################## Model Config: LucaConfig { "alphabet": "gene_prot", "attention_probs_dropout_prob": 0.1, "classifier_activate_func": "gelu", "classifier_dropout_prob": 0.1, "classifier_size": 128, "cls_token_id": 2, "cross_atten": false, "directionality": "bidi", "emb_activate_func": "gelu", "embedding_input_size": 2560, "fc_activate_func": "gelu", "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 1024, "ignore_index": -100, "initializer_range": 0.02, "intermediate_size": 4096, "kernel_size": 7, "layer_norm_eps": 1e-12, "loss_reduction": "mean", "matrix_fc_size": [ 128 ], "matrix_max_length": 4096, "matrix_pooling_type": "value_attention", "max_position_embeddings": 4098, "no_position_embeddings": true, "no_token_embeddings": true, "no_token_type_embeddings": true, "num_attention_heads": 8, "num_hidden_layers": 4, "pad_token_id": 0, "pos_weight": 40.0, "position_embedding_type": "absolute", "self_atten": false, "sep_token_id": 3, "seq_fc_size": null, "seq_max_length": 4096, "seq_pooling_type": "value_attention", "token_dropout": null, "transformers_version": "4.29.0", "type_vocab_size": 2, "use_luca_layer_norm_v2": true, "vector_fc_size": null, "vocab_size": 39 } ################################################## Mode Architecture: LucaBase( (matrix_pooler): GlobalMaskValueAttentionPooling1D (2560 -> 2560) (linear): ModuleList( (0): ModuleList( (0): Linear(in_features=2560, out_features=128, bias=True) (1): GELU(approximate='none') ) ) (dropout): Dropout(p=0.1, inplace=False) (hidden_layer): Linear(in_features=128, out_features=128, bias=True) (hidden_act): GELU(approximate='none') (classifier): Linear(in_features=128, out_features=1, bias=True) (output): Sigmoid() (loss_fct): MaskedBCEWithLogitsLoss( (criterion): BCEWithLogitsLoss() ) ) ################################################## Model parameters: 20005249 ################################################## {"total_num": "19.080000M", "total_size": "76.310000MB", "param_sum": "19.080000M", "param_size": "76.310000MB", "buffer_sum": "0.000000M", "buffer_size": "0.000000MB", "trainable_num": "19.078492M", "trainable_size": "76.313969MB"} ################################################## Train dataset len: 190846, batch size: 16, batch num: 11928 Train dataset t_total: 119280, max_steps: -1 ***** Running training ***** Train Dataset Num examples = 190846 Train Dataset Num Epochs = 10 Logging Steps = 200 Saving Steps = -1 Evaluating Strategy = epoch Train Dataset Instantaneous batch size per GPU = 16 Train Dataset Total train batch size (w. parallel, distributed & accumulation) = 16 Train Dataset Gradient Accumulation steps = 1 Train Dataset Total optimization steps = 119280 ################################################## Training, Epoch: 0001, Batch: 000200, Sample Num: 3200, Cur Loss: 0.16877076, Cur Avg Loss: 1.31501726, Log Avg loss: 1.31501726, Global Avg Loss: 1.31501726, Time: 0.0818 Steps: 200, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000400, Sample Num: 6400, Cur Loss: 0.05184180, Cur Avg Loss: 0.75546887, Log Avg loss: 0.19592049, Global Avg Loss: 0.75546887, Time: 0.1054 Steps: 400, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000600, Sample Num: 9600, Cur Loss: 0.00198776, Cur Avg Loss: 0.54120931, Log Avg loss: 0.11269019, Global Avg Loss: 0.54120931, Time: 0.2283 Steps: 600, Updated lr: 0.000100 Training, Epoch: 0001, Batch: 000800, Sample Num: 12800, Cur Loss: 0.00253350, Cur Avg Loss: 0.42923778, Log Avg loss: 0.09332320, Global Avg Loss: 0.42923778, Time: 0.2279 Steps: 800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001000, Sample Num: 16000, Cur Loss: 0.00001067, Cur Avg Loss: 0.35181993, Log Avg loss: 0.04214852, Global Avg Loss: 0.35181993, Time: 0.1532 Steps: 1000, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001200, Sample Num: 19200, Cur Loss: 0.00016066, Cur Avg Loss: 0.29323801, Log Avg loss: 0.00032838, Global Avg Loss: 0.29323801, Time: 0.1048 Steps: 1200, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001400, Sample Num: 22400, Cur Loss: 0.00031409, Cur Avg Loss: 0.25608983, Log Avg loss: 0.03320074, Global Avg Loss: 0.25608983, Time: 0.0710 Steps: 1400, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001600, Sample Num: 25600, Cur Loss: 0.00000113, Cur Avg Loss: 0.22889976, Log Avg loss: 0.03856930, Global Avg Loss: 0.22889976, Time: 0.1285 Steps: 1600, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 001800, Sample Num: 28800, Cur Loss: 0.00000000, Cur Avg Loss: 0.20350036, Log Avg loss: 0.00030519, Global Avg Loss: 0.20350036, Time: 0.0816 Steps: 1800, Updated lr: 0.000099 Training, Epoch: 0001, Batch: 002000, Sample Num: 32000, Cur Loss: 0.00005066, Cur Avg Loss: 0.18356364, Log Avg loss: 0.00413312, Global Avg Loss: 0.18356364, Time: 0.1128 Steps: 2000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002200, Sample Num: 35200, Cur Loss: 0.00002390, Cur Avg Loss: 0.16967643, Log Avg loss: 0.03080429, Global Avg Loss: 0.16967643, Time: 0.1318 Steps: 2200, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002400, Sample Num: 38400, Cur Loss: 0.00005800, Cur Avg Loss: 0.15556828, Log Avg loss: 0.00037864, Global Avg Loss: 0.15556828, Time: 0.0540 Steps: 2400, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002600, Sample Num: 41600, Cur Loss: 0.00000012, Cur Avg Loss: 0.14381229, Log Avg loss: 0.00274048, Global Avg Loss: 0.14381229, Time: 0.1263 Steps: 2600, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 002800, Sample Num: 44800, Cur Loss: 0.00000006, Cur Avg Loss: 0.13804151, Log Avg loss: 0.06302139, Global Avg Loss: 0.13804151, Time: 0.1252 Steps: 2800, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003000, Sample Num: 48000, Cur Loss: 0.00000000, Cur Avg Loss: 0.12930340, Log Avg loss: 0.00696983, Global Avg Loss: 0.12930340, Time: 0.1239 Steps: 3000, Updated lr: 0.000098 Training, Epoch: 0001, Batch: 003200, Sample Num: 51200, Cur Loss: 0.00000000, Cur Avg Loss: 0.12122870, Log Avg loss: 0.00010822, Global Avg Loss: 0.12122870, Time: 0.3857 Steps: 3200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003400, Sample Num: 54400, Cur Loss: 0.00004628, Cur Avg Loss: 0.11421622, Log Avg loss: 0.00201647, Global Avg Loss: 0.11421622, Time: 0.1726 Steps: 3400, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003600, Sample Num: 57600, Cur Loss: 0.00000000, Cur Avg Loss: 0.10817390, Log Avg loss: 0.00545440, Global Avg Loss: 0.10817390, Time: 0.1233 Steps: 3600, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 003800, Sample Num: 60800, Cur Loss: 0.00004837, Cur Avg Loss: 0.10257394, Log Avg loss: 0.00177483, Global Avg Loss: 0.10257394, Time: 0.0829 Steps: 3800, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004000, Sample Num: 64000, Cur Loss: 0.00000030, Cur Avg Loss: 0.09753457, Log Avg loss: 0.00178639, Global Avg Loss: 0.09753457, Time: 0.0796 Steps: 4000, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004200, Sample Num: 67200, Cur Loss: 0.00000000, Cur Avg Loss: 0.09300409, Log Avg loss: 0.00239461, Global Avg Loss: 0.09300409, Time: 0.1115 Steps: 4200, Updated lr: 0.000097 Training, Epoch: 0001, Batch: 004400, Sample Num: 70400, Cur Loss: 0.00001109, Cur Avg Loss: 0.08884764, Log Avg loss: 0.00156224, Global Avg Loss: 0.08884764, Time: 0.2225 Steps: 4400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 004600, Sample Num: 73600, Cur Loss: 0.00000006, Cur Avg Loss: 0.08498481, Log Avg loss: 0.00000256, Global Avg Loss: 0.08498481, Time: 0.1284 Steps: 4600, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 004800, Sample Num: 76800, Cur Loss: 0.00000000, Cur Avg Loss: 0.08159716, Log Avg loss: 0.00368105, Global Avg Loss: 0.08159716, Time: 0.0875 Steps: 4800, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005000, Sample Num: 80000, Cur Loss: 0.00000000, Cur Avg Loss: 0.07833616, Log Avg loss: 0.00007230, Global Avg Loss: 0.07833616, Time: 0.2121 Steps: 5000, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005200, Sample Num: 83200, Cur Loss: 0.00000000, Cur Avg Loss: 0.07532342, Log Avg loss: 0.00000485, Global Avg Loss: 0.07532342, Time: 0.1392 Steps: 5200, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005400, Sample Num: 86400, Cur Loss: 0.00000000, Cur Avg Loss: 0.07879045, Log Avg loss: 0.16893311, Global Avg Loss: 0.07879045, Time: 0.0988 Steps: 5400, Updated lr: 0.000096 Training, Epoch: 0001, Batch: 005600, Sample Num: 89600, Cur Loss: 0.00000000, Cur Avg Loss: 0.07597651, Log Avg loss: 0.00000032, Global Avg Loss: 0.07597651, Time: 0.1231 Steps: 5600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 005800, Sample Num: 92800, Cur Loss: 0.00000000, Cur Avg Loss: 0.07335863, Log Avg loss: 0.00005784, Global Avg Loss: 0.07335863, Time: 0.1899 Steps: 5800, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006000, Sample Num: 96000, Cur Loss: 0.00000209, Cur Avg Loss: 0.07349805, Log Avg loss: 0.07754126, Global Avg Loss: 0.07349805, Time: 0.1948 Steps: 6000, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006200, Sample Num: 99200, Cur Loss: 0.00000000, Cur Avg Loss: 0.07516154, Log Avg loss: 0.12506639, Global Avg Loss: 0.07516154, Time: 0.0904 Steps: 6200, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006400, Sample Num: 102400, Cur Loss: 0.00000000, Cur Avg Loss: 0.07290256, Log Avg loss: 0.00287401, Global Avg Loss: 0.07290256, Time: 0.1006 Steps: 6400, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006600, Sample Num: 105600, Cur Loss: 0.00000000, Cur Avg Loss: 0.07295197, Log Avg loss: 0.07453324, Global Avg Loss: 0.07295197, Time: 0.3704 Steps: 6600, Updated lr: 0.000095 Training, Epoch: 0001, Batch: 006800, Sample Num: 108800, Cur Loss: 0.00000000, Cur Avg Loss: 0.07084681, Log Avg loss: 0.00137627, Global Avg Loss: 0.07084681, Time: 0.2068 Steps: 6800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007000, Sample Num: 112000, Cur Loss: 0.00000000, Cur Avg Loss: 0.06941771, Log Avg loss: 0.02082842, Global Avg Loss: 0.06941771, Time: 0.0990 Steps: 7000, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007200, Sample Num: 115200, Cur Loss: 0.00000006, Cur Avg Loss: 0.06767643, Log Avg loss: 0.00673160, Global Avg Loss: 0.06767643, Time: 0.1091 Steps: 7200, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007400, Sample Num: 118400, Cur Loss: 0.00000000, Cur Avg Loss: 0.06584772, Log Avg loss: 0.00001415, Global Avg Loss: 0.06584772, Time: 0.1066 Steps: 7400, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007600, Sample Num: 121600, Cur Loss: 0.00000000, Cur Avg Loss: 0.06411495, Log Avg loss: 0.00000267, Global Avg Loss: 0.06411495, Time: 0.2001 Steps: 7600, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 007800, Sample Num: 124800, Cur Loss: 0.00000000, Cur Avg Loss: 0.06247130, Log Avg loss: 0.00001260, Global Avg Loss: 0.06247130, Time: 0.0832 Steps: 7800, Updated lr: 0.000094 Training, Epoch: 0001, Batch: 008000, Sample Num: 128000, Cur Loss: 0.00000030, Cur Avg Loss: 0.06098781, Log Avg loss: 0.00313152, Global Avg Loss: 0.06098781, Time: 0.2321 Steps: 8000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008200, Sample Num: 131200, Cur Loss: 0.00000000, Cur Avg Loss: 0.05957317, Log Avg loss: 0.00298762, Global Avg Loss: 0.05957317, Time: 0.2213 Steps: 8200, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008400, Sample Num: 134400, Cur Loss: 0.00000000, Cur Avg Loss: 0.06145696, Log Avg loss: 0.13869254, Global Avg Loss: 0.06145696, Time: 0.2225 Steps: 8400, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008600, Sample Num: 137600, Cur Loss: 0.00000268, Cur Avg Loss: 0.06493005, Log Avg loss: 0.21079954, Global Avg Loss: 0.06493005, Time: 0.3097 Steps: 8600, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 008800, Sample Num: 140800, Cur Loss: 0.00000000, Cur Avg Loss: 0.06348785, Log Avg loss: 0.00147342, Global Avg Loss: 0.06348785, Time: 0.0829 Steps: 8800, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 009000, Sample Num: 144000, Cur Loss: 0.00000000, Cur Avg Loss: 0.06207764, Log Avg loss: 0.00002846, Global Avg Loss: 0.06207764, Time: 0.0955 Steps: 9000, Updated lr: 0.000093 Training, Epoch: 0001, Batch: 009200, Sample Num: 147200, Cur Loss: 0.00000000, Cur Avg Loss: 0.06350005, Log Avg loss: 0.12750824, Global Avg Loss: 0.06350005, Time: 0.1343 Steps: 9200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009400, Sample Num: 150400, Cur Loss: 0.00000000, Cur Avg Loss: 0.06261735, Log Avg loss: 0.02201328, Global Avg Loss: 0.06261735, Time: 0.1227 Steps: 9400, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009600, Sample Num: 153600, Cur Loss: 0.00000000, Cur Avg Loss: 0.06131295, Log Avg loss: 0.00000595, Global Avg Loss: 0.06131295, Time: 0.0897 Steps: 9600, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 009800, Sample Num: 156800, Cur Loss: 0.00000000, Cur Avg Loss: 0.06138044, Log Avg loss: 0.06462034, Global Avg Loss: 0.06138044, Time: 0.2018 Steps: 9800, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010000, Sample Num: 160000, Cur Loss: 0.00000012, Cur Avg Loss: 0.06018753, Log Avg loss: 0.00173491, Global Avg Loss: 0.06018753, Time: 0.1046 Steps: 10000, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010200, Sample Num: 163200, Cur Loss: 0.00000000, Cur Avg Loss: 0.05902267, Log Avg loss: 0.00077954, Global Avg Loss: 0.05902267, Time: 0.1084 Steps: 10200, Updated lr: 0.000092 Training, Epoch: 0001, Batch: 010400, Sample Num: 166400, Cur Loss: 0.00000000, Cur Avg Loss: 0.05788795, Log Avg loss: 0.00001699, Global Avg Loss: 0.05788795, Time: 0.2220 Steps: 10400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 010600, Sample Num: 169600, Cur Loss: 0.00000000, Cur Avg Loss: 0.05685237, Log Avg loss: 0.00300239, Global Avg Loss: 0.05685237, Time: 0.0701 Steps: 10600, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 010800, Sample Num: 172800, Cur Loss: 0.00000000, Cur Avg Loss: 0.05579960, Log Avg loss: 0.00000296, Global Avg Loss: 0.05579960, Time: 0.1702 Steps: 10800, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011000, Sample Num: 176000, Cur Loss: 0.00000000, Cur Avg Loss: 0.05478508, Log Avg loss: 0.00000100, Global Avg Loss: 0.05478508, Time: 0.0947 Steps: 11000, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011200, Sample Num: 179200, Cur Loss: 0.00000000, Cur Avg Loss: 0.05386173, Log Avg loss: 0.00307735, Global Avg Loss: 0.05386173, Time: 0.0991 Steps: 11200, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011400, Sample Num: 182400, Cur Loss: 0.00000000, Cur Avg Loss: 0.05291679, Log Avg loss: 0.00000018, Global Avg Loss: 0.05291679, Time: 0.1812 Steps: 11400, Updated lr: 0.000091 Training, Epoch: 0001, Batch: 011600, Sample Num: 185600, Cur Loss: 0.00000000, Cur Avg Loss: 0.05203588, Log Avg loss: 0.00182411, Global Avg Loss: 0.05203588, Time: 0.1073 Steps: 11600, Updated lr: 0.000090 Training, Epoch: 0001, Batch: 011800, Sample Num: 188800, Cur Loss: 0.00000000, Cur Avg Loss: 0.05289309, Log Avg loss: 0.10261096, Global Avg Loss: 0.05289309, Time: 0.3088 Steps: 11800, Updated lr: 0.000090 ***** Running evaluation checkpoint-11928 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-11928 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1873.192789, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.000981, "eval_total_loss": 1.366584, "eval_acc": 0.99982, "eval_prec": 0.992647, "eval_recall": 1.0, "eval_f1": 0.99631, "eval_roc_auc": 0.999999, "eval_pr_auc": 0.999976, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 0, "tp": 540}, "eval_mcc2": 0.996225, "eval_mcc": 0.996225, "eval_sn": 1.0, "eval_sp": 0.999816, "update_flag": true, "test_avg_loss": 0.000856, "test_total_loss": 1.192489, "test_acc": 0.999865, "test_prec": 0.994475, "test_recall": 1.0, "test_f1": 0.99723, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21741, "fp": 3, "fn": 0, "tp": 540}, "test_mcc2": 0.997165, "test_mcc": 0.997165, "test_sn": 1.0, "test_sp": 0.999862, "lr": 9.015115888478334e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.052385241954588234, "train_cur_epoch_loss": 624.8511660343285, "train_cur_epoch_avg_loss": 0.052385241954588234, "train_cur_epoch_time": 1873.1927886009216, "train_cur_epoch_avg_time": 0.15704164894373923, "epoch": 1, "step": 11928} ################################################## Training, Epoch: 0002, Batch: 000072, Sample Num: 1152, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000149, Log Avg loss: 0.00356422, Global Avg Loss: 0.05207094, Time: 0.0878 Steps: 12000, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000272, Sample Num: 4352, Cur Loss: 0.00000000, Cur Avg Loss: 0.09915137, Log Avg loss: 0.13484533, Global Avg Loss: 0.05342790, Time: 0.2131 Steps: 12200, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000472, Sample Num: 7552, Cur Loss: 0.00000000, Cur Avg Loss: 0.05889543, Log Avg loss: 0.00414735, Global Avg Loss: 0.05263305, Time: 0.1776 Steps: 12400, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000672, Sample Num: 10752, Cur Loss: 0.00000000, Cur Avg Loss: 0.04136946, Log Avg loss: 0.00000818, Global Avg Loss: 0.05179773, Time: 0.2146 Steps: 12600, Updated lr: 0.000090 Training, Epoch: 0002, Batch: 000872, Sample Num: 13952, Cur Loss: 0.00000000, Cur Avg Loss: 0.03189939, Log Avg loss: 0.00007995, Global Avg Loss: 0.05098964, Time: 0.1278 Steps: 12800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001072, Sample Num: 17152, Cur Loss: 0.00000000, Cur Avg Loss: 0.03918069, Log Avg loss: 0.07092718, Global Avg Loss: 0.05129637, Time: 0.1101 Steps: 13000, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001272, Sample Num: 20352, Cur Loss: 0.00000000, Cur Avg Loss: 0.03302282, Log Avg loss: 0.00001662, Global Avg Loss: 0.05051941, Time: 0.1841 Steps: 13200, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001472, Sample Num: 23552, Cur Loss: 0.00000000, Cur Avg Loss: 0.02872150, Log Avg loss: 0.00136508, Global Avg Loss: 0.04978576, Time: 0.1192 Steps: 13400, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001672, Sample Num: 26752, Cur Loss: 0.00000000, Cur Avg Loss: 0.03079293, Log Avg loss: 0.04603867, Global Avg Loss: 0.04973066, Time: 0.1260 Steps: 13600, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 001872, Sample Num: 29952, Cur Loss: 0.00000000, Cur Avg Loss: 0.02750316, Log Avg loss: 0.00000067, Global Avg Loss: 0.04900993, Time: 0.1516 Steps: 13800, Updated lr: 0.000089 Training, Epoch: 0002, Batch: 002072, Sample Num: 33152, Cur Loss: 0.00000060, Cur Avg Loss: 0.02532422, Log Avg loss: 0.00492941, Global Avg Loss: 0.04838021, Time: 0.0846 Steps: 14000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002272, Sample Num: 36352, Cur Loss: 0.00000000, Cur Avg Loss: 0.02493150, Log Avg loss: 0.02086283, Global Avg Loss: 0.04799264, Time: 0.2089 Steps: 14200, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002472, Sample Num: 39552, Cur Loss: 0.00000155, Cur Avg Loss: 0.02291495, Log Avg loss: 0.00000706, Global Avg Loss: 0.04732618, Time: 0.1165 Steps: 14400, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002672, Sample Num: 42752, Cur Loss: 0.00000000, Cur Avg Loss: 0.02479650, Log Avg loss: 0.04805240, Global Avg Loss: 0.04733612, Time: 0.0873 Steps: 14600, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 002872, Sample Num: 45952, Cur Loss: 0.00000000, Cur Avg Loss: 0.02307674, Log Avg loss: 0.00010079, Global Avg Loss: 0.04669781, Time: 0.1047 Steps: 14800, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003072, Sample Num: 49152, Cur Loss: 0.00000000, Cur Avg Loss: 0.02170687, Log Avg loss: 0.00203547, Global Avg Loss: 0.04610231, Time: 0.0945 Steps: 15000, Updated lr: 0.000088 Training, Epoch: 0002, Batch: 003272, Sample Num: 52352, Cur Loss: 0.00000000, Cur Avg Loss: 0.02038058, Log Avg loss: 0.00000874, Global Avg Loss: 0.04549582, Time: 0.1667 Steps: 15200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003472, Sample Num: 55552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01928182, Log Avg loss: 0.00130607, Global Avg Loss: 0.04492192, Time: 0.0814 Steps: 15400, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003672, Sample Num: 58752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01952043, Log Avg loss: 0.02366278, Global Avg Loss: 0.04464937, Time: 0.1289 Steps: 15600, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 003872, Sample Num: 61952, Cur Loss: 0.00000000, Cur Avg Loss: 0.02045118, Log Avg loss: 0.03753978, Global Avg Loss: 0.04455938, Time: 0.2321 Steps: 15800, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004072, Sample Num: 65152, Cur Loss: 0.00000000, Cur Avg Loss: 0.01944690, Log Avg loss: 0.00000394, Global Avg Loss: 0.04400243, Time: 0.1040 Steps: 16000, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004272, Sample Num: 68352, Cur Loss: 0.00000000, Cur Avg Loss: 0.02041310, Log Avg loss: 0.04008499, Global Avg Loss: 0.04395407, Time: 0.2780 Steps: 16200, Updated lr: 0.000087 Training, Epoch: 0002, Batch: 004472, Sample Num: 71552, Cur Loss: 0.00000060, Cur Avg Loss: 0.01950726, Log Avg loss: 0.00015853, Global Avg Loss: 0.04341998, Time: 0.2277 Steps: 16400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 004672, Sample Num: 74752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01867222, Log Avg loss: 0.00000072, Global Avg Loss: 0.04289685, Time: 0.0818 Steps: 16600, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 004872, Sample Num: 77952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01982846, Log Avg loss: 0.04683820, Global Avg Loss: 0.04294378, Time: 0.1188 Steps: 16800, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005072, Sample Num: 81152, Cur Loss: 0.00000006, Cur Avg Loss: 0.01904922, Log Avg loss: 0.00006694, Global Avg Loss: 0.04243934, Time: 0.1106 Steps: 17000, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005272, Sample Num: 84352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01853925, Log Avg loss: 0.00560635, Global Avg Loss: 0.04201105, Time: 0.1323 Steps: 17200, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005472, Sample Num: 87552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01786174, Log Avg loss: 0.00000259, Global Avg Loss: 0.04152820, Time: 0.1475 Steps: 17400, Updated lr: 0.000086 Training, Epoch: 0002, Batch: 005672, Sample Num: 90752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01723194, Log Avg loss: 0.00000069, Global Avg Loss: 0.04105629, Time: 0.1057 Steps: 17600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 005872, Sample Num: 93952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01664523, Log Avg loss: 0.00000598, Global Avg Loss: 0.04059505, Time: 0.4079 Steps: 17800, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006072, Sample Num: 97152, Cur Loss: 0.00000030, Cur Avg Loss: 0.01609698, Log Avg loss: 0.00000037, Global Avg Loss: 0.04014400, Time: 0.4077 Steps: 18000, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006272, Sample Num: 100352, Cur Loss: 0.00000000, Cur Avg Loss: 0.02018524, Log Avg loss: 0.14430503, Global Avg Loss: 0.04128863, Time: 0.2194 Steps: 18200, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006472, Sample Num: 103552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01963537, Log Avg loss: 0.00239131, Global Avg Loss: 0.04086583, Time: 0.2088 Steps: 18400, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006672, Sample Num: 106752, Cur Loss: 0.00000000, Cur Avg Loss: 0.02056897, Log Avg loss: 0.05078026, Global Avg Loss: 0.04097244, Time: 0.1120 Steps: 18600, Updated lr: 0.000085 Training, Epoch: 0002, Batch: 006872, Sample Num: 109952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01999494, Log Avg loss: 0.00084518, Global Avg Loss: 0.04054555, Time: 0.1252 Steps: 18800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007072, Sample Num: 113152, Cur Loss: 0.00000030, Cur Avg Loss: 0.02032970, Log Avg loss: 0.03183208, Global Avg Loss: 0.04045383, Time: 0.1784 Steps: 19000, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007272, Sample Num: 116352, Cur Loss: 0.00000274, Cur Avg Loss: 0.01978667, Log Avg loss: 0.00058511, Global Avg Loss: 0.04003853, Time: 0.1309 Steps: 19200, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007472, Sample Num: 119552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01925705, Log Avg loss: 0.00000023, Global Avg Loss: 0.03962577, Time: 0.0531 Steps: 19400, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007672, Sample Num: 122752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01875806, Log Avg loss: 0.00011590, Global Avg Loss: 0.03922260, Time: 0.2333 Steps: 19600, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 007872, Sample Num: 125952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01828149, Log Avg loss: 0.00000021, Global Avg Loss: 0.03882642, Time: 0.1412 Steps: 19800, Updated lr: 0.000084 Training, Epoch: 0002, Batch: 008072, Sample Num: 129152, Cur Loss: 0.00000000, Cur Avg Loss: 0.01793948, Log Avg loss: 0.00447796, Global Avg Loss: 0.03848293, Time: 0.0564 Steps: 20000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008272, Sample Num: 132352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01796330, Log Avg loss: 0.01892472, Global Avg Loss: 0.03828929, Time: 0.0867 Steps: 20200, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008472, Sample Num: 135552, Cur Loss: 0.00000000, Cur Avg Loss: 0.02036921, Log Avg loss: 0.11987744, Global Avg Loss: 0.03908917, Time: 0.1818 Steps: 20400, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008672, Sample Num: 138752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01991381, Log Avg loss: 0.00062315, Global Avg Loss: 0.03871571, Time: 0.0934 Steps: 20600, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 008872, Sample Num: 141952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01946495, Log Avg loss: 0.00000229, Global Avg Loss: 0.03834347, Time: 0.0932 Steps: 20800, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 009072, Sample Num: 145152, Cur Loss: 0.00000000, Cur Avg Loss: 0.01903593, Log Avg loss: 0.00000459, Global Avg Loss: 0.03797834, Time: 0.1124 Steps: 21000, Updated lr: 0.000083 Training, Epoch: 0002, Batch: 009272, Sample Num: 148352, Cur Loss: 0.00000000, Cur Avg Loss: 0.02034414, Log Avg loss: 0.07968456, Global Avg Loss: 0.03837179, Time: 0.1279 Steps: 21200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009472, Sample Num: 151552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01991519, Log Avg loss: 0.00002936, Global Avg Loss: 0.03801345, Time: 0.2699 Steps: 21400, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009672, Sample Num: 154752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01950365, Log Avg loss: 0.00001316, Global Avg Loss: 0.03766160, Time: 0.1261 Steps: 21600, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 009872, Sample Num: 157952, Cur Loss: 0.00000000, Cur Avg Loss: 0.02145723, Log Avg loss: 0.11593189, Global Avg Loss: 0.03837967, Time: 0.2325 Steps: 21800, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010072, Sample Num: 161152, Cur Loss: 0.00000000, Cur Avg Loss: 0.02103320, Log Avg loss: 0.00010347, Global Avg Loss: 0.03803171, Time: 0.1379 Steps: 22000, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010272, Sample Num: 164352, Cur Loss: 0.00000209, Cur Avg Loss: 0.02062555, Log Avg loss: 0.00009594, Global Avg Loss: 0.03768994, Time: 0.4035 Steps: 22200, Updated lr: 0.000082 Training, Epoch: 0002, Batch: 010472, Sample Num: 167552, Cur Loss: 0.00000000, Cur Avg Loss: 0.02023166, Log Avg loss: 0.00000193, Global Avg Loss: 0.03735344, Time: 0.0991 Steps: 22400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 010672, Sample Num: 170752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01990460, Log Avg loss: 0.00277966, Global Avg Loss: 0.03704748, Time: 0.2078 Steps: 22600, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 010872, Sample Num: 173952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01953844, Log Avg loss: 0.00000016, Global Avg Loss: 0.03672251, Time: 0.1080 Steps: 22800, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011072, Sample Num: 177152, Cur Loss: 0.00000000, Cur Avg Loss: 0.01918551, Log Avg loss: 0.00000023, Global Avg Loss: 0.03640318, Time: 0.1830 Steps: 23000, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011272, Sample Num: 180352, Cur Loss: 0.00000000, Cur Avg Loss: 0.01888910, Log Avg loss: 0.00247965, Global Avg Loss: 0.03611074, Time: 0.2172 Steps: 23200, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011472, Sample Num: 183552, Cur Loss: 0.00000000, Cur Avg Loss: 0.01855985, Log Avg loss: 0.00000340, Global Avg Loss: 0.03580213, Time: 0.2298 Steps: 23400, Updated lr: 0.000081 Training, Epoch: 0002, Batch: 011672, Sample Num: 186752, Cur Loss: 0.00000000, Cur Avg Loss: 0.01824198, Log Avg loss: 0.00000861, Global Avg Loss: 0.03549879, Time: 0.1029 Steps: 23600, Updated lr: 0.000080 Training, Epoch: 0002, Batch: 011872, Sample Num: 189952, Cur Loss: 0.00000000, Cur Avg Loss: 0.01994329, Log Avg loss: 0.11923227, Global Avg Loss: 0.03620244, Time: 0.2787 Steps: 23800, Updated lr: 0.000080 ***** Running evaluation checkpoint-23856 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-23856 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1864.599850, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.011338, "eval_total_loss": 15.794309, "eval_acc": 0.99982, "eval_prec": 0.996296, "eval_recall": 0.996296, "eval_f1": 0.996296, "eval_roc_auc": 0.999999, "eval_pr_auc": 0.999959, "eval_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "eval_mcc2": 0.996204, "eval_mcc": 0.996204, "eval_sn": 0.996296, "eval_sp": 0.999908, "update_flag": false, "test_avg_loss": 0.013726, "test_total_loss": 19.120026, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 8.013436345314075e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.03611745295525998, "train_cur_epoch_loss": 236.76679166634113, "train_cur_epoch_avg_loss": 0.01984966395593068, "train_cur_epoch_time": 1864.5998497009277, "train_cur_epoch_avg_time": 0.15632124829819985, "epoch": 2, "step": 23856} ################################################## Training, Epoch: 0003, Batch: 000144, Sample Num: 2304, Cur Loss: 0.00000000, Cur Avg Loss: 0.26191196, Log Avg loss: 0.18857661, Global Avg Loss: 0.03747222, Time: 0.0770 Steps: 24000, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000344, Sample Num: 5504, Cur Loss: 0.00000000, Cur Avg Loss: 0.10973422, Log Avg loss: 0.00016624, Global Avg Loss: 0.03716391, Time: 0.2383 Steps: 24200, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000544, Sample Num: 8704, Cur Loss: 0.00000000, Cur Avg Loss: 0.06939160, Log Avg loss: 0.00000230, Global Avg Loss: 0.03685930, Time: 0.1032 Steps: 24400, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000744, Sample Num: 11904, Cur Loss: 0.00000000, Cur Avg Loss: 0.05164196, Log Avg loss: 0.00336294, Global Avg Loss: 0.03658697, Time: 0.0962 Steps: 24600, Updated lr: 0.000080 Training, Epoch: 0003, Batch: 000944, Sample Num: 15104, Cur Loss: 0.00000000, Cur Avg Loss: 0.05088733, Log Avg loss: 0.04808009, Global Avg Loss: 0.03667966, Time: 0.1495 Steps: 24800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001144, Sample Num: 18304, Cur Loss: 0.00000000, Cur Avg Loss: 0.04208514, Log Avg loss: 0.00053882, Global Avg Loss: 0.03639053, Time: 0.1005 Steps: 25000, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001344, Sample Num: 21504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03582306, Log Avg loss: 0.00000397, Global Avg Loss: 0.03610175, Time: 0.2000 Steps: 25200, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001544, Sample Num: 24704, Cur Loss: 0.00000048, Cur Avg Loss: 0.03120795, Log Avg loss: 0.00019440, Global Avg Loss: 0.03581902, Time: 0.1070 Steps: 25400, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001744, Sample Num: 27904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03598766, Log Avg loss: 0.07288705, Global Avg Loss: 0.03610861, Time: 0.1220 Steps: 25600, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 001944, Sample Num: 31104, Cur Loss: 0.00000000, Cur Avg Loss: 0.03228529, Log Avg loss: 0.00000062, Global Avg Loss: 0.03582870, Time: 0.1091 Steps: 25800, Updated lr: 0.000079 Training, Epoch: 0003, Batch: 002144, Sample Num: 34304, Cur Loss: 0.00000000, Cur Avg Loss: 0.03577137, Log Avg loss: 0.06965599, Global Avg Loss: 0.03608891, Time: 0.0707 Steps: 26000, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002344, Sample Num: 37504, Cur Loss: 0.00000000, Cur Avg Loss: 0.03271935, Log Avg loss: 0.00000170, Global Avg Loss: 0.03581344, Time: 0.1134 Steps: 26200, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002544, Sample Num: 40704, Cur Loss: 0.00000000, Cur Avg Loss: 0.03015638, Log Avg loss: 0.00011845, Global Avg Loss: 0.03554302, Time: 0.1069 Steps: 26400, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002744, Sample Num: 43904, Cur Loss: 0.00000000, Cur Avg Loss: 0.03300652, Log Avg loss: 0.06926021, Global Avg Loss: 0.03579654, Time: 0.2208 Steps: 26600, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 002944, Sample Num: 47104, Cur Loss: 0.00000000, Cur Avg Loss: 0.03085496, Log Avg loss: 0.00133558, Global Avg Loss: 0.03553936, Time: 0.1076 Steps: 26800, Updated lr: 0.000078 Training, Epoch: 0003, Batch: 003144, Sample Num: 50304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02889237, Log Avg loss: 0.00000316, Global Avg Loss: 0.03527613, Time: 0.2113 Steps: 27000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003344, Sample Num: 53504, Cur Loss: 0.00000000, Cur Avg Loss: 0.02719658, Log Avg loss: 0.00053867, Global Avg Loss: 0.03502071, Time: 0.1603 Steps: 27200, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003544, Sample Num: 56704, Cur Loss: 0.00000000, Cur Avg Loss: 0.02866616, Log Avg loss: 0.05323755, Global Avg Loss: 0.03515368, Time: 0.1749 Steps: 27400, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003744, Sample Num: 59904, Cur Loss: 0.00000000, Cur Avg Loss: 0.02766774, Log Avg loss: 0.00997580, Global Avg Loss: 0.03497123, Time: 0.1251 Steps: 27600, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 003944, Sample Num: 63104, Cur Loss: 0.00000000, Cur Avg Loss: 0.02626476, Log Avg loss: 0.00000087, Global Avg Loss: 0.03471965, Time: 0.1117 Steps: 27800, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 004144, Sample Num: 66304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02499926, Log Avg loss: 0.00004368, Global Avg Loss: 0.03447196, Time: 0.0916 Steps: 28000, Updated lr: 0.000077 Training, Epoch: 0003, Batch: 004344, Sample Num: 69504, Cur Loss: 0.00000000, Cur Avg Loss: 0.02473759, Log Avg loss: 0.01931579, Global Avg Loss: 0.03436447, Time: 0.2342 Steps: 28200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004544, Sample Num: 72704, Cur Loss: 0.00000000, Cur Avg Loss: 0.02364879, Log Avg loss: 0.00000004, Global Avg Loss: 0.03412247, Time: 0.1260 Steps: 28400, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004744, Sample Num: 75904, Cur Loss: 0.00000000, Cur Avg Loss: 0.02265180, Log Avg loss: 0.00000012, Global Avg Loss: 0.03388385, Time: 0.1693 Steps: 28600, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 004944, Sample Num: 79104, Cur Loss: 0.00000000, Cur Avg Loss: 0.02368889, Log Avg loss: 0.04828864, Global Avg Loss: 0.03398388, Time: 0.2007 Steps: 28800, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005144, Sample Num: 82304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02276795, Log Avg loss: 0.00000250, Global Avg Loss: 0.03374953, Time: 0.0634 Steps: 29000, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005344, Sample Num: 85504, Cur Loss: 0.00000000, Cur Avg Loss: 0.02191596, Log Avg loss: 0.00000277, Global Avg Loss: 0.03351839, Time: 0.3523 Steps: 29200, Updated lr: 0.000076 Training, Epoch: 0003, Batch: 005544, Sample Num: 88704, Cur Loss: 0.00000000, Cur Avg Loss: 0.02112535, Log Avg loss: 0.00000005, Global Avg Loss: 0.03329037, Time: 0.2177 Steps: 29400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 005744, Sample Num: 91904, Cur Loss: 0.00000000, Cur Avg Loss: 0.02038979, Log Avg loss: 0.00000009, Global Avg Loss: 0.03306544, Time: 0.1465 Steps: 29600, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 005944, Sample Num: 95104, Cur Loss: 0.00000006, Cur Avg Loss: 0.01970373, Log Avg loss: 0.00000006, Global Avg Loss: 0.03284352, Time: 0.0772 Steps: 29800, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006144, Sample Num: 98304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02359416, Log Avg loss: 0.13921777, Global Avg Loss: 0.03355268, Time: 0.1249 Steps: 30000, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006344, Sample Num: 101504, Cur Loss: 0.00000000, Cur Avg Loss: 0.02285033, Log Avg loss: 0.00000003, Global Avg Loss: 0.03333048, Time: 0.0754 Steps: 30200, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006544, Sample Num: 104704, Cur Loss: 0.00000000, Cur Avg Loss: 0.02226752, Log Avg loss: 0.00378065, Global Avg Loss: 0.03313607, Time: 0.1233 Steps: 30400, Updated lr: 0.000075 Training, Epoch: 0003, Batch: 006744, Sample Num: 107904, Cur Loss: 0.00000000, Cur Avg Loss: 0.02164768, Log Avg loss: 0.00136662, Global Avg Loss: 0.03292843, Time: 0.1079 Steps: 30600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 006944, Sample Num: 111104, Cur Loss: 0.00000000, Cur Avg Loss: 0.02104670, Log Avg loss: 0.00078175, Global Avg Loss: 0.03271968, Time: 0.1139 Steps: 30800, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007144, Sample Num: 114304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02048589, Log Avg loss: 0.00101443, Global Avg Loss: 0.03251513, Time: 0.2440 Steps: 31000, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007344, Sample Num: 117504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01992989, Log Avg loss: 0.00006945, Global Avg Loss: 0.03230715, Time: 0.1112 Steps: 31200, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007544, Sample Num: 120704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01940152, Log Avg loss: 0.00000005, Global Avg Loss: 0.03210137, Time: 0.1013 Steps: 31400, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007744, Sample Num: 123904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01890158, Log Avg loss: 0.00004369, Global Avg Loss: 0.03189847, Time: 0.0882 Steps: 31600, Updated lr: 0.000074 Training, Epoch: 0003, Batch: 007944, Sample Num: 127104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01849489, Log Avg loss: 0.00274770, Global Avg Loss: 0.03171514, Time: 0.1286 Steps: 31800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008144, Sample Num: 130304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01809470, Log Avg loss: 0.00219930, Global Avg Loss: 0.03153066, Time: 0.1250 Steps: 32000, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008344, Sample Num: 133504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01783736, Log Avg loss: 0.00735853, Global Avg Loss: 0.03138052, Time: 0.4079 Steps: 32200, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008544, Sample Num: 136704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01966472, Log Avg loss: 0.09590218, Global Avg Loss: 0.03177881, Time: 0.1209 Steps: 32400, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008744, Sample Num: 139904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01923360, Log Avg loss: 0.00081587, Global Avg Loss: 0.03158885, Time: 0.1683 Steps: 32600, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 008944, Sample Num: 143104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01880351, Log Avg loss: 0.00000015, Global Avg Loss: 0.03139624, Time: 0.2459 Steps: 32800, Updated lr: 0.000073 Training, Epoch: 0003, Batch: 009144, Sample Num: 146304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01839261, Log Avg loss: 0.00001704, Global Avg Loss: 0.03120606, Time: 0.0825 Steps: 33000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009344, Sample Num: 149504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01909532, Log Avg loss: 0.05122326, Global Avg Loss: 0.03132664, Time: 0.1286 Steps: 33200, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009544, Sample Num: 152704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01869546, Log Avg loss: 0.00001417, Global Avg Loss: 0.03113914, Time: 0.2764 Steps: 33400, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009744, Sample Num: 155904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01831187, Log Avg loss: 0.00000695, Global Avg Loss: 0.03095383, Time: 0.0700 Steps: 33600, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 009944, Sample Num: 159104, Cur Loss: 0.00000000, Cur Avg Loss: 0.02130827, Log Avg loss: 0.16729292, Global Avg Loss: 0.03176057, Time: 0.2220 Steps: 33800, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 010144, Sample Num: 162304, Cur Loss: 0.00000000, Cur Avg Loss: 0.02090853, Log Avg loss: 0.00103332, Global Avg Loss: 0.03157983, Time: 0.2482 Steps: 34000, Updated lr: 0.000072 Training, Epoch: 0003, Batch: 010344, Sample Num: 165504, Cur Loss: 0.00000000, Cur Avg Loss: 0.02051034, Log Avg loss: 0.00031413, Global Avg Loss: 0.03139699, Time: 0.1178 Steps: 34200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010544, Sample Num: 168704, Cur Loss: 0.00000018, Cur Avg Loss: 0.02012138, Log Avg loss: 0.00000430, Global Avg Loss: 0.03121447, Time: 0.2935 Steps: 34400, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010744, Sample Num: 171904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01978656, Log Avg loss: 0.00213490, Global Avg Loss: 0.03104638, Time: 0.1335 Steps: 34600, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 010944, Sample Num: 175104, Cur Loss: 0.00000000, Cur Avg Loss: 0.01942496, Log Avg loss: 0.00000008, Global Avg Loss: 0.03086795, Time: 0.1484 Steps: 34800, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011144, Sample Num: 178304, Cur Loss: 0.00000000, Cur Avg Loss: 0.01911137, Log Avg loss: 0.00195164, Global Avg Loss: 0.03070272, Time: 0.1198 Steps: 35000, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011344, Sample Num: 181504, Cur Loss: 0.00000000, Cur Avg Loss: 0.01877443, Log Avg loss: 0.00000012, Global Avg Loss: 0.03052827, Time: 0.1263 Steps: 35200, Updated lr: 0.000071 Training, Epoch: 0003, Batch: 011544, Sample Num: 184704, Cur Loss: 0.00000000, Cur Avg Loss: 0.01844917, Log Avg loss: 0.00000009, Global Avg Loss: 0.03035579, Time: 0.1074 Steps: 35400, Updated lr: 0.000070 Training, Epoch: 0003, Batch: 011744, Sample Num: 187904, Cur Loss: 0.00000000, Cur Avg Loss: 0.01821143, Log Avg loss: 0.00448935, Global Avg Loss: 0.03021048, Time: 0.0875 Steps: 35600, Updated lr: 0.000070 ***** Running evaluation checkpoint-35784 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-35784 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1869.428616, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.009079, "eval_total_loss": 12.646885, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999999, "eval_pr_auc": 0.999949, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.012805, "test_total_loss": 17.837533, "test_acc": 0.999776, "test_prec": 0.994455, "test_recall": 0.996296, "test_f1": 0.995375, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21741, "fp": 3, "fn": 2, "tp": 538}, "test_mcc2": 0.99526, "test_mcc": 0.99526, "test_sn": 0.996296, "test_sp": 0.999862, "lr": 7.011756802149816e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.030122105504349447, "train_cur_epoch_loss": 216.27146566695308, "train_cur_epoch_avg_loss": 0.018131410602527924, "train_cur_epoch_time": 1869.428615808487, "train_cur_epoch_avg_time": 0.15672607443062433, "epoch": 3, "step": 35784} ################################################## Training, Epoch: 0004, Batch: 000016, Sample Num: 256, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.01198213, Global Avg Loss: 0.03010864, Time: 0.2374 Steps: 35800, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000216, Sample Num: 3456, Cur Loss: 0.00000000, Cur Avg Loss: 0.13507036, Log Avg loss: 0.14587599, Global Avg Loss: 0.03075180, Time: 0.0647 Steps: 36000, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000416, Sample Num: 6656, Cur Loss: 0.00000000, Cur Avg Loss: 0.07013298, Log Avg loss: 0.00000061, Global Avg Loss: 0.03058190, Time: 0.1357 Steps: 36200, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000616, Sample Num: 9856, Cur Loss: 0.00000000, Cur Avg Loss: 0.04736257, Log Avg loss: 0.00000011, Global Avg Loss: 0.03041387, Time: 0.1236 Steps: 36400, Updated lr: 0.000070 Training, Epoch: 0004, Batch: 000816, Sample Num: 13056, Cur Loss: 0.00000000, Cur Avg Loss: 0.03886848, Log Avg loss: 0.01270668, Global Avg Loss: 0.03031711, Time: 0.1584 Steps: 36600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001016, Sample Num: 16256, Cur Loss: 0.00000411, Cur Avg Loss: 0.03476049, Log Avg loss: 0.01799989, Global Avg Loss: 0.03025017, Time: 0.2660 Steps: 36800, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001216, Sample Num: 19456, Cur Loss: 0.00000018, Cur Avg Loss: 0.02904341, Log Avg loss: 0.00000067, Global Avg Loss: 0.03008665, Time: 0.2742 Steps: 37000, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001416, Sample Num: 22656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02494129, Log Avg loss: 0.00000037, Global Avg Loss: 0.02992490, Time: 0.0894 Steps: 37200, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001616, Sample Num: 25856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02813192, Log Avg loss: 0.05072156, Global Avg Loss: 0.03003611, Time: 0.1018 Steps: 37400, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 001816, Sample Num: 29056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02503375, Log Avg loss: 0.00000055, Global Avg Loss: 0.02987635, Time: 0.1004 Steps: 37600, Updated lr: 0.000069 Training, Epoch: 0004, Batch: 002016, Sample Num: 32256, Cur Loss: 0.00000000, Cur Avg Loss: 0.02297440, Log Avg loss: 0.00427556, Global Avg Loss: 0.02974089, Time: 0.1234 Steps: 37800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002216, Sample Num: 35456, Cur Loss: 0.00000030, Cur Avg Loss: 0.02480351, Log Avg loss: 0.04324093, Global Avg Loss: 0.02981195, Time: 0.2848 Steps: 38000, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002416, Sample Num: 38656, Cur Loss: 0.00000000, Cur Avg Loss: 0.02275027, Log Avg loss: 0.00000032, Global Avg Loss: 0.02965587, Time: 0.1373 Steps: 38200, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002616, Sample Num: 41856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02483433, Log Avg loss: 0.05000978, Global Avg Loss: 0.02976188, Time: 0.1261 Steps: 38400, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 002816, Sample Num: 45056, Cur Loss: 0.00000000, Cur Avg Loss: 0.02380574, Log Avg loss: 0.01035183, Global Avg Loss: 0.02966131, Time: 0.1986 Steps: 38600, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003016, Sample Num: 48256, Cur Loss: 0.00000000, Cur Avg Loss: 0.02228927, Log Avg loss: 0.00093730, Global Avg Loss: 0.02951324, Time: 0.2471 Steps: 38800, Updated lr: 0.000068 Training, Epoch: 0004, Batch: 003216, Sample Num: 51456, Cur Loss: 0.00000000, Cur Avg Loss: 0.02090352, Log Avg loss: 0.00000650, Global Avg Loss: 0.02936193, Time: 0.1430 Steps: 39000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003416, Sample Num: 54656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01972302, Log Avg loss: 0.00074051, Global Avg Loss: 0.02921590, Time: 0.1325 Steps: 39200, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003616, Sample Num: 57856, Cur Loss: 0.00000000, Cur Avg Loss: 0.02036916, Log Avg loss: 0.03140527, Global Avg Loss: 0.02922701, Time: 0.1161 Steps: 39400, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 003816, Sample Num: 61056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01938120, Log Avg loss: 0.00151888, Global Avg Loss: 0.02908707, Time: 0.1473 Steps: 39600, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004016, Sample Num: 64256, Cur Loss: 0.00000030, Cur Avg Loss: 0.01841603, Log Avg loss: 0.00000057, Global Avg Loss: 0.02894091, Time: 0.0472 Steps: 39800, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004216, Sample Num: 67456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01768636, Log Avg loss: 0.00303458, Global Avg Loss: 0.02881138, Time: 0.0992 Steps: 40000, Updated lr: 0.000067 Training, Epoch: 0004, Batch: 004416, Sample Num: 70656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01689845, Log Avg loss: 0.00028926, Global Avg Loss: 0.02866948, Time: 0.4062 Steps: 40200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 004616, Sample Num: 73856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01616628, Log Avg loss: 0.00000002, Global Avg Loss: 0.02852755, Time: 0.2238 Steps: 40400, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 004816, Sample Num: 77056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01698408, Log Avg loss: 0.03585887, Global Avg Loss: 0.02856366, Time: 0.1806 Steps: 40600, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005016, Sample Num: 80256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01633929, Log Avg loss: 0.00081272, Global Avg Loss: 0.02842763, Time: 0.2042 Steps: 40800, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005216, Sample Num: 83456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01571280, Log Avg loss: 0.00000050, Global Avg Loss: 0.02828896, Time: 0.0765 Steps: 41000, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005416, Sample Num: 86656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01513260, Log Avg loss: 0.00000089, Global Avg Loss: 0.02815164, Time: 0.1044 Steps: 41200, Updated lr: 0.000066 Training, Epoch: 0004, Batch: 005616, Sample Num: 89856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01459369, Log Avg loss: 0.00000003, Global Avg Loss: 0.02801564, Time: 0.1764 Steps: 41400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 005816, Sample Num: 93056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01409184, Log Avg loss: 0.00000004, Global Avg Loss: 0.02788095, Time: 0.3359 Steps: 41600, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006016, Sample Num: 96256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01362337, Log Avg loss: 0.00000001, Global Avg Loss: 0.02774755, Time: 0.1030 Steps: 41800, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006216, Sample Num: 99456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01613575, Log Avg loss: 0.09170814, Global Avg Loss: 0.02805212, Time: 0.0892 Steps: 42000, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006416, Sample Num: 102656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01571022, Log Avg loss: 0.00248501, Global Avg Loss: 0.02793095, Time: 0.0773 Steps: 42200, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006616, Sample Num: 105856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01528183, Log Avg loss: 0.00153897, Global Avg Loss: 0.02780646, Time: 0.4050 Steps: 42400, Updated lr: 0.000065 Training, Epoch: 0004, Batch: 006816, Sample Num: 109056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01484863, Log Avg loss: 0.00051840, Global Avg Loss: 0.02767835, Time: 0.0710 Steps: 42600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007016, Sample Num: 112256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01442535, Log Avg loss: 0.00000002, Global Avg Loss: 0.02754901, Time: 0.0643 Steps: 42800, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007216, Sample Num: 115456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01403561, Log Avg loss: 0.00036351, Global Avg Loss: 0.02742257, Time: 0.0962 Steps: 43000, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007416, Sample Num: 118656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01365709, Log Avg loss: 0.00000000, Global Avg Loss: 0.02729561, Time: 0.1977 Steps: 43200, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007616, Sample Num: 121856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01331232, Log Avg loss: 0.00052816, Global Avg Loss: 0.02717226, Time: 0.2080 Steps: 43400, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 007816, Sample Num: 125056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01297168, Log Avg loss: 0.00000005, Global Avg Loss: 0.02704762, Time: 0.1232 Steps: 43600, Updated lr: 0.000064 Training, Epoch: 0004, Batch: 008016, Sample Num: 128256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01270476, Log Avg loss: 0.00227365, Global Avg Loss: 0.02693449, Time: 0.1506 Steps: 43800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008216, Sample Num: 131456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01254607, Log Avg loss: 0.00618599, Global Avg Loss: 0.02684018, Time: 0.1233 Steps: 44000, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008416, Sample Num: 134656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01319971, Log Avg loss: 0.04005112, Global Avg Loss: 0.02689996, Time: 0.1500 Steps: 44200, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008616, Sample Num: 137856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01289849, Log Avg loss: 0.00022291, Global Avg Loss: 0.02677979, Time: 0.2164 Steps: 44400, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 008816, Sample Num: 141056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01260810, Log Avg loss: 0.00009815, Global Avg Loss: 0.02666014, Time: 0.1169 Steps: 44600, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 009016, Sample Num: 144256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01232842, Log Avg loss: 0.00000009, Global Avg Loss: 0.02654113, Time: 0.0878 Steps: 44800, Updated lr: 0.000063 Training, Epoch: 0004, Batch: 009216, Sample Num: 147456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01324775, Log Avg loss: 0.05469132, Global Avg Loss: 0.02666624, Time: 0.1209 Steps: 45000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009416, Sample Num: 150656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01296722, Log Avg loss: 0.00004046, Global Avg Loss: 0.02654842, Time: 0.0689 Steps: 45200, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009616, Sample Num: 153856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01269757, Log Avg loss: 0.00000247, Global Avg Loss: 0.02643148, Time: 0.1304 Steps: 45400, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 009816, Sample Num: 157056, Cur Loss: 0.00000006, Cur Avg Loss: 0.01500559, Log Avg loss: 0.12597518, Global Avg Loss: 0.02686808, Time: 0.1317 Steps: 45600, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010016, Sample Num: 160256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01471066, Log Avg loss: 0.00023564, Global Avg Loss: 0.02675178, Time: 0.1784 Steps: 45800, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010216, Sample Num: 163456, Cur Loss: 0.00000250, Cur Avg Loss: 0.01442309, Log Avg loss: 0.00002155, Global Avg Loss: 0.02663556, Time: 0.0725 Steps: 46000, Updated lr: 0.000062 Training, Epoch: 0004, Batch: 010416, Sample Num: 166656, Cur Loss: 0.00000012, Cur Avg Loss: 0.01420492, Log Avg loss: 0.00306046, Global Avg Loss: 0.02653350, Time: 0.1169 Steps: 46200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 010616, Sample Num: 169856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01395901, Log Avg loss: 0.00115226, Global Avg Loss: 0.02642410, Time: 0.0755 Steps: 46400, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 010816, Sample Num: 173056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01370090, Log Avg loss: 0.00000008, Global Avg Loss: 0.02631069, Time: 0.0714 Steps: 46600, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011016, Sample Num: 176256, Cur Loss: 0.00000000, Cur Avg Loss: 0.01345215, Log Avg loss: 0.00000011, Global Avg Loss: 0.02619825, Time: 0.1714 Steps: 46800, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011216, Sample Num: 179456, Cur Loss: 0.00000000, Cur Avg Loss: 0.01324974, Log Avg loss: 0.00210098, Global Avg Loss: 0.02609571, Time: 0.1569 Steps: 47000, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011416, Sample Num: 182656, Cur Loss: 0.00000000, Cur Avg Loss: 0.01301763, Log Avg loss: 0.00000098, Global Avg Loss: 0.02598514, Time: 0.0704 Steps: 47200, Updated lr: 0.000061 Training, Epoch: 0004, Batch: 011616, Sample Num: 185856, Cur Loss: 0.00000000, Cur Avg Loss: 0.01279350, Log Avg loss: 0.00000002, Global Avg Loss: 0.02587550, Time: 0.1927 Steps: 47400, Updated lr: 0.000060 Training, Epoch: 0004, Batch: 011816, Sample Num: 189056, Cur Loss: 0.00000000, Cur Avg Loss: 0.01258628, Log Avg loss: 0.00055068, Global Avg Loss: 0.02576909, Time: 0.1174 Steps: 47600, Updated lr: 0.000060 ***** Running evaluation checkpoint-47712 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-47712 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1867.295878, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.009123, "eval_total_loss": 12.708809, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999938, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.017297, "test_total_loss": 24.094666, "test_acc": 0.999776, "test_prec": 0.994455, "test_recall": 0.996296, "test_f1": 0.995375, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21741, "fp": 3, "fn": 2, "tp": 538}, "test_mcc2": 0.99526, "test_mcc": 0.99526, "test_sn": 0.996296, "test_sp": 0.999862, "lr": 6.010077258985556e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.02579760379413592, "train_cur_epoch_loss": 152.9658488581693, "train_cur_epoch_avg_loss": 0.012824098663495079, "train_cur_epoch_time": 1867.2958781719208, "train_cur_epoch_avg_time": 0.15654727348859163, "epoch": 4, "step": 47712} ################################################## Training, Epoch: 0005, Batch: 000088, Sample Num: 1408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000000, Log Avg loss: 0.02123204, Global Avg Loss: 0.02575011, Time: 0.1393 Steps: 47800, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000288, Sample Num: 4608, Cur Loss: 0.00000000, Cur Avg Loss: 0.07224341, Log Avg loss: 0.10403051, Global Avg Loss: 0.02607628, Time: 0.1011 Steps: 48000, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000488, Sample Num: 7808, Cur Loss: 0.00000000, Cur Avg Loss: 0.04263561, Log Avg loss: 0.00000037, Global Avg Loss: 0.02596808, Time: 0.0963 Steps: 48200, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000688, Sample Num: 11008, Cur Loss: 0.00000000, Cur Avg Loss: 0.03024154, Log Avg loss: 0.00000001, Global Avg Loss: 0.02586077, Time: 0.0953 Steps: 48400, Updated lr: 0.000060 Training, Epoch: 0005, Batch: 000888, Sample Num: 14208, Cur Loss: 0.00000000, Cur Avg Loss: 0.02674638, Log Avg loss: 0.01472303, Global Avg Loss: 0.02581494, Time: 0.0819 Steps: 48600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001088, Sample Num: 17408, Cur Loss: 0.00000000, Cur Avg Loss: 0.02218580, Log Avg loss: 0.00193684, Global Avg Loss: 0.02571708, Time: 0.0691 Steps: 48800, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001288, Sample Num: 20608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01874088, Log Avg loss: 0.00000052, Global Avg Loss: 0.02561211, Time: 0.1017 Steps: 49000, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001488, Sample Num: 23808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01629032, Log Avg loss: 0.00050870, Global Avg Loss: 0.02551007, Time: 0.2427 Steps: 49200, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001688, Sample Num: 27008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01886083, Log Avg loss: 0.03798541, Global Avg Loss: 0.02556057, Time: 0.1008 Steps: 49400, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 001888, Sample Num: 30208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01686287, Log Avg loss: 0.00000011, Global Avg Loss: 0.02545751, Time: 0.2956 Steps: 49600, Updated lr: 0.000059 Training, Epoch: 0005, Batch: 002088, Sample Num: 33408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01936466, Log Avg loss: 0.04298151, Global Avg Loss: 0.02552789, Time: 0.2058 Steps: 49800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002288, Sample Num: 36608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01767196, Log Avg loss: 0.00000019, Global Avg Loss: 0.02542577, Time: 0.2122 Steps: 50000, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002488, Sample Num: 39808, Cur Loss: 0.00000060, Cur Avg Loss: 0.01625139, Log Avg loss: 0.00000006, Global Avg Loss: 0.02532448, Time: 0.4034 Steps: 50200, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002688, Sample Num: 43008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01695021, Log Avg loss: 0.02564356, Global Avg Loss: 0.02532574, Time: 0.1134 Steps: 50400, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 002888, Sample Num: 46208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01584630, Log Avg loss: 0.00100977, Global Avg Loss: 0.02522963, Time: 0.1192 Steps: 50600, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003088, Sample Num: 49408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01482003, Log Avg loss: 0.00000075, Global Avg Loss: 0.02513031, Time: 0.1058 Steps: 50800, Updated lr: 0.000058 Training, Epoch: 0005, Batch: 003288, Sample Num: 52608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01396633, Log Avg loss: 0.00078509, Global Avg Loss: 0.02503483, Time: 0.1457 Steps: 51000, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003488, Sample Num: 55808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01316553, Log Avg loss: 0.00000040, Global Avg Loss: 0.02493704, Time: 0.2040 Steps: 51200, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003688, Sample Num: 59008, Cur Loss: 0.00000018, Cur Avg Loss: 0.01426724, Log Avg loss: 0.03348113, Global Avg Loss: 0.02497029, Time: 0.2173 Steps: 51400, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 003888, Sample Num: 62208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01355306, Log Avg loss: 0.00038360, Global Avg Loss: 0.02487499, Time: 0.0788 Steps: 51600, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 004088, Sample Num: 65408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01289000, Log Avg loss: 0.00000009, Global Avg Loss: 0.02477895, Time: 0.1335 Steps: 51800, Updated lr: 0.000057 Training, Epoch: 0005, Batch: 004288, Sample Num: 68608, Cur Loss: 0.00000024, Cur Avg Loss: 0.01238897, Log Avg loss: 0.00214779, Global Avg Loss: 0.02469191, Time: 0.4444 Steps: 52000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 004488, Sample Num: 71808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01184193, Log Avg loss: 0.00011340, Global Avg Loss: 0.02459774, Time: 0.1958 Steps: 52200, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 004688, Sample Num: 75008, Cur Loss: 0.00000000, Cur Avg Loss: 0.01133673, Log Avg loss: 0.00000017, Global Avg Loss: 0.02450385, Time: 0.2868 Steps: 52400, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 004888, Sample Num: 78208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01162643, Log Avg loss: 0.01841699, Global Avg Loss: 0.02448071, Time: 0.2722 Steps: 52600, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 005088, Sample Num: 81408, Cur Loss: 0.00000000, Cur Avg Loss: 0.01116944, Log Avg loss: 0.00000051, Global Avg Loss: 0.02438798, Time: 0.1115 Steps: 52800, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 005288, Sample Num: 84608, Cur Loss: 0.00000000, Cur Avg Loss: 0.01074700, Log Avg loss: 0.00000028, Global Avg Loss: 0.02429595, Time: 0.0624 Steps: 53000, Updated lr: 0.000056 Training, Epoch: 0005, Batch: 005488, Sample Num: 87808, Cur Loss: 0.00000000, Cur Avg Loss: 0.01035535, Log Avg loss: 0.00000007, Global Avg Loss: 0.02420461, Time: 0.1390 Steps: 53200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 005688, Sample Num: 91008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00999124, Log Avg loss: 0.00000002, Global Avg Loss: 0.02411396, Time: 0.1662 Steps: 53400, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 005888, Sample Num: 94208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00965187, Log Avg loss: 0.00000007, Global Avg Loss: 0.02402398, Time: 0.1109 Steps: 53600, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006088, Sample Num: 97408, Cur Loss: 0.00348087, Cur Avg Loss: 0.00933536, Log Avg loss: 0.00001741, Global Avg Loss: 0.02393474, Time: 0.4048 Steps: 53800, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006288, Sample Num: 100608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00941102, Log Avg loss: 0.01171417, Global Avg Loss: 0.02388948, Time: 0.2210 Steps: 54000, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006488, Sample Num: 103808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00923973, Log Avg loss: 0.00385417, Global Avg Loss: 0.02381555, Time: 0.2357 Steps: 54200, Updated lr: 0.000055 Training, Epoch: 0005, Batch: 006688, Sample Num: 107008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00896720, Log Avg loss: 0.00012653, Global Avg Loss: 0.02372845, Time: 0.1032 Steps: 54400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 006888, Sample Num: 110208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00871397, Log Avg loss: 0.00024585, Global Avg Loss: 0.02364244, Time: 0.0793 Steps: 54600, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007088, Sample Num: 113408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00846896, Log Avg loss: 0.00003098, Global Avg Loss: 0.02355626, Time: 0.1373 Steps: 54800, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007288, Sample Num: 116608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00823669, Log Avg loss: 0.00000483, Global Avg Loss: 0.02347062, Time: 0.1026 Steps: 55000, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007488, Sample Num: 119808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00801669, Log Avg loss: 0.00000001, Global Avg Loss: 0.02338558, Time: 0.1507 Steps: 55200, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007688, Sample Num: 123008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00781954, Log Avg loss: 0.00043803, Global Avg Loss: 0.02330274, Time: 0.1117 Steps: 55400, Updated lr: 0.000054 Training, Epoch: 0005, Batch: 007888, Sample Num: 126208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00762127, Log Avg loss: 0.00000004, Global Avg Loss: 0.02321892, Time: 0.0776 Steps: 55600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008088, Sample Num: 129408, Cur Loss: 0.00000018, Cur Avg Loss: 0.00757309, Log Avg loss: 0.00567280, Global Avg Loss: 0.02315603, Time: 0.0711 Steps: 55800, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008288, Sample Num: 132608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00744867, Log Avg loss: 0.00241704, Global Avg Loss: 0.02308196, Time: 0.1230 Steps: 56000, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008488, Sample Num: 135808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00728236, Log Avg loss: 0.00039051, Global Avg Loss: 0.02300121, Time: 0.0964 Steps: 56200, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008688, Sample Num: 139008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00711552, Log Avg loss: 0.00003498, Global Avg Loss: 0.02291977, Time: 0.1058 Steps: 56400, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 008888, Sample Num: 142208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00695655, Log Avg loss: 0.00005091, Global Avg Loss: 0.02283896, Time: 0.1068 Steps: 56600, Updated lr: 0.000053 Training, Epoch: 0005, Batch: 009088, Sample Num: 145408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00680346, Log Avg loss: 0.00000004, Global Avg Loss: 0.02275854, Time: 0.1942 Steps: 56800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009288, Sample Num: 148608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00775023, Log Avg loss: 0.05077126, Global Avg Loss: 0.02285683, Time: 0.1333 Steps: 57000, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009488, Sample Num: 151808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00758698, Log Avg loss: 0.00000599, Global Avg Loss: 0.02277693, Time: 0.1985 Steps: 57200, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009688, Sample Num: 155008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00743261, Log Avg loss: 0.00010918, Global Avg Loss: 0.02269795, Time: 0.1213 Steps: 57400, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 009888, Sample Num: 158208, Cur Loss: 0.00000000, Cur Avg Loss: 0.01004530, Log Avg loss: 0.13660391, Global Avg Loss: 0.02309346, Time: 0.2173 Steps: 57600, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 010088, Sample Num: 161408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00984670, Log Avg loss: 0.00002790, Global Avg Loss: 0.02301365, Time: 0.0673 Steps: 57800, Updated lr: 0.000052 Training, Epoch: 0005, Batch: 010288, Sample Num: 164608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00968787, Log Avg loss: 0.00167650, Global Avg Loss: 0.02294007, Time: 0.0798 Steps: 58000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 010488, Sample Num: 167808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00950332, Log Avg loss: 0.00001026, Global Avg Loss: 0.02286127, Time: 0.0992 Steps: 58200, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 010688, Sample Num: 171008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00934586, Log Avg loss: 0.00108859, Global Avg Loss: 0.02278671, Time: 0.2810 Steps: 58400, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 010888, Sample Num: 174208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00917419, Log Avg loss: 0.00000001, Global Avg Loss: 0.02270894, Time: 0.0975 Steps: 58600, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 011088, Sample Num: 177408, Cur Loss: 0.00000000, Cur Avg Loss: 0.00904999, Log Avg loss: 0.00228849, Global Avg Loss: 0.02263948, Time: 0.0851 Steps: 58800, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 011288, Sample Num: 180608, Cur Loss: 0.00000000, Cur Avg Loss: 0.00888987, Log Avg loss: 0.00001322, Global Avg Loss: 0.02256278, Time: 0.1701 Steps: 59000, Updated lr: 0.000051 Training, Epoch: 0005, Batch: 011488, Sample Num: 183808, Cur Loss: 0.00000000, Cur Avg Loss: 0.00873511, Log Avg loss: 0.00000005, Global Avg Loss: 0.02248656, Time: 0.0981 Steps: 59200, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 011688, Sample Num: 187008, Cur Loss: 0.00000000, Cur Avg Loss: 0.00858582, Log Avg loss: 0.00001074, Global Avg Loss: 0.02241088, Time: 0.2520 Steps: 59400, Updated lr: 0.000050 Training, Epoch: 0005, Batch: 011888, Sample Num: 190208, Cur Loss: 0.00000000, Cur Avg Loss: 0.00860542, Log Avg loss: 0.00975063, Global Avg Loss: 0.02236840, Time: 0.2123 Steps: 59600, Updated lr: 0.000050 ***** Running evaluation checkpoint-59640 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-59640 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1868.434772, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.007378, "eval_total_loss": 10.277639, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999932, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.016135, "test_total_loss": 22.475547, "test_acc": 0.999776, "test_prec": 0.994455, "test_recall": 0.996296, "test_f1": 0.995375, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21741, "fp": 3, "fn": 2, "tp": 538}, "test_mcc2": 0.99526, "test_mcc": 0.99526, "test_sn": 0.996296, "test_sp": 0.999862, "lr": 5.008397715821297e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.022353394772192038, "train_cur_epoch_loss": 102.30119198771783, "train_cur_epoch_avg_loss": 0.008576558684416317, "train_cur_epoch_time": 1868.434772491455, "train_cur_epoch_avg_time": 0.15664275423301938, "epoch": 5, "step": 59640} ################################################## Training, Epoch: 0006, Batch: 000160, Sample Num: 2560, Cur Loss: 0.00000000, Cur Avg Loss: 0.06459604, Log Avg loss: 0.05167683, Global Avg Loss: 0.02246642, Time: 0.1132 Steps: 59800, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000360, Sample Num: 5760, Cur Loss: 0.00000000, Cur Avg Loss: 0.02870943, Log Avg loss: 0.00000014, Global Avg Loss: 0.02239153, Time: 0.3694 Steps: 60000, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000560, Sample Num: 8960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01845631, Log Avg loss: 0.00000069, Global Avg Loss: 0.02231714, Time: 0.2007 Steps: 60200, Updated lr: 0.000050 Training, Epoch: 0006, Batch: 000760, Sample Num: 12160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01359939, Log Avg loss: 0.00000002, Global Avg Loss: 0.02224325, Time: 0.3954 Steps: 60400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 000960, Sample Num: 15360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01297357, Log Avg loss: 0.01059545, Global Avg Loss: 0.02220480, Time: 0.0814 Steps: 60600, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001160, Sample Num: 18560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01077320, Log Avg loss: 0.00021146, Global Avg Loss: 0.02213246, Time: 0.1407 Steps: 60800, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001360, Sample Num: 21760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00918892, Log Avg loss: 0.00000005, Global Avg Loss: 0.02205989, Time: 0.3192 Steps: 61000, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001560, Sample Num: 24960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00810294, Log Avg loss: 0.00071831, Global Avg Loss: 0.02199015, Time: 0.0944 Steps: 61200, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001760, Sample Num: 28160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01167788, Log Avg loss: 0.03956235, Global Avg Loss: 0.02204739, Time: 0.0827 Steps: 61400, Updated lr: 0.000049 Training, Epoch: 0006, Batch: 001960, Sample Num: 31360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01048626, Log Avg loss: 0.00000003, Global Avg Loss: 0.02197580, Time: 0.0987 Steps: 61600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002160, Sample Num: 34560, Cur Loss: 0.00000060, Cur Avg Loss: 0.01462810, Log Avg loss: 0.05521816, Global Avg Loss: 0.02208338, Time: 0.1712 Steps: 61800, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002360, Sample Num: 37760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01338844, Log Avg loss: 0.00000008, Global Avg Loss: 0.02201215, Time: 0.1083 Steps: 62000, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002560, Sample Num: 40960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01438061, Log Avg loss: 0.02608828, Global Avg Loss: 0.02202525, Time: 0.1437 Steps: 62200, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002760, Sample Num: 44160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01379844, Log Avg loss: 0.00634661, Global Avg Loss: 0.02197500, Time: 0.0813 Steps: 62400, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 002960, Sample Num: 47360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01290492, Log Avg loss: 0.00057427, Global Avg Loss: 0.02190663, Time: 0.0981 Steps: 62600, Updated lr: 0.000048 Training, Epoch: 0006, Batch: 003160, Sample Num: 50560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01208816, Log Avg loss: 0.00000017, Global Avg Loss: 0.02183686, Time: 0.1653 Steps: 62800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003360, Sample Num: 53760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01141492, Log Avg loss: 0.00077774, Global Avg Loss: 0.02177001, Time: 0.1082 Steps: 63000, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003560, Sample Num: 56960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01343184, Log Avg loss: 0.04731605, Global Avg Loss: 0.02185085, Time: 0.1059 Steps: 63200, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003760, Sample Num: 60160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01275026, Log Avg loss: 0.00061817, Global Avg Loss: 0.02178387, Time: 0.0773 Steps: 63400, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 003960, Sample Num: 63360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01210631, Log Avg loss: 0.00000003, Global Avg Loss: 0.02171537, Time: 0.4037 Steps: 63600, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 004160, Sample Num: 66560, Cur Loss: 0.00000000, Cur Avg Loss: 0.01152463, Log Avg loss: 0.00000734, Global Avg Loss: 0.02164732, Time: 0.1741 Steps: 63800, Updated lr: 0.000047 Training, Epoch: 0006, Batch: 004360, Sample Num: 69760, Cur Loss: 0.00000000, Cur Avg Loss: 0.01102275, Log Avg loss: 0.00058378, Global Avg Loss: 0.02158149, Time: 0.0868 Steps: 64000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 004560, Sample Num: 72960, Cur Loss: 0.00000000, Cur Avg Loss: 0.01053930, Log Avg loss: 0.00000001, Global Avg Loss: 0.02151426, Time: 0.0918 Steps: 64200, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 004760, Sample Num: 76160, Cur Loss: 0.00000000, Cur Avg Loss: 0.01060226, Log Avg loss: 0.01203784, Global Avg Loss: 0.02148483, Time: 0.0666 Steps: 64400, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 004960, Sample Num: 79360, Cur Loss: 0.00000000, Cur Avg Loss: 0.01018064, Log Avg loss: 0.00014602, Global Avg Loss: 0.02141877, Time: 0.1861 Steps: 64600, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 005160, Sample Num: 82560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00978605, Log Avg loss: 0.00000008, Global Avg Loss: 0.02135266, Time: 0.0848 Steps: 64800, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 005360, Sample Num: 85760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00942090, Log Avg loss: 0.00000011, Global Avg Loss: 0.02128696, Time: 0.3584 Steps: 65000, Updated lr: 0.000046 Training, Epoch: 0006, Batch: 005560, Sample Num: 88960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00908202, Log Avg loss: 0.00000003, Global Avg Loss: 0.02122166, Time: 0.1368 Steps: 65200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 005760, Sample Num: 92160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00876667, Log Avg loss: 0.00000006, Global Avg Loss: 0.02115677, Time: 0.1076 Steps: 65400, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 005960, Sample Num: 95360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00847249, Log Avg loss: 0.00000001, Global Avg Loss: 0.02109226, Time: 0.0888 Steps: 65600, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006160, Sample Num: 98560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00820038, Log Avg loss: 0.00009137, Global Avg Loss: 0.02102843, Time: 0.1226 Steps: 65800, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006360, Sample Num: 101760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00794250, Log Avg loss: 0.00000002, Global Avg Loss: 0.02096471, Time: 0.0677 Steps: 66000, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006560, Sample Num: 104960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00780954, Log Avg loss: 0.00358124, Global Avg Loss: 0.02091219, Time: 0.2329 Steps: 66200, Updated lr: 0.000045 Training, Epoch: 0006, Batch: 006760, Sample Num: 108160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00758187, Log Avg loss: 0.00011456, Global Avg Loss: 0.02084955, Time: 0.1211 Steps: 66400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 006960, Sample Num: 111360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00736509, Log Avg loss: 0.00003782, Global Avg Loss: 0.02078705, Time: 0.1438 Steps: 66600, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007160, Sample Num: 114560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00715998, Log Avg loss: 0.00002217, Global Avg Loss: 0.02072488, Time: 0.1116 Steps: 66800, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007360, Sample Num: 117760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00696544, Log Avg loss: 0.00000084, Global Avg Loss: 0.02066302, Time: 0.1256 Steps: 67000, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007560, Sample Num: 120960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00678117, Log Avg loss: 0.00000000, Global Avg Loss: 0.02060152, Time: 0.0869 Steps: 67200, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007760, Sample Num: 124160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00676717, Log Avg loss: 0.00623800, Global Avg Loss: 0.02055890, Time: 0.1543 Steps: 67400, Updated lr: 0.000044 Training, Epoch: 0006, Batch: 007960, Sample Num: 127360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00664574, Log Avg loss: 0.00193438, Global Avg Loss: 0.02050380, Time: 0.0727 Steps: 67600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008160, Sample Num: 130560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00660901, Log Avg loss: 0.00514696, Global Avg Loss: 0.02045850, Time: 0.0666 Steps: 67800, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008360, Sample Num: 133760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00645365, Log Avg loss: 0.00011514, Global Avg Loss: 0.02039866, Time: 0.1291 Steps: 68000, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008560, Sample Num: 136960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00630446, Log Avg loss: 0.00006841, Global Avg Loss: 0.02033904, Time: 0.2093 Steps: 68200, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008760, Sample Num: 140160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00616054, Log Avg loss: 0.00000037, Global Avg Loss: 0.02027957, Time: 0.2688 Steps: 68400, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 008960, Sample Num: 143360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00602302, Log Avg loss: 0.00000006, Global Avg Loss: 0.02022045, Time: 0.1219 Steps: 68600, Updated lr: 0.000043 Training, Epoch: 0006, Batch: 009160, Sample Num: 146560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00589152, Log Avg loss: 0.00000010, Global Avg Loss: 0.02016167, Time: 0.1837 Steps: 68800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009360, Sample Num: 149760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00682678, Log Avg loss: 0.04966189, Global Avg Loss: 0.02024718, Time: 0.0920 Steps: 69000, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009560, Sample Num: 152960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00668401, Log Avg loss: 0.00000240, Global Avg Loss: 0.02018867, Time: 0.1061 Steps: 69200, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009760, Sample Num: 156160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00655737, Log Avg loss: 0.00050354, Global Avg Loss: 0.02013194, Time: 0.1610 Steps: 69400, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 009960, Sample Num: 159360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00853154, Log Avg loss: 0.10487130, Global Avg Loss: 0.02037544, Time: 0.1347 Steps: 69600, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 010160, Sample Num: 162560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00836399, Log Avg loss: 0.00001971, Global Avg Loss: 0.02031711, Time: 0.0905 Steps: 69800, Updated lr: 0.000042 Training, Epoch: 0006, Batch: 010360, Sample Num: 165760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00835672, Log Avg loss: 0.00798757, Global Avg Loss: 0.02028189, Time: 0.1994 Steps: 70000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 010560, Sample Num: 168960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00819873, Log Avg loss: 0.00001504, Global Avg Loss: 0.02022415, Time: 0.1128 Steps: 70200, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 010760, Sample Num: 172160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00805056, Log Avg loss: 0.00022703, Global Avg Loss: 0.02016734, Time: 0.1810 Steps: 70400, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 010960, Sample Num: 175360, Cur Loss: 0.00000000, Cur Avg Loss: 0.00790365, Log Avg loss: 0.00000001, Global Avg Loss: 0.02011021, Time: 0.3912 Steps: 70600, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 011160, Sample Num: 178560, Cur Loss: 0.00000000, Cur Avg Loss: 0.00780591, Log Avg loss: 0.00244965, Global Avg Loss: 0.02006032, Time: 0.1213 Steps: 70800, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 011360, Sample Num: 181760, Cur Loss: 0.00000000, Cur Avg Loss: 0.00766849, Log Avg loss: 0.00000052, Global Avg Loss: 0.02000381, Time: 0.1066 Steps: 71000, Updated lr: 0.000041 Training, Epoch: 0006, Batch: 011560, Sample Num: 184960, Cur Loss: 0.00000000, Cur Avg Loss: 0.00753582, Log Avg loss: 0.00000002, Global Avg Loss: 0.01994762, Time: 0.0689 Steps: 71200, Updated lr: 0.000040 Training, Epoch: 0006, Batch: 011760, Sample Num: 188160, Cur Loss: 0.00000000, Cur Avg Loss: 0.00740770, Log Avg loss: 0.00000253, Global Avg Loss: 0.01989175, Time: 0.1238 Steps: 71400, Updated lr: 0.000040 ***** Running evaluation checkpoint-71568 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-71568 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1871.099491, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.012686, "eval_total_loss": 17.67141, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999925, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.02246, "test_total_loss": 31.286561, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 4.006718172657038e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.01989927976530157, "train_cur_epoch_loss": 90.99519002956734, "train_cur_epoch_avg_loss": 0.007628704730849039, "train_cur_epoch_time": 1871.0994906425476, "train_cur_epoch_avg_time": 0.1568661544804282, "epoch": 6, "step": 71568} ################################################## Training, Epoch: 0007, Batch: 000032, Sample Num: 512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000000, Log Avg loss: 0.01940312, Global Avg Loss: 0.01989039, Time: 0.2196 Steps: 71600, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000232, Sample Num: 3712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01316917, Log Avg loss: 0.01527624, Global Avg Loss: 0.01987753, Time: 0.0624 Steps: 71800, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000432, Sample Num: 6912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00707236, Log Avg loss: 0.00000007, Global Avg Loss: 0.01982232, Time: 0.2286 Steps: 72000, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000632, Sample Num: 10112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00483428, Log Avg loss: 0.00000001, Global Avg Loss: 0.01976741, Time: 0.3423 Steps: 72200, Updated lr: 0.000040 Training, Epoch: 0007, Batch: 000832, Sample Num: 13312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00733843, Log Avg loss: 0.01525157, Global Avg Loss: 0.01975493, Time: 0.1187 Steps: 72400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001032, Sample Num: 16512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00621324, Log Avg loss: 0.00153243, Global Avg Loss: 0.01970473, Time: 0.1755 Steps: 72600, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001232, Sample Num: 19712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00520461, Log Avg loss: 0.00000005, Global Avg Loss: 0.01965060, Time: 0.1840 Steps: 72800, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001432, Sample Num: 22912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00447771, Log Avg loss: 0.00000004, Global Avg Loss: 0.01959676, Time: 0.1227 Steps: 73000, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001632, Sample Num: 26112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00934138, Log Avg loss: 0.04416525, Global Avg Loss: 0.01966389, Time: 0.1965 Steps: 73200, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 001832, Sample Num: 29312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00832158, Log Avg loss: 0.00000000, Global Avg Loss: 0.01961031, Time: 0.1100 Steps: 73400, Updated lr: 0.000039 Training, Epoch: 0007, Batch: 002032, Sample Num: 32512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00750253, Log Avg loss: 0.00000001, Global Avg Loss: 0.01955702, Time: 0.2111 Steps: 73600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002232, Sample Num: 35712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01413621, Log Avg loss: 0.08153440, Global Avg Loss: 0.01972498, Time: 0.0832 Steps: 73800, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002432, Sample Num: 38912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01297369, Log Avg loss: 0.00000001, Global Avg Loss: 0.01967167, Time: 0.1275 Steps: 74000, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002632, Sample Num: 42112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01747169, Log Avg loss: 0.07216737, Global Avg Loss: 0.01981317, Time: 0.0884 Steps: 74200, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 002832, Sample Num: 45312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01644144, Log Avg loss: 0.00288339, Global Avg Loss: 0.01976766, Time: 0.0857 Steps: 74400, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003032, Sample Num: 48512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01535692, Log Avg loss: 0.00000006, Global Avg Loss: 0.01971466, Time: 0.1604 Steps: 74600, Updated lr: 0.000038 Training, Epoch: 0007, Batch: 003232, Sample Num: 51712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01440665, Log Avg loss: 0.00000054, Global Avg Loss: 0.01966195, Time: 0.2005 Steps: 74800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 003432, Sample Num: 54912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01361285, Log Avg loss: 0.00078504, Global Avg Loss: 0.01961161, Time: 0.0710 Steps: 75000, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 003632, Sample Num: 58112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01644755, Log Avg loss: 0.06509111, Global Avg Loss: 0.01973257, Time: 0.1616 Steps: 75200, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 003832, Sample Num: 61312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01559346, Log Avg loss: 0.00008303, Global Avg Loss: 0.01968045, Time: 0.1091 Steps: 75400, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 004032, Sample Num: 64512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01481997, Log Avg loss: 0.00000002, Global Avg Loss: 0.01962838, Time: 0.1621 Steps: 75600, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 004232, Sample Num: 67712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01412578, Log Avg loss: 0.00013077, Global Avg Loss: 0.01957694, Time: 0.1040 Steps: 75800, Updated lr: 0.000037 Training, Epoch: 0007, Batch: 004432, Sample Num: 70912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01348866, Log Avg loss: 0.00000733, Global Avg Loss: 0.01952544, Time: 0.2219 Steps: 76000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 004632, Sample Num: 74112, Cur Loss: 0.00000066, Cur Avg Loss: 0.01290625, Log Avg loss: 0.00000001, Global Avg Loss: 0.01947419, Time: 0.1816 Steps: 76200, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 004832, Sample Num: 77312, Cur Loss: 0.00000036, Cur Avg Loss: 0.01242322, Log Avg loss: 0.00123628, Global Avg Loss: 0.01942645, Time: 0.1677 Steps: 76400, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005032, Sample Num: 80512, Cur Loss: 0.00000000, Cur Avg Loss: 0.01193179, Log Avg loss: 0.00005891, Global Avg Loss: 0.01937588, Time: 0.1189 Steps: 76600, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005232, Sample Num: 83712, Cur Loss: 0.00000000, Cur Avg Loss: 0.01147569, Log Avg loss: 0.00000003, Global Avg Loss: 0.01932542, Time: 0.0775 Steps: 76800, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005432, Sample Num: 86912, Cur Loss: 0.00000000, Cur Avg Loss: 0.01105317, Log Avg loss: 0.00000003, Global Avg Loss: 0.01927523, Time: 0.0875 Steps: 77000, Updated lr: 0.000036 Training, Epoch: 0007, Batch: 005632, Sample Num: 90112, Cur Loss: 0.00000000, Cur Avg Loss: 0.01066065, Log Avg loss: 0.00000001, Global Avg Loss: 0.01922529, Time: 0.1767 Steps: 77200, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 005832, Sample Num: 93312, Cur Loss: 0.00000000, Cur Avg Loss: 0.01029506, Log Avg loss: 0.00000002, Global Avg Loss: 0.01917561, Time: 0.0557 Steps: 77400, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006032, Sample Num: 96512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00995371, Log Avg loss: 0.00000000, Global Avg Loss: 0.01912619, Time: 0.2872 Steps: 77600, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006232, Sample Num: 99712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00963434, Log Avg loss: 0.00000197, Global Avg Loss: 0.01907703, Time: 0.1482 Steps: 77800, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006432, Sample Num: 102912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00942781, Log Avg loss: 0.00299234, Global Avg Loss: 0.01903579, Time: 0.0984 Steps: 78000, Updated lr: 0.000035 Training, Epoch: 0007, Batch: 006632, Sample Num: 106112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00914426, Log Avg loss: 0.00002547, Global Avg Loss: 0.01898717, Time: 0.0837 Steps: 78200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 006832, Sample Num: 109312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00887666, Log Avg loss: 0.00000299, Global Avg Loss: 0.01893874, Time: 0.1118 Steps: 78400, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007032, Sample Num: 112512, Cur Loss: 0.00108785, Cur Avg Loss: 0.00862435, Log Avg loss: 0.00000545, Global Avg Loss: 0.01889056, Time: 0.4041 Steps: 78600, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007232, Sample Num: 115712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00838587, Log Avg loss: 0.00000065, Global Avg Loss: 0.01884262, Time: 0.2528 Steps: 78800, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007432, Sample Num: 118912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00816020, Log Avg loss: 0.00000000, Global Avg Loss: 0.01879491, Time: 0.1568 Steps: 79000, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007632, Sample Num: 122112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00807332, Log Avg loss: 0.00484508, Global Avg Loss: 0.01875969, Time: 0.0790 Steps: 79200, Updated lr: 0.000034 Training, Epoch: 0007, Batch: 007832, Sample Num: 125312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00786716, Log Avg loss: 0.00000001, Global Avg Loss: 0.01871243, Time: 0.1941 Steps: 79400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008032, Sample Num: 128512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00779128, Log Avg loss: 0.00482001, Global Avg Loss: 0.01867753, Time: 0.1003 Steps: 79600, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008232, Sample Num: 131712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00767257, Log Avg loss: 0.00290504, Global Avg Loss: 0.01863800, Time: 0.1486 Steps: 79800, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008432, Sample Num: 134912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00749247, Log Avg loss: 0.00007942, Global Avg Loss: 0.01859160, Time: 0.0874 Steps: 80000, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008632, Sample Num: 138112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00731890, Log Avg loss: 0.00000147, Global Avg Loss: 0.01854524, Time: 0.2122 Steps: 80200, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 008832, Sample Num: 141312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00715317, Log Avg loss: 0.00000005, Global Avg Loss: 0.01849911, Time: 0.1589 Steps: 80400, Updated lr: 0.000033 Training, Epoch: 0007, Batch: 009032, Sample Num: 144512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00699477, Log Avg loss: 0.00000001, Global Avg Loss: 0.01845321, Time: 0.0937 Steps: 80600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009232, Sample Num: 147712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00771749, Log Avg loss: 0.04035519, Global Avg Loss: 0.01850742, Time: 0.1368 Steps: 80800, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009432, Sample Num: 150912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00755385, Log Avg loss: 0.00000036, Global Avg Loss: 0.01846172, Time: 0.1086 Steps: 81000, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009632, Sample Num: 154112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00739703, Log Avg loss: 0.00000122, Global Avg Loss: 0.01841625, Time: 0.0793 Steps: 81200, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 009832, Sample Num: 157312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00928914, Log Avg loss: 0.10041341, Global Avg Loss: 0.01861772, Time: 0.0918 Steps: 81400, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 010032, Sample Num: 160512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00910411, Log Avg loss: 0.00000821, Global Avg Loss: 0.01857211, Time: 0.3611 Steps: 81600, Updated lr: 0.000032 Training, Epoch: 0007, Batch: 010232, Sample Num: 163712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00892630, Log Avg loss: 0.00000695, Global Avg Loss: 0.01852672, Time: 0.1250 Steps: 81800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 010432, Sample Num: 166912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00885511, Log Avg loss: 0.00521318, Global Avg Loss: 0.01849425, Time: 0.0967 Steps: 82000, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 010632, Sample Num: 170112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00869121, Log Avg loss: 0.00014219, Global Avg Loss: 0.01844959, Time: 0.1045 Steps: 82200, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 010832, Sample Num: 173312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00853074, Log Avg loss: 0.00000001, Global Avg Loss: 0.01840481, Time: 0.2016 Steps: 82400, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 011032, Sample Num: 176512, Cur Loss: 0.00000000, Cur Avg Loss: 0.00837608, Log Avg loss: 0.00000001, Global Avg Loss: 0.01836025, Time: 0.2185 Steps: 82600, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 011232, Sample Num: 179712, Cur Loss: 0.00000000, Cur Avg Loss: 0.00826936, Log Avg loss: 0.00238268, Global Avg Loss: 0.01832166, Time: 0.2180 Steps: 82800, Updated lr: 0.000031 Training, Epoch: 0007, Batch: 011432, Sample Num: 182912, Cur Loss: 0.00000000, Cur Avg Loss: 0.00812469, Log Avg loss: 0.00000001, Global Avg Loss: 0.01827751, Time: 0.2201 Steps: 83000, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 011632, Sample Num: 186112, Cur Loss: 0.00000000, Cur Avg Loss: 0.00798502, Log Avg loss: 0.00000132, Global Avg Loss: 0.01823357, Time: 0.2328 Steps: 83200, Updated lr: 0.000030 Training, Epoch: 0007, Batch: 011832, Sample Num: 189312, Cur Loss: 0.00000000, Cur Avg Loss: 0.00839475, Log Avg loss: 0.03222483, Global Avg Loss: 0.01826713, Time: 0.3083 Steps: 83400, Updated lr: 0.000030 ***** Running evaluation checkpoint-83496 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-83496 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1871.150160, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.014971, "eval_total_loss": 20.854287, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999925, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.026068, "test_total_loss": 36.312246, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 3.005038629492778e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.01824612392060429, "train_cur_epoch_loss": 99.32670863167175, "train_cur_epoch_avg_loss": 0.008327188852420501, "train_cur_epoch_time": 1871.1501603126526, "train_cur_epoch_avg_time": 0.15687040244069858, "epoch": 7, "step": 83496} ################################################## Training, Epoch: 0008, Batch: 000104, Sample Num: 1664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00212437, Log Avg loss: 0.00110468, Global Avg Loss: 0.01822607, Time: 0.3041 Steps: 83600, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000304, Sample Num: 4864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00072678, Log Avg loss: 0.00000003, Global Avg Loss: 0.01818257, Time: 0.1659 Steps: 83800, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000504, Sample Num: 8064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00043843, Log Avg loss: 0.00000013, Global Avg Loss: 0.01813928, Time: 0.0961 Steps: 84000, Updated lr: 0.000030 Training, Epoch: 0008, Batch: 000704, Sample Num: 11264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00031387, Log Avg loss: 0.00000000, Global Avg Loss: 0.01809619, Time: 0.1377 Steps: 84200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 000904, Sample Num: 14464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00208915, Log Avg loss: 0.00833812, Global Avg Loss: 0.01807307, Time: 0.1050 Steps: 84400, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001104, Sample Num: 17664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00171125, Log Avg loss: 0.00000315, Global Avg Loss: 0.01803035, Time: 0.1763 Steps: 84600, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001304, Sample Num: 20864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00144879, Log Avg loss: 0.00000001, Global Avg Loss: 0.01798783, Time: 0.2279 Steps: 84800, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001504, Sample Num: 24064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00140020, Log Avg loss: 0.00108338, Global Avg Loss: 0.01794805, Time: 0.2477 Steps: 85000, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001704, Sample Num: 27264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00599776, Log Avg loss: 0.04057142, Global Avg Loss: 0.01800116, Time: 0.2222 Steps: 85200, Updated lr: 0.000029 Training, Epoch: 0008, Batch: 001904, Sample Num: 30464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00536774, Log Avg loss: 0.00000002, Global Avg Loss: 0.01795900, Time: 0.0885 Steps: 85400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002104, Sample Num: 33664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00528210, Log Avg loss: 0.00446675, Global Avg Loss: 0.01792748, Time: 0.2119 Steps: 85600, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002304, Sample Num: 36864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01265919, Log Avg loss: 0.09026622, Global Avg Loss: 0.01809610, Time: 0.1265 Steps: 85800, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002504, Sample Num: 40064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01164808, Log Avg loss: 0.00000000, Global Avg Loss: 0.01805401, Time: 0.1987 Steps: 86000, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002704, Sample Num: 43264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01502798, Log Avg loss: 0.05734440, Global Avg Loss: 0.01814517, Time: 0.1089 Steps: 86200, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 002904, Sample Num: 46464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01399646, Log Avg loss: 0.00005024, Global Avg Loss: 0.01810329, Time: 0.0935 Steps: 86400, Updated lr: 0.000028 Training, Epoch: 0008, Batch: 003104, Sample Num: 49664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01309462, Log Avg loss: 0.00000002, Global Avg Loss: 0.01806148, Time: 0.1100 Steps: 86600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003304, Sample Num: 52864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01233875, Log Avg loss: 0.00060753, Global Avg Loss: 0.01802126, Time: 0.1570 Steps: 86800, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003504, Sample Num: 56064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01163448, Log Avg loss: 0.00000004, Global Avg Loss: 0.01797983, Time: 0.1279 Steps: 87000, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003704, Sample Num: 59264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01517435, Log Avg loss: 0.07719282, Global Avg Loss: 0.01811564, Time: 0.2094 Steps: 87200, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 003904, Sample Num: 62464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01439697, Log Avg loss: 0.00000000, Global Avg Loss: 0.01807419, Time: 0.3721 Steps: 87400, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 004104, Sample Num: 65664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01369537, Log Avg loss: 0.00000000, Global Avg Loss: 0.01803292, Time: 0.2493 Steps: 87600, Updated lr: 0.000027 Training, Epoch: 0008, Batch: 004304, Sample Num: 68864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01306095, Log Avg loss: 0.00004280, Global Avg Loss: 0.01799194, Time: 0.0710 Steps: 87800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 004504, Sample Num: 72064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01248111, Log Avg loss: 0.00000278, Global Avg Loss: 0.01795106, Time: 0.3023 Steps: 88000, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 004704, Sample Num: 75264, Cur Loss: 0.00000000, Cur Avg Loss: 0.01195045, Log Avg loss: 0.00000000, Global Avg Loss: 0.01791035, Time: 0.2110 Steps: 88200, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 004904, Sample Num: 78464, Cur Loss: 0.00000000, Cur Avg Loss: 0.01147600, Log Avg loss: 0.00031698, Global Avg Loss: 0.01787055, Time: 0.1042 Steps: 88400, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 005104, Sample Num: 81664, Cur Loss: 0.00000000, Cur Avg Loss: 0.01102631, Log Avg loss: 0.00000004, Global Avg Loss: 0.01783021, Time: 0.1221 Steps: 88600, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 005304, Sample Num: 84864, Cur Loss: 0.00000000, Cur Avg Loss: 0.01061054, Log Avg loss: 0.00000001, Global Avg Loss: 0.01779005, Time: 0.1070 Steps: 88800, Updated lr: 0.000026 Training, Epoch: 0008, Batch: 005504, Sample Num: 88064, Cur Loss: 0.00000000, Cur Avg Loss: 0.01022498, Log Avg loss: 0.00000000, Global Avg Loss: 0.01775007, Time: 0.1479 Steps: 89000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 005704, Sample Num: 91264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00986646, Log Avg loss: 0.00000001, Global Avg Loss: 0.01771028, Time: 0.4053 Steps: 89200, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 005904, Sample Num: 94464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00953223, Log Avg loss: 0.00000001, Global Avg Loss: 0.01767066, Time: 0.1541 Steps: 89400, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006104, Sample Num: 97664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00921991, Log Avg loss: 0.00000004, Global Avg Loss: 0.01763121, Time: 0.1713 Steps: 89600, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006304, Sample Num: 100864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00892749, Log Avg loss: 0.00000292, Global Avg Loss: 0.01759195, Time: 0.0711 Steps: 89800, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006504, Sample Num: 104064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00874001, Log Avg loss: 0.00283071, Global Avg Loss: 0.01755915, Time: 0.1106 Steps: 90000, Updated lr: 0.000025 Training, Epoch: 0008, Batch: 006704, Sample Num: 107264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00847975, Log Avg loss: 0.00001613, Global Avg Loss: 0.01752025, Time: 0.0987 Steps: 90200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 006904, Sample Num: 110464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00823418, Log Avg loss: 0.00000263, Global Avg Loss: 0.01748150, Time: 0.1989 Steps: 90400, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007104, Sample Num: 113664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00800257, Log Avg loss: 0.00000715, Global Avg Loss: 0.01744292, Time: 0.1334 Steps: 90600, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007304, Sample Num: 116864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00778346, Log Avg loss: 0.00000065, Global Avg Loss: 0.01740450, Time: 0.1225 Steps: 90800, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007504, Sample Num: 120064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00757601, Log Avg loss: 0.00000000, Global Avg Loss: 0.01736625, Time: 0.1573 Steps: 91000, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007704, Sample Num: 123264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00773039, Log Avg loss: 0.01352295, Global Avg Loss: 0.01735782, Time: 0.2813 Steps: 91200, Updated lr: 0.000024 Training, Epoch: 0008, Batch: 007904, Sample Num: 126464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00756261, Log Avg loss: 0.00109941, Global Avg Loss: 0.01732225, Time: 0.1461 Steps: 91400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008104, Sample Num: 129664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00745282, Log Avg loss: 0.00311394, Global Avg Loss: 0.01729122, Time: 0.0646 Steps: 91600, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008304, Sample Num: 132864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00733748, Log Avg loss: 0.00266387, Global Avg Loss: 0.01725935, Time: 0.0907 Steps: 91800, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008504, Sample Num: 136064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00716640, Log Avg loss: 0.00006329, Global Avg Loss: 0.01722197, Time: 0.1714 Steps: 92000, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008704, Sample Num: 139264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00700173, Log Avg loss: 0.00000013, Global Avg Loss: 0.01718461, Time: 0.1925 Steps: 92200, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 008904, Sample Num: 142464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00684446, Log Avg loss: 0.00000001, Global Avg Loss: 0.01714742, Time: 0.1340 Steps: 92400, Updated lr: 0.000023 Training, Epoch: 0008, Batch: 009104, Sample Num: 145664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00669410, Log Avg loss: 0.00000000, Global Avg Loss: 0.01711038, Time: 0.1204 Steps: 92600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009304, Sample Num: 148864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00703039, Log Avg loss: 0.02233842, Global Avg Loss: 0.01712165, Time: 0.1223 Steps: 92800, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009504, Sample Num: 152064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00688246, Log Avg loss: 0.00000084, Global Avg Loss: 0.01708483, Time: 0.2563 Steps: 93000, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009704, Sample Num: 155264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00674062, Log Avg loss: 0.00000000, Global Avg Loss: 0.01704817, Time: 0.2884 Steps: 93200, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 009904, Sample Num: 158464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00838290, Log Avg loss: 0.08806660, Global Avg Loss: 0.01720024, Time: 0.1444 Steps: 93400, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 010104, Sample Num: 161664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00821706, Log Avg loss: 0.00000455, Global Avg Loss: 0.01716350, Time: 0.0578 Steps: 93600, Updated lr: 0.000022 Training, Epoch: 0008, Batch: 010304, Sample Num: 164864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00812263, Log Avg loss: 0.00335222, Global Avg Loss: 0.01713405, Time: 0.2079 Steps: 93800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 010504, Sample Num: 168064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00797584, Log Avg loss: 0.00041315, Global Avg Loss: 0.01709847, Time: 0.2121 Steps: 94000, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 010704, Sample Num: 171264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00782711, Log Avg loss: 0.00001591, Global Avg Loss: 0.01706221, Time: 0.0860 Steps: 94200, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 010904, Sample Num: 174464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00768355, Log Avg loss: 0.00000001, Global Avg Loss: 0.01702606, Time: 0.1131 Steps: 94400, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 011104, Sample Num: 177664, Cur Loss: 0.00000000, Cur Avg Loss: 0.00754516, Log Avg loss: 0.00000000, Global Avg Loss: 0.01699006, Time: 0.1576 Steps: 94600, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 011304, Sample Num: 180864, Cur Loss: 0.00000000, Cur Avg Loss: 0.00745090, Log Avg loss: 0.00221783, Global Avg Loss: 0.01695890, Time: 0.0959 Steps: 94800, Updated lr: 0.000021 Training, Epoch: 0008, Batch: 011504, Sample Num: 184064, Cur Loss: 0.00000000, Cur Avg Loss: 0.00732137, Log Avg loss: 0.00000010, Global Avg Loss: 0.01692319, Time: 0.1988 Steps: 95000, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 011704, Sample Num: 187264, Cur Loss: 0.00000000, Cur Avg Loss: 0.00719627, Log Avg loss: 0.00000086, Global Avg Loss: 0.01688764, Time: 0.0761 Steps: 95200, Updated lr: 0.000020 Training, Epoch: 0008, Batch: 011904, Sample Num: 190464, Cur Loss: 0.00000000, Cur Avg Loss: 0.00713690, Log Avg loss: 0.00366222, Global Avg Loss: 0.01685992, Time: 0.0974 Steps: 95400, Updated lr: 0.000020 ***** Running evaluation checkpoint-95424 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-95424 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1868.680234, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.013417, "eval_total_loss": 18.6898, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999922, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.021472, "test_total_loss": 29.91103, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 2.003359086328519e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.01685567568351686, "train_cur_epoch_loss": 84.9576335491364, "train_cur_epoch_avg_loss": 0.007122538023904796, "train_cur_epoch_time": 1868.6802344322205, "train_cur_epoch_avg_time": 0.156663332866551, "epoch": 8, "step": 95424} ################################################## Training, Epoch: 0009, Batch: 000176, Sample Num: 2816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00010104, Log Avg loss: 0.00008892, Global Avg Loss: 0.01682483, Time: 0.1464 Steps: 95600, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000376, Sample Num: 6016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00004732, Log Avg loss: 0.00000005, Global Avg Loss: 0.01678971, Time: 0.0930 Steps: 95800, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000576, Sample Num: 9216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00003089, Log Avg loss: 0.00000001, Global Avg Loss: 0.01675473, Time: 0.3529 Steps: 96000, Updated lr: 0.000020 Training, Epoch: 0009, Batch: 000776, Sample Num: 12416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00035815, Log Avg loss: 0.00130065, Global Avg Loss: 0.01672260, Time: 0.2833 Steps: 96200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 000976, Sample Num: 15616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00066857, Log Avg loss: 0.00187300, Global Avg Loss: 0.01669179, Time: 0.0974 Steps: 96400, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001176, Sample Num: 18816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00055552, Log Avg loss: 0.00000385, Global Avg Loss: 0.01665724, Time: 0.1356 Steps: 96600, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001376, Sample Num: 22016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00047479, Log Avg loss: 0.00000006, Global Avg Loss: 0.01662282, Time: 0.1074 Steps: 96800, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001576, Sample Num: 25216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00252906, Log Avg loss: 0.01666249, Global Avg Loss: 0.01662291, Time: 0.1370 Steps: 97000, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001776, Sample Num: 28416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00224426, Log Avg loss: 0.00000004, Global Avg Loss: 0.01658870, Time: 0.4036 Steps: 97200, Updated lr: 0.000019 Training, Epoch: 0009, Batch: 001976, Sample Num: 31616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00201711, Log Avg loss: 0.00000002, Global Avg Loss: 0.01655464, Time: 0.1415 Steps: 97400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002176, Sample Num: 34816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00863569, Log Avg loss: 0.07402722, Global Avg Loss: 0.01667241, Time: 0.1187 Steps: 97600, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002376, Sample Num: 38016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00790878, Log Avg loss: 0.00000000, Global Avg Loss: 0.01663832, Time: 0.1669 Steps: 97800, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002576, Sample Num: 41216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01068352, Log Avg loss: 0.04364747, Global Avg Loss: 0.01669344, Time: 0.0964 Steps: 98000, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002776, Sample Num: 44416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00991579, Log Avg loss: 0.00002739, Global Avg Loss: 0.01665949, Time: 0.1289 Steps: 98200, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 002976, Sample Num: 47616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00924979, Log Avg loss: 0.00000578, Global Avg Loss: 0.01662564, Time: 0.1065 Steps: 98400, Updated lr: 0.000018 Training, Epoch: 0009, Batch: 003176, Sample Num: 50816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00866750, Log Avg loss: 0.00000304, Global Avg Loss: 0.01659193, Time: 0.1103 Steps: 98600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003376, Sample Num: 54016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00819609, Log Avg loss: 0.00071009, Global Avg Loss: 0.01655978, Time: 0.0890 Steps: 98800, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003576, Sample Num: 57216, Cur Loss: 0.00000000, Cur Avg Loss: 0.01155184, Log Avg loss: 0.06819679, Global Avg Loss: 0.01666409, Time: 0.1746 Steps: 99000, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003776, Sample Num: 60416, Cur Loss: 0.00000000, Cur Avg Loss: 0.01094423, Log Avg loss: 0.00008023, Global Avg Loss: 0.01663066, Time: 0.3032 Steps: 99200, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 003976, Sample Num: 63616, Cur Loss: 0.00000000, Cur Avg Loss: 0.01039372, Log Avg loss: 0.00000000, Global Avg Loss: 0.01659720, Time: 0.3399 Steps: 99400, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 004176, Sample Num: 66816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00989597, Log Avg loss: 0.00000077, Global Avg Loss: 0.01656387, Time: 0.2062 Steps: 99600, Updated lr: 0.000017 Training, Epoch: 0009, Batch: 004376, Sample Num: 70016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00944400, Log Avg loss: 0.00000687, Global Avg Loss: 0.01653069, Time: 0.0876 Steps: 99800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 004576, Sample Num: 73216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00903124, Log Avg loss: 0.00000000, Global Avg Loss: 0.01649763, Time: 0.1818 Steps: 100000, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 004776, Sample Num: 76416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00865387, Log Avg loss: 0.00001965, Global Avg Loss: 0.01646474, Time: 0.0866 Steps: 100200, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 004976, Sample Num: 79616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00830847, Log Avg loss: 0.00006042, Global Avg Loss: 0.01643206, Time: 0.1041 Steps: 100400, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 005176, Sample Num: 82816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00798743, Log Avg loss: 0.00000001, Global Avg Loss: 0.01639939, Time: 0.2203 Steps: 100600, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 005376, Sample Num: 86016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00769028, Log Avg loss: 0.00000001, Global Avg Loss: 0.01636685, Time: 0.1550 Steps: 100800, Updated lr: 0.000016 Training, Epoch: 0009, Batch: 005576, Sample Num: 89216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00741445, Log Avg loss: 0.00000000, Global Avg Loss: 0.01633445, Time: 0.2236 Steps: 101000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 005776, Sample Num: 92416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00715771, Log Avg loss: 0.00000000, Global Avg Loss: 0.01630216, Time: 0.2131 Steps: 101200, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 005976, Sample Num: 95616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00691817, Log Avg loss: 0.00000000, Global Avg Loss: 0.01627001, Time: 0.0755 Steps: 101400, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006176, Sample Num: 98816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00669416, Log Avg loss: 0.00000089, Global Avg Loss: 0.01623798, Time: 0.1505 Steps: 101600, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006376, Sample Num: 102016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00656991, Log Avg loss: 0.00273319, Global Avg Loss: 0.01621145, Time: 0.1021 Steps: 101800, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006576, Sample Num: 105216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00637029, Log Avg loss: 0.00000630, Global Avg Loss: 0.01617968, Time: 0.0979 Steps: 102000, Updated lr: 0.000015 Training, Epoch: 0009, Batch: 006776, Sample Num: 108416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00618229, Log Avg loss: 0.00000073, Global Avg Loss: 0.01614802, Time: 0.0876 Steps: 102200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 006976, Sample Num: 111616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00600504, Log Avg loss: 0.00000001, Global Avg Loss: 0.01611648, Time: 0.1473 Steps: 102400, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007176, Sample Num: 114816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00583818, Log Avg loss: 0.00001808, Global Avg Loss: 0.01608510, Time: 0.1591 Steps: 102600, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007376, Sample Num: 118016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00567989, Log Avg loss: 0.00000045, Global Avg Loss: 0.01605380, Time: 0.1326 Steps: 102800, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007576, Sample Num: 121216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00623683, Log Avg loss: 0.02677682, Global Avg Loss: 0.01607462, Time: 0.1174 Steps: 103000, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007776, Sample Num: 124416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00607642, Log Avg loss: 0.00000000, Global Avg Loss: 0.01604347, Time: 0.0911 Steps: 103200, Updated lr: 0.000014 Training, Epoch: 0009, Batch: 007976, Sample Num: 127616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00593073, Log Avg loss: 0.00026625, Global Avg Loss: 0.01601295, Time: 0.1518 Steps: 103400, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008176, Sample Num: 130816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00585950, Log Avg loss: 0.00301892, Global Avg Loss: 0.01598787, Time: 0.0745 Steps: 103600, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008376, Sample Num: 134016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00577863, Log Avg loss: 0.00247254, Global Avg Loss: 0.01596183, Time: 0.4045 Steps: 103800, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008576, Sample Num: 137216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00564387, Log Avg loss: 0.00000028, Global Avg Loss: 0.01593113, Time: 0.1690 Steps: 104000, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008776, Sample Num: 140416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00551525, Log Avg loss: 0.00000007, Global Avg Loss: 0.01590056, Time: 0.1254 Steps: 104200, Updated lr: 0.000013 Training, Epoch: 0009, Batch: 008976, Sample Num: 143616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00539236, Log Avg loss: 0.00000000, Global Avg Loss: 0.01587009, Time: 0.3476 Steps: 104400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009176, Sample Num: 146816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00527484, Log Avg loss: 0.00000029, Global Avg Loss: 0.01583975, Time: 0.1288 Steps: 104600, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009376, Sample Num: 150016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00521367, Log Avg loss: 0.00240738, Global Avg Loss: 0.01581412, Time: 0.1330 Steps: 104800, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009576, Sample Num: 153216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00510507, Log Avg loss: 0.00001393, Global Avg Loss: 0.01578402, Time: 0.3545 Steps: 105000, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009776, Sample Num: 156416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00586612, Log Avg loss: 0.04230523, Global Avg Loss: 0.01583444, Time: 0.1497 Steps: 105200, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 009976, Sample Num: 159616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00638752, Log Avg loss: 0.03187332, Global Avg Loss: 0.01586488, Time: 0.1641 Steps: 105400, Updated lr: 0.000012 Training, Epoch: 0009, Batch: 010176, Sample Num: 162816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00626231, Log Avg loss: 0.00001705, Global Avg Loss: 0.01583486, Time: 0.0817 Steps: 105600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010376, Sample Num: 166016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00623297, Log Avg loss: 0.00474015, Global Avg Loss: 0.01581389, Time: 0.3394 Steps: 105800, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010576, Sample Num: 169216, Cur Loss: 0.00028744, Cur Avg Loss: 0.00616702, Log Avg loss: 0.00274530, Global Avg Loss: 0.01578923, Time: 0.4048 Steps: 106000, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010776, Sample Num: 172416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00605256, Log Avg loss: 0.00000000, Global Avg Loss: 0.01575950, Time: 0.0864 Steps: 106200, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 010976, Sample Num: 175616, Cur Loss: 0.00000000, Cur Avg Loss: 0.00594227, Log Avg loss: 0.00000000, Global Avg Loss: 0.01572987, Time: 0.1316 Steps: 106400, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 011176, Sample Num: 178816, Cur Loss: 0.00000000, Cur Avg Loss: 0.00586379, Log Avg loss: 0.00155646, Global Avg Loss: 0.01570328, Time: 0.1318 Steps: 106600, Updated lr: 0.000011 Training, Epoch: 0009, Batch: 011376, Sample Num: 182016, Cur Loss: 0.00000000, Cur Avg Loss: 0.00576070, Log Avg loss: 0.00000000, Global Avg Loss: 0.01567387, Time: 0.1000 Steps: 106800, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 011576, Sample Num: 185216, Cur Loss: 0.00000000, Cur Avg Loss: 0.00566117, Log Avg loss: 0.00000005, Global Avg Loss: 0.01564458, Time: 0.1483 Steps: 107000, Updated lr: 0.000010 Training, Epoch: 0009, Batch: 011776, Sample Num: 188416, Cur Loss: 0.00000000, Cur Avg Loss: 0.00556503, Log Avg loss: 0.00000040, Global Avg Loss: 0.01561539, Time: 0.2591 Steps: 107200, Updated lr: 0.000010 ***** Running evaluation checkpoint-107352 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-107352 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1870.376769, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.014729, "eval_total_loss": 20.51712, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999922, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.021159, "test_total_loss": 29.474785, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 1.0016795431642594e-05, "cur_epoch_step": 11928, "train_global_avg_loss": 0.015601691520479643, "train_cur_epoch_loss": 66.4367916826171, "train_cur_epoch_avg_loss": 0.005569818216181849, "train_cur_epoch_time": 1870.376769065857, "train_cur_epoch_avg_time": 0.15680556414033006, "epoch": 9, "step": 107352} ################################################## Training, Epoch: 0010, Batch: 000048, Sample Num: 768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000001, Log Avg loss: 0.00451508, Global Avg Loss: 0.01559472, Time: 0.1414 Steps: 107400, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000248, Sample Num: 3968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000045, Log Avg loss: 0.00000055, Global Avg Loss: 0.01556573, Time: 0.1204 Steps: 107600, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000448, Sample Num: 7168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000027, Log Avg loss: 0.00000006, Global Avg Loss: 0.01553685, Time: 0.1226 Steps: 107800, Updated lr: 0.000010 Training, Epoch: 0010, Batch: 000648, Sample Num: 10368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00000020, Log Avg loss: 0.00000002, Global Avg Loss: 0.01550808, Time: 0.0764 Steps: 108000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 000848, Sample Num: 13568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00006753, Log Avg loss: 0.00028569, Global Avg Loss: 0.01547995, Time: 0.0961 Steps: 108200, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001048, Sample Num: 16768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00029829, Log Avg loss: 0.00127672, Global Avg Loss: 0.01545374, Time: 0.0955 Steps: 108400, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001248, Sample Num: 19968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00025049, Log Avg loss: 0.00000000, Global Avg Loss: 0.01542528, Time: 0.2177 Steps: 108600, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001448, Sample Num: 23168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00039773, Log Avg loss: 0.00131654, Global Avg Loss: 0.01539934, Time: 0.1164 Steps: 108800, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001648, Sample Num: 26368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00371803, Log Avg loss: 0.02775702, Global Avg Loss: 0.01542202, Time: 0.0807 Steps: 109000, Updated lr: 0.000009 Training, Epoch: 0010, Batch: 001848, Sample Num: 29568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00331565, Log Avg loss: 0.00000000, Global Avg Loss: 0.01539377, Time: 0.1954 Steps: 109200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002048, Sample Num: 32768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00335970, Log Avg loss: 0.00376672, Global Avg Loss: 0.01537252, Time: 0.1106 Steps: 109400, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002248, Sample Num: 35968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00911370, Log Avg loss: 0.06803462, Global Avg Loss: 0.01546862, Time: 0.1135 Steps: 109600, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002448, Sample Num: 39168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00836911, Log Avg loss: 0.00000000, Global Avg Loss: 0.01544044, Time: 0.0669 Steps: 109800, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002648, Sample Num: 42368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01244923, Log Avg loss: 0.06238989, Global Avg Loss: 0.01552580, Time: 0.0789 Steps: 110000, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 002848, Sample Num: 45568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01157572, Log Avg loss: 0.00001039, Global Avg Loss: 0.01549764, Time: 0.0804 Steps: 110200, Updated lr: 0.000008 Training, Epoch: 0010, Batch: 003048, Sample Num: 48768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01081635, Log Avg loss: 0.00000292, Global Avg Loss: 0.01546957, Time: 0.0882 Steps: 110400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003248, Sample Num: 51968, Cur Loss: 0.00000000, Cur Avg Loss: 0.01015033, Log Avg loss: 0.00000015, Global Avg Loss: 0.01544160, Time: 0.4050 Steps: 110600, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003448, Sample Num: 55168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00957986, Log Avg loss: 0.00031548, Global Avg Loss: 0.01541430, Time: 0.1116 Steps: 110800, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003648, Sample Num: 58368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01325580, Log Avg loss: 0.07662897, Global Avg Loss: 0.01552459, Time: 0.1223 Steps: 111000, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 003848, Sample Num: 61568, Cur Loss: 0.00000000, Cur Avg Loss: 0.01256829, Log Avg loss: 0.00002815, Global Avg Loss: 0.01549672, Time: 0.1052 Steps: 111200, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 004048, Sample Num: 64768, Cur Loss: 0.00000000, Cur Avg Loss: 0.01194733, Log Avg loss: 0.00000000, Global Avg Loss: 0.01546890, Time: 0.1124 Steps: 111400, Updated lr: 0.000007 Training, Epoch: 0010, Batch: 004248, Sample Num: 67968, Cur Loss: 0.00000000, Cur Avg Loss: 0.01138504, Log Avg loss: 0.00000428, Global Avg Loss: 0.01544119, Time: 0.1112 Steps: 111600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 004448, Sample Num: 71168, Cur Loss: 0.00000000, Cur Avg Loss: 0.01087322, Log Avg loss: 0.00000231, Global Avg Loss: 0.01541357, Time: 0.2032 Steps: 111800, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 004648, Sample Num: 74368, Cur Loss: 0.00000000, Cur Avg Loss: 0.01040536, Log Avg loss: 0.00000000, Global Avg Loss: 0.01538604, Time: 0.1231 Steps: 112000, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 004848, Sample Num: 77568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00997630, Log Avg loss: 0.00000496, Global Avg Loss: 0.01535863, Time: 0.2178 Steps: 112200, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 005048, Sample Num: 80768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00958180, Log Avg loss: 0.00001917, Global Avg Loss: 0.01533133, Time: 0.2054 Steps: 112400, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 005248, Sample Num: 83968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00921664, Log Avg loss: 0.00000000, Global Avg Loss: 0.01530410, Time: 0.1172 Steps: 112600, Updated lr: 0.000006 Training, Epoch: 0010, Batch: 005448, Sample Num: 87168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00887829, Log Avg loss: 0.00000000, Global Avg Loss: 0.01527697, Time: 0.4036 Steps: 112800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 005648, Sample Num: 90368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00856390, Log Avg loss: 0.00000000, Global Avg Loss: 0.01524993, Time: 0.1172 Steps: 113000, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 005848, Sample Num: 93568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00827102, Log Avg loss: 0.00000000, Global Avg Loss: 0.01522298, Time: 0.0870 Steps: 113200, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006048, Sample Num: 96768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00799751, Log Avg loss: 0.00000000, Global Avg Loss: 0.01519614, Time: 0.0661 Steps: 113400, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006248, Sample Num: 99968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00774154, Log Avg loss: 0.00000116, Global Avg Loss: 0.01516938, Time: 0.1391 Steps: 113600, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006448, Sample Num: 103168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00757161, Log Avg loss: 0.00226281, Global Avg Loss: 0.01514670, Time: 0.1256 Steps: 113800, Updated lr: 0.000005 Training, Epoch: 0010, Batch: 006648, Sample Num: 106368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00734392, Log Avg loss: 0.00000322, Global Avg Loss: 0.01512013, Time: 0.2202 Steps: 114000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 006848, Sample Num: 109568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00712948, Log Avg loss: 0.00000149, Global Avg Loss: 0.01509366, Time: 0.3073 Steps: 114200, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007048, Sample Num: 112768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00692745, Log Avg loss: 0.00001002, Global Avg Loss: 0.01506729, Time: 0.1374 Steps: 114400, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007248, Sample Num: 115968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00673639, Log Avg loss: 0.00000347, Global Avg Loss: 0.01504100, Time: 0.0885 Steps: 114600, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007448, Sample Num: 119168, Cur Loss: 0.00000006, Cur Avg Loss: 0.00655550, Log Avg loss: 0.00000000, Global Avg Loss: 0.01501479, Time: 0.1068 Steps: 114800, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007648, Sample Num: 122368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00696076, Log Avg loss: 0.02205269, Global Avg Loss: 0.01502703, Time: 0.2293 Steps: 115000, Updated lr: 0.000004 Training, Epoch: 0010, Batch: 007848, Sample Num: 125568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00678337, Log Avg loss: 0.00000000, Global Avg Loss: 0.01500094, Time: 0.2544 Steps: 115200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008048, Sample Num: 128768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00668353, Log Avg loss: 0.00276578, Global Avg Loss: 0.01497974, Time: 0.1375 Steps: 115400, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008248, Sample Num: 131968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00658367, Log Avg loss: 0.00256513, Global Avg Loss: 0.01495826, Time: 0.1743 Steps: 115600, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008448, Sample Num: 135168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00643975, Log Avg loss: 0.00050465, Global Avg Loss: 0.01493330, Time: 0.4227 Steps: 115800, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008648, Sample Num: 138368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00629082, Log Avg loss: 0.00000009, Global Avg Loss: 0.01490755, Time: 0.0873 Steps: 116000, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 008848, Sample Num: 141568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00614863, Log Avg loss: 0.00000037, Global Avg Loss: 0.01488189, Time: 0.1197 Steps: 116200, Updated lr: 0.000003 Training, Epoch: 0010, Batch: 009048, Sample Num: 144768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00601272, Log Avg loss: 0.00000000, Global Avg Loss: 0.01485632, Time: 0.1006 Steps: 116400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009248, Sample Num: 147968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00588458, Log Avg loss: 0.00008756, Global Avg Loss: 0.01483099, Time: 0.1041 Steps: 116600, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009448, Sample Num: 151168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00576002, Log Avg loss: 0.00000041, Global Avg Loss: 0.01480559, Time: 0.0917 Steps: 116800, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009648, Sample Num: 154368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00564097, Log Avg loss: 0.00001680, Global Avg Loss: 0.01478031, Time: 0.1255 Steps: 117000, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 009848, Sample Num: 157568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00601177, Log Avg loss: 0.02389910, Global Avg Loss: 0.01479588, Time: 0.1287 Steps: 117200, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 010048, Sample Num: 160768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00589226, Log Avg loss: 0.00000761, Global Avg Loss: 0.01477068, Time: 0.2992 Steps: 117400, Updated lr: 0.000002 Training, Epoch: 0010, Batch: 010248, Sample Num: 163968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00577733, Log Avg loss: 0.00000353, Global Avg Loss: 0.01474557, Time: 0.0837 Steps: 117600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 010448, Sample Num: 167168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00598500, Log Avg loss: 0.01662592, Global Avg Loss: 0.01474876, Time: 0.2843 Steps: 117800, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 010648, Sample Num: 170368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00587259, Log Avg loss: 0.00000014, Global Avg Loss: 0.01472376, Time: 0.0750 Steps: 118000, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 010848, Sample Num: 173568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00576432, Log Avg loss: 0.00000002, Global Avg Loss: 0.01469885, Time: 0.0879 Steps: 118200, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 011048, Sample Num: 176768, Cur Loss: 0.00000000, Cur Avg Loss: 0.00565997, Log Avg loss: 0.00000000, Global Avg Loss: 0.01467402, Time: 0.0816 Steps: 118400, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 011248, Sample Num: 179968, Cur Loss: 0.00000000, Cur Avg Loss: 0.00556838, Log Avg loss: 0.00050918, Global Avg Loss: 0.01465013, Time: 0.2228 Steps: 118600, Updated lr: 0.000001 Training, Epoch: 0010, Batch: 011448, Sample Num: 183168, Cur Loss: 0.00000000, Cur Avg Loss: 0.00547110, Log Avg loss: 0.00000002, Global Avg Loss: 0.01462547, Time: 0.1810 Steps: 118800, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 011648, Sample Num: 186368, Cur Loss: 0.00000000, Cur Avg Loss: 0.00537716, Log Avg loss: 0.00000007, Global Avg Loss: 0.01460089, Time: 0.0956 Steps: 119000, Updated lr: 0.000000 Training, Epoch: 0010, Batch: 011848, Sample Num: 189568, Cur Loss: 0.00000000, Cur Avg Loss: 0.00531463, Log Avg loss: 0.00167290, Global Avg Loss: 0.01457920, Time: 0.2250 Steps: 119200, Updated lr: 0.000000 ***** Running evaluation checkpoint-119280 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## ***** Running testing checkpoint-119280 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## Epoch Time: 1870.749635, Avg time per batch (s): 0.160000 {"eval_avg_loss": 0.017034, "eval_total_loss": 23.728503, "eval_acc": 0.999776, "eval_prec": 0.992634, "eval_recall": 0.998148, "eval_f1": 0.995383, "eval_roc_auc": 0.999998, "eval_pr_auc": 0.999922, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 1, "tp": 539}, "eval_mcc2": 0.995272, "eval_mcc": 0.995272, "eval_sn": 0.998148, "eval_sp": 0.999816, "update_flag": false, "test_avg_loss": 0.023266, "test_total_loss": 32.409101, "test_acc": 0.99982, "test_prec": 0.996296, "test_recall": 0.996296, "test_f1": 0.996296, "test_roc_auc": 0.999999, "test_pr_auc": 0.999976, "test_confusion_matrix": {"tn": 21742, "fp": 2, "fn": 2, "tp": 538}, "test_mcc2": 0.996204, "test_mcc": 0.996204, "test_sn": 0.996296, "test_sp": 0.999908, "lr": 0.0, "cur_epoch_step": 11928, "train_global_avg_loss": 0.014569421074165754, "train_cur_epoch_loss": 62.96775761995997, "train_cur_epoch_avg_loss": 0.005278987057340708, "train_cur_epoch_time": 1870.7496347427368, "train_cur_epoch_avg_time": 0.15683682383825762, "epoch": 10, "step": 119280} ################################################## #########################Best Metric######################### {"epoch": 1, "global_step": 11928, "eval_avg_loss": 0.000981, "eval_total_loss": 1.366584, "eval_acc": 0.99982, "eval_prec": 0.992647, "eval_recall": 1.0, "eval_f1": 0.99631, "eval_roc_auc": 0.999999, "eval_pr_auc": 0.999976, "eval_confusion_matrix": {"tn": 21740, "fp": 4, "fn": 0, "tp": 540}, "eval_mcc2": 0.996225, "eval_mcc": 0.996225, "eval_sn": 1.0, "eval_sp": 0.999816, "update_flag": true, "test_avg_loss": 0.000856, "test_total_loss": 1.192489, "test_acc": 0.999865, "test_prec": 0.994475, "test_recall": 1.0, "test_f1": 0.99723, "test_roc_auc": 0.999999, "test_pr_auc": 0.99998, "test_confusion_matrix": {"tn": 21741, "fp": 3, "fn": 0, "tp": 540}, "test_mcc2": 0.997165, "test_mcc": 0.997165, "test_sn": 1.0, "test_sp": 0.999862} ################################################## Total Time: 160557.612562, Avg time per epoch(10 epochs): 16055.760000 ++++++++++++Validation+++++++++++++ best f1 global step: 11928 checkpoint path: ../models/RdRP/protein/binary_class/luca_base/matrix/20250402062649/checkpoint-11928 ***** Running evaluation checkpoint-11928 ***** Dev Dataset Instantaneous batch size per GPU = 16 Dev Dataset Num examples = 22284 ################################################## {"evaluation_avg_loss_11928": 0.000981, "evaluation_total_loss_11928": 1.366584, "evaluation_acc_11928": 0.99982, "evaluation_prec_11928": 0.992647, "evaluation_recall_11928": 1.0, "evaluation_f1_11928": 0.99631, "evaluation_roc_auc_11928": 0.999999, "evaluation_pr_auc_11928": 0.999976, "evaluation_confusion_matrix_11928": {"tn": 21740, "fp": 4, "fn": 0, "tp": 540}, "evaluation_mcc2_11928": 0.996225, "evaluation_mcc_11928": 0.996225, "evaluation_sn_11928": 1.0, "evaluation_sp_11928": 0.999816} ++++++++++++Testing+++++++++++++ best f1 global step: 11928 checkpoint path: ../models/RdRP/protein/binary_class/luca_base/matrix/20250402062649/checkpoint-11928 ***** Running testing checkpoint-11928 ***** Test Dataset Instantaneous batch size per GPU = 16 Test Dataset Num examples = [22284] ################################################## {"evaluation_avg_loss_11928": 0.000856, "evaluation_total_loss_11928": 1.192489, "evaluation_acc_11928": 0.999865, "evaluation_prec_11928": 0.994475, "evaluation_recall_11928": 1.0, "evaluation_f1_11928": 0.99723, "evaluation_roc_auc_11928": 0.999999, "evaluation_pr_auc_11928": 0.99998, "evaluation_confusion_matrix_11928": {"tn": 21741, "fp": 3, "fn": 0, "tp": 540}, "evaluation_mcc2_11928": 0.997165, "evaluation_mcc_11928": 0.997165, "evaluation_sn_11928": 1.0, "evaluation_sp_11928": 0.999862}