InternVL-reasoning-4b / trainer_state.json
dyang39's picture
Upload trainer_state.json with huggingface_hub
8db4663 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.986758952753536,
"eval_steps": 500,
"global_step": 828,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.6000000000000001e-06,
"loss": 2.2445,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 3.2000000000000003e-06,
"loss": 2.2531,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 4.800000000000001e-06,
"loss": 2.1523,
"step": 3
},
{
"epoch": 0.02,
"learning_rate": 6.4000000000000006e-06,
"loss": 1.8669,
"step": 4
},
{
"epoch": 0.02,
"learning_rate": 8.000000000000001e-06,
"loss": 1.5463,
"step": 5
},
{
"epoch": 0.03,
"learning_rate": 9.600000000000001e-06,
"loss": 1.2519,
"step": 6
},
{
"epoch": 0.03,
"learning_rate": 1.1200000000000001e-05,
"loss": 1.1507,
"step": 7
},
{
"epoch": 0.04,
"learning_rate": 1.2800000000000001e-05,
"loss": 1.0274,
"step": 8
},
{
"epoch": 0.04,
"learning_rate": 1.4400000000000001e-05,
"loss": 0.9904,
"step": 9
},
{
"epoch": 0.05,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.9728,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 1.76e-05,
"loss": 0.9535,
"step": 11
},
{
"epoch": 0.06,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.94,
"step": 12
},
{
"epoch": 0.06,
"learning_rate": 2.08e-05,
"loss": 0.9797,
"step": 13
},
{
"epoch": 0.07,
"learning_rate": 2.2400000000000002e-05,
"loss": 0.9679,
"step": 14
},
{
"epoch": 0.07,
"learning_rate": 2.4e-05,
"loss": 0.9039,
"step": 15
},
{
"epoch": 0.08,
"learning_rate": 2.5600000000000002e-05,
"loss": 0.8918,
"step": 16
},
{
"epoch": 0.08,
"learning_rate": 2.7200000000000004e-05,
"loss": 0.8514,
"step": 17
},
{
"epoch": 0.09,
"learning_rate": 2.8800000000000002e-05,
"loss": 0.7711,
"step": 18
},
{
"epoch": 0.09,
"learning_rate": 3.0400000000000004e-05,
"loss": 0.8096,
"step": 19
},
{
"epoch": 0.1,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.8154,
"step": 20
},
{
"epoch": 0.1,
"learning_rate": 3.3600000000000004e-05,
"loss": 0.8412,
"step": 21
},
{
"epoch": 0.11,
"learning_rate": 3.52e-05,
"loss": 0.8701,
"step": 22
},
{
"epoch": 0.11,
"learning_rate": 3.680000000000001e-05,
"loss": 0.8773,
"step": 23
},
{
"epoch": 0.12,
"learning_rate": 3.8400000000000005e-05,
"loss": 0.8483,
"step": 24
},
{
"epoch": 0.12,
"learning_rate": 4e-05,
"loss": 0.8157,
"step": 25
},
{
"epoch": 0.13,
"learning_rate": 3.999984693774727e-05,
"loss": 0.8581,
"step": 26
},
{
"epoch": 0.13,
"learning_rate": 3.999938775333188e-05,
"loss": 0.8138,
"step": 27
},
{
"epoch": 0.13,
"learning_rate": 3.999862245378221e-05,
"loss": 0.8278,
"step": 28
},
{
"epoch": 0.14,
"learning_rate": 3.99975510508121e-05,
"loss": 0.8481,
"step": 29
},
{
"epoch": 0.14,
"learning_rate": 3.9996173560820705e-05,
"loss": 0.8524,
"step": 30
},
{
"epoch": 0.15,
"learning_rate": 3.999449000489218e-05,
"loss": 0.8708,
"step": 31
},
{
"epoch": 0.15,
"learning_rate": 3.999250040879542e-05,
"loss": 0.8405,
"step": 32
},
{
"epoch": 0.16,
"learning_rate": 3.999020480298362e-05,
"loss": 0.8568,
"step": 33
},
{
"epoch": 0.16,
"learning_rate": 3.9987603222593846e-05,
"loss": 0.7929,
"step": 34
},
{
"epoch": 0.17,
"learning_rate": 3.998469570744648e-05,
"loss": 0.8818,
"step": 35
},
{
"epoch": 0.17,
"learning_rate": 3.9981482302044604e-05,
"loss": 0.8383,
"step": 36
},
{
"epoch": 0.18,
"learning_rate": 3.997796305557331e-05,
"loss": 0.8177,
"step": 37
},
{
"epoch": 0.18,
"learning_rate": 3.9974138021899e-05,
"loss": 0.8688,
"step": 38
},
{
"epoch": 0.19,
"learning_rate": 3.9970007259568474e-05,
"loss": 0.8567,
"step": 39
},
{
"epoch": 0.19,
"learning_rate": 3.996557083180813e-05,
"loss": 0.8225,
"step": 40
},
{
"epoch": 0.2,
"learning_rate": 3.996082880652292e-05,
"loss": 0.8681,
"step": 41
},
{
"epoch": 0.2,
"learning_rate": 3.995578125629537e-05,
"loss": 0.8248,
"step": 42
},
{
"epoch": 0.21,
"learning_rate": 3.99504282583844e-05,
"loss": 0.8459,
"step": 43
},
{
"epoch": 0.21,
"learning_rate": 3.99447698947242e-05,
"loss": 0.7633,
"step": 44
},
{
"epoch": 0.22,
"learning_rate": 3.993880625192298e-05,
"loss": 0.8474,
"step": 45
},
{
"epoch": 0.22,
"learning_rate": 3.993253742126159e-05,
"loss": 0.8679,
"step": 46
},
{
"epoch": 0.23,
"learning_rate": 3.992596349869216e-05,
"loss": 0.7761,
"step": 47
},
{
"epoch": 0.23,
"learning_rate": 3.9919084584836634e-05,
"loss": 0.8133,
"step": 48
},
{
"epoch": 0.24,
"learning_rate": 3.991190078498521e-05,
"loss": 0.8003,
"step": 49
},
{
"epoch": 0.24,
"learning_rate": 3.9904412209094755e-05,
"loss": 0.7804,
"step": 50
},
{
"epoch": 0.25,
"learning_rate": 3.98966189717871e-05,
"loss": 0.8182,
"step": 51
},
{
"epoch": 0.25,
"learning_rate": 3.9888521192347284e-05,
"loss": 0.7216,
"step": 52
},
{
"epoch": 0.26,
"learning_rate": 3.988011899472175e-05,
"loss": 0.815,
"step": 53
},
{
"epoch": 0.26,
"learning_rate": 3.987141250751641e-05,
"loss": 0.7936,
"step": 54
},
{
"epoch": 0.26,
"learning_rate": 3.9862401863994744e-05,
"loss": 0.8217,
"step": 55
},
{
"epoch": 0.27,
"learning_rate": 3.9853087202075685e-05,
"loss": 0.8067,
"step": 56
},
{
"epoch": 0.27,
"learning_rate": 3.984346866433154e-05,
"loss": 0.7595,
"step": 57
},
{
"epoch": 0.28,
"learning_rate": 3.98335463979858e-05,
"loss": 0.7803,
"step": 58
},
{
"epoch": 0.28,
"learning_rate": 3.9823320554910945e-05,
"loss": 0.8233,
"step": 59
},
{
"epoch": 0.29,
"learning_rate": 3.9812791291626e-05,
"loss": 0.8029,
"step": 60
},
{
"epoch": 0.29,
"learning_rate": 3.980195876929426e-05,
"loss": 0.8611,
"step": 61
},
{
"epoch": 0.3,
"learning_rate": 3.979082315372074e-05,
"loss": 0.8465,
"step": 62
},
{
"epoch": 0.3,
"learning_rate": 3.9779384615349694e-05,
"loss": 0.845,
"step": 63
},
{
"epoch": 0.31,
"learning_rate": 3.9767643329261954e-05,
"loss": 0.7699,
"step": 64
},
{
"epoch": 0.31,
"learning_rate": 3.97555994751723e-05,
"loss": 0.8047,
"step": 65
},
{
"epoch": 0.32,
"learning_rate": 3.974325323742666e-05,
"loss": 0.7777,
"step": 66
},
{
"epoch": 0.32,
"learning_rate": 3.973060480499936e-05,
"loss": 0.8331,
"step": 67
},
{
"epoch": 0.33,
"learning_rate": 3.971765437149012e-05,
"loss": 0.8154,
"step": 68
},
{
"epoch": 0.33,
"learning_rate": 3.9704402135121214e-05,
"loss": 0.7861,
"step": 69
},
{
"epoch": 0.34,
"learning_rate": 3.969084829873436e-05,
"loss": 0.77,
"step": 70
},
{
"epoch": 0.34,
"learning_rate": 3.9676993069787614e-05,
"loss": 0.7481,
"step": 71
},
{
"epoch": 0.35,
"learning_rate": 3.9662836660352253e-05,
"loss": 0.8169,
"step": 72
},
{
"epoch": 0.35,
"learning_rate": 3.964837928710945e-05,
"loss": 0.8223,
"step": 73
},
{
"epoch": 0.36,
"learning_rate": 3.963362117134703e-05,
"loss": 0.7206,
"step": 74
},
{
"epoch": 0.36,
"learning_rate": 3.961856253895603e-05,
"loss": 0.7769,
"step": 75
},
{
"epoch": 0.37,
"learning_rate": 3.960320362042727e-05,
"loss": 0.7664,
"step": 76
},
{
"epoch": 0.37,
"learning_rate": 3.958754465084782e-05,
"loss": 0.7793,
"step": 77
},
{
"epoch": 0.38,
"learning_rate": 3.9571585869897393e-05,
"loss": 0.7771,
"step": 78
},
{
"epoch": 0.38,
"learning_rate": 3.9555327521844684e-05,
"loss": 0.8059,
"step": 79
},
{
"epoch": 0.39,
"learning_rate": 3.953876985554364e-05,
"loss": 0.8497,
"step": 80
},
{
"epoch": 0.39,
"learning_rate": 3.9521913124429615e-05,
"loss": 0.7565,
"step": 81
},
{
"epoch": 0.39,
"learning_rate": 3.9504757586515554e-05,
"loss": 0.7474,
"step": 82
},
{
"epoch": 0.4,
"learning_rate": 3.9487303504387966e-05,
"loss": 0.7756,
"step": 83
},
{
"epoch": 0.4,
"learning_rate": 3.9469551145202974e-05,
"loss": 0.7837,
"step": 84
},
{
"epoch": 0.41,
"learning_rate": 3.945150078068219e-05,
"loss": 0.7551,
"step": 85
},
{
"epoch": 0.41,
"learning_rate": 3.943315268710855e-05,
"loss": 0.8106,
"step": 86
},
{
"epoch": 0.42,
"learning_rate": 3.9414507145322115e-05,
"loss": 0.7108,
"step": 87
},
{
"epoch": 0.42,
"learning_rate": 3.939556444071574e-05,
"loss": 0.7712,
"step": 88
},
{
"epoch": 0.43,
"learning_rate": 3.9376324863230735e-05,
"loss": 0.7709,
"step": 89
},
{
"epoch": 0.43,
"learning_rate": 3.9356788707352406e-05,
"loss": 0.7745,
"step": 90
},
{
"epoch": 0.44,
"learning_rate": 3.933695627210555e-05,
"loss": 0.7723,
"step": 91
},
{
"epoch": 0.44,
"learning_rate": 3.931682786104991e-05,
"loss": 0.7559,
"step": 92
},
{
"epoch": 0.45,
"learning_rate": 3.929640378227545e-05,
"loss": 0.7185,
"step": 93
},
{
"epoch": 0.45,
"learning_rate": 3.927568434839774e-05,
"loss": 0.7663,
"step": 94
},
{
"epoch": 0.46,
"learning_rate": 3.925466987655309e-05,
"loss": 0.7859,
"step": 95
},
{
"epoch": 0.46,
"learning_rate": 3.923336068839376e-05,
"loss": 0.7218,
"step": 96
},
{
"epoch": 0.47,
"learning_rate": 3.921175711008297e-05,
"loss": 0.7417,
"step": 97
},
{
"epoch": 0.47,
"learning_rate": 3.9189859472289956e-05,
"loss": 0.7042,
"step": 98
},
{
"epoch": 0.48,
"learning_rate": 3.916766811018489e-05,
"loss": 0.7549,
"step": 99
},
{
"epoch": 0.48,
"learning_rate": 3.9145183363433777e-05,
"loss": 0.7699,
"step": 100
},
{
"epoch": 0.49,
"learning_rate": 3.91224055761932e-05,
"loss": 0.7717,
"step": 101
},
{
"epoch": 0.49,
"learning_rate": 3.909933509710511e-05,
"loss": 0.7997,
"step": 102
},
{
"epoch": 0.5,
"learning_rate": 3.907597227929145e-05,
"loss": 0.7058,
"step": 103
},
{
"epoch": 0.5,
"learning_rate": 3.9052317480348785e-05,
"loss": 0.7423,
"step": 104
},
{
"epoch": 0.51,
"learning_rate": 3.902837106234278e-05,
"loss": 0.7638,
"step": 105
},
{
"epoch": 0.51,
"learning_rate": 3.900413339180272e-05,
"loss": 0.7625,
"step": 106
},
{
"epoch": 0.52,
"learning_rate": 3.8979604839715836e-05,
"loss": 0.7994,
"step": 107
},
{
"epoch": 0.52,
"learning_rate": 3.895478578152168e-05,
"loss": 0.751,
"step": 108
},
{
"epoch": 0.52,
"learning_rate": 3.8929676597106344e-05,
"loss": 0.7974,
"step": 109
},
{
"epoch": 0.53,
"learning_rate": 3.890427767079667e-05,
"loss": 0.7439,
"step": 110
},
{
"epoch": 0.53,
"learning_rate": 3.8878589391354335e-05,
"loss": 0.7413,
"step": 111
},
{
"epoch": 0.54,
"learning_rate": 3.8852612151969936e-05,
"loss": 0.7252,
"step": 112
},
{
"epoch": 0.54,
"learning_rate": 3.8826346350256943e-05,
"loss": 0.8034,
"step": 113
},
{
"epoch": 0.55,
"learning_rate": 3.879979238824565e-05,
"loss": 0.7282,
"step": 114
},
{
"epoch": 0.55,
"learning_rate": 3.877295067237697e-05,
"loss": 0.7553,
"step": 115
},
{
"epoch": 0.56,
"learning_rate": 3.874582161349625e-05,
"loss": 0.7268,
"step": 116
},
{
"epoch": 0.56,
"learning_rate": 3.871840562684698e-05,
"loss": 0.765,
"step": 117
},
{
"epoch": 0.57,
"learning_rate": 3.869070313206444e-05,
"loss": 0.7385,
"step": 118
},
{
"epoch": 0.57,
"learning_rate": 3.866271455316924e-05,
"loss": 0.7452,
"step": 119
},
{
"epoch": 0.58,
"learning_rate": 3.863444031856088e-05,
"loss": 0.7273,
"step": 120
},
{
"epoch": 0.58,
"learning_rate": 3.860588086101116e-05,
"loss": 0.7277,
"step": 121
},
{
"epoch": 0.59,
"learning_rate": 3.857703661765758e-05,
"loss": 0.669,
"step": 122
},
{
"epoch": 0.59,
"learning_rate": 3.8547908029996614e-05,
"loss": 0.7766,
"step": 123
},
{
"epoch": 0.6,
"learning_rate": 3.8518495543877e-05,
"loss": 0.7316,
"step": 124
},
{
"epoch": 0.6,
"learning_rate": 3.848879960949287e-05,
"loss": 0.7323,
"step": 125
},
{
"epoch": 0.61,
"learning_rate": 3.8458820681376884e-05,
"loss": 0.7672,
"step": 126
},
{
"epoch": 0.61,
"learning_rate": 3.8428559218393264e-05,
"loss": 0.6816,
"step": 127
},
{
"epoch": 0.62,
"learning_rate": 3.839801568373079e-05,
"loss": 0.7286,
"step": 128
},
{
"epoch": 0.62,
"learning_rate": 3.8367190544895685e-05,
"loss": 0.7232,
"step": 129
},
{
"epoch": 0.63,
"learning_rate": 3.8336084273704457e-05,
"loss": 0.8108,
"step": 130
},
{
"epoch": 0.63,
"learning_rate": 3.830469734627671e-05,
"loss": 0.7189,
"step": 131
},
{
"epoch": 0.64,
"learning_rate": 3.827303024302782e-05,
"loss": 0.7385,
"step": 132
},
{
"epoch": 0.64,
"learning_rate": 3.8241083448661604e-05,
"loss": 0.7063,
"step": 133
},
{
"epoch": 0.65,
"learning_rate": 3.82088574521629e-05,
"loss": 0.7086,
"step": 134
},
{
"epoch": 0.65,
"learning_rate": 3.817635274679006e-05,
"loss": 0.7205,
"step": 135
},
{
"epoch": 0.65,
"learning_rate": 3.814356983006743e-05,
"loss": 0.6482,
"step": 136
},
{
"epoch": 0.66,
"learning_rate": 3.8110509203777735e-05,
"loss": 0.6529,
"step": 137
},
{
"epoch": 0.66,
"learning_rate": 3.8077171373954336e-05,
"loss": 0.7627,
"step": 138
},
{
"epoch": 0.67,
"learning_rate": 3.8043556850873594e-05,
"loss": 0.7225,
"step": 139
},
{
"epoch": 0.67,
"learning_rate": 3.8009666149046957e-05,
"loss": 0.6181,
"step": 140
},
{
"epoch": 0.68,
"learning_rate": 3.797549978721315e-05,
"loss": 0.6694,
"step": 141
},
{
"epoch": 0.68,
"learning_rate": 3.7941058288330205e-05,
"loss": 0.7681,
"step": 142
},
{
"epoch": 0.69,
"learning_rate": 3.790634217956745e-05,
"loss": 0.7092,
"step": 143
},
{
"epoch": 0.69,
"learning_rate": 3.7871351992297475e-05,
"loss": 0.6695,
"step": 144
},
{
"epoch": 0.7,
"learning_rate": 3.7836088262087975e-05,
"loss": 0.649,
"step": 145
},
{
"epoch": 0.7,
"learning_rate": 3.780055152869354e-05,
"loss": 0.6117,
"step": 146
},
{
"epoch": 0.71,
"learning_rate": 3.7764742336047416e-05,
"loss": 0.6281,
"step": 147
},
{
"epoch": 0.71,
"learning_rate": 3.772866123225317e-05,
"loss": 0.7467,
"step": 148
},
{
"epoch": 0.72,
"learning_rate": 3.7692308769576324e-05,
"loss": 0.7524,
"step": 149
},
{
"epoch": 0.72,
"learning_rate": 3.765568550443583e-05,
"loss": 0.6974,
"step": 150
},
{
"epoch": 0.73,
"learning_rate": 3.761879199739567e-05,
"loss": 0.6814,
"step": 151
},
{
"epoch": 0.73,
"learning_rate": 3.7581628813156145e-05,
"loss": 0.6862,
"step": 152
},
{
"epoch": 0.74,
"learning_rate": 3.754419652054534e-05,
"loss": 0.7204,
"step": 153
},
{
"epoch": 0.74,
"learning_rate": 3.750649569251035e-05,
"loss": 0.7122,
"step": 154
},
{
"epoch": 0.75,
"learning_rate": 3.746852690610855e-05,
"loss": 0.6916,
"step": 155
},
{
"epoch": 0.75,
"learning_rate": 3.743029074249872e-05,
"loss": 0.6784,
"step": 156
},
{
"epoch": 0.76,
"learning_rate": 3.739178778693222e-05,
"loss": 0.6783,
"step": 157
},
{
"epoch": 0.76,
"learning_rate": 3.735301862874395e-05,
"loss": 0.6273,
"step": 158
},
{
"epoch": 0.77,
"learning_rate": 3.7313983861343375e-05,
"loss": 0.6828,
"step": 159
},
{
"epoch": 0.77,
"learning_rate": 3.727468408220544e-05,
"loss": 0.6589,
"step": 160
},
{
"epoch": 0.78,
"learning_rate": 3.723511989286142e-05,
"loss": 0.6628,
"step": 161
},
{
"epoch": 0.78,
"learning_rate": 3.71952918988897e-05,
"loss": 0.7284,
"step": 162
},
{
"epoch": 0.78,
"learning_rate": 3.7155200709906546e-05,
"loss": 0.7146,
"step": 163
},
{
"epoch": 0.79,
"learning_rate": 3.711484693955671e-05,
"loss": 0.584,
"step": 164
},
{
"epoch": 0.79,
"learning_rate": 3.707423120550411e-05,
"loss": 0.6794,
"step": 165
},
{
"epoch": 0.8,
"learning_rate": 3.70333541294223e-05,
"loss": 0.6943,
"step": 166
},
{
"epoch": 0.8,
"learning_rate": 3.6992216336985036e-05,
"loss": 0.6189,
"step": 167
},
{
"epoch": 0.81,
"learning_rate": 3.695081845785663e-05,
"loss": 0.6605,
"step": 168
},
{
"epoch": 0.81,
"learning_rate": 3.690916112568233e-05,
"loss": 0.6721,
"step": 169
},
{
"epoch": 0.82,
"learning_rate": 3.686724497807867e-05,
"loss": 0.6997,
"step": 170
},
{
"epoch": 0.82,
"learning_rate": 3.6825070656623626e-05,
"loss": 0.7126,
"step": 171
},
{
"epoch": 0.83,
"learning_rate": 3.678263880684688e-05,
"loss": 0.6579,
"step": 172
},
{
"epoch": 0.83,
"learning_rate": 3.6739950078219875e-05,
"loss": 0.6946,
"step": 173
},
{
"epoch": 0.84,
"learning_rate": 3.669700512414591e-05,
"loss": 0.6754,
"step": 174
},
{
"epoch": 0.84,
"learning_rate": 3.6653804601950126e-05,
"loss": 0.6339,
"step": 175
},
{
"epoch": 0.85,
"learning_rate": 3.661034917286945e-05,
"loss": 0.6238,
"step": 176
},
{
"epoch": 0.85,
"learning_rate": 3.656663950204246e-05,
"loss": 0.6639,
"step": 177
},
{
"epoch": 0.86,
"learning_rate": 3.6522676258499246e-05,
"loss": 0.6441,
"step": 178
},
{
"epoch": 0.86,
"learning_rate": 3.6478460115151084e-05,
"loss": 0.7069,
"step": 179
},
{
"epoch": 0.87,
"learning_rate": 3.6433991748780255e-05,
"loss": 0.6448,
"step": 180
},
{
"epoch": 0.87,
"learning_rate": 3.638927184002958e-05,
"loss": 0.705,
"step": 181
},
{
"epoch": 0.88,
"learning_rate": 3.634430107339206e-05,
"loss": 0.6275,
"step": 182
},
{
"epoch": 0.88,
"learning_rate": 3.629908013720036e-05,
"loss": 0.5772,
"step": 183
},
{
"epoch": 0.89,
"learning_rate": 3.625360972361635e-05,
"loss": 0.6817,
"step": 184
},
{
"epoch": 0.89,
"learning_rate": 3.62078905286204e-05,
"loss": 0.6652,
"step": 185
},
{
"epoch": 0.9,
"learning_rate": 3.616192325200082e-05,
"loss": 0.6137,
"step": 186
},
{
"epoch": 0.9,
"learning_rate": 3.611570859734309e-05,
"loss": 0.6409,
"step": 187
},
{
"epoch": 0.91,
"learning_rate": 3.606924727201914e-05,
"loss": 0.6396,
"step": 188
},
{
"epoch": 0.91,
"learning_rate": 3.602253998717647e-05,
"loss": 0.7233,
"step": 189
},
{
"epoch": 0.91,
"learning_rate": 3.59755874577273e-05,
"loss": 0.5505,
"step": 190
},
{
"epoch": 0.92,
"learning_rate": 3.592839040233764e-05,
"loss": 0.5851,
"step": 191
},
{
"epoch": 0.92,
"learning_rate": 3.5880949543416235e-05,
"loss": 0.5618,
"step": 192
},
{
"epoch": 0.93,
"learning_rate": 3.583326560710356e-05,
"loss": 0.6,
"step": 193
},
{
"epoch": 0.93,
"learning_rate": 3.57853393232607e-05,
"loss": 0.6251,
"step": 194
},
{
"epoch": 0.94,
"learning_rate": 3.573717142545814e-05,
"loss": 0.6066,
"step": 195
},
{
"epoch": 0.94,
"learning_rate": 3.5688762650964585e-05,
"loss": 0.5661,
"step": 196
},
{
"epoch": 0.95,
"learning_rate": 3.5640113740735625e-05,
"loss": 0.6269,
"step": 197
},
{
"epoch": 0.95,
"learning_rate": 3.5591225439402455e-05,
"loss": 0.5059,
"step": 198
},
{
"epoch": 0.96,
"learning_rate": 3.554209849526043e-05,
"loss": 0.6517,
"step": 199
},
{
"epoch": 0.96,
"learning_rate": 3.5492733660257605e-05,
"loss": 0.5813,
"step": 200
},
{
"epoch": 0.97,
"learning_rate": 3.5443131689983285e-05,
"loss": 0.5806,
"step": 201
},
{
"epoch": 0.97,
"learning_rate": 3.53932933436564e-05,
"loss": 0.5882,
"step": 202
},
{
"epoch": 0.98,
"learning_rate": 3.534321938411389e-05,
"loss": 0.5635,
"step": 203
},
{
"epoch": 0.98,
"learning_rate": 3.529291057779907e-05,
"loss": 0.6346,
"step": 204
},
{
"epoch": 0.99,
"learning_rate": 3.524236769474987e-05,
"loss": 0.6491,
"step": 205
},
{
"epoch": 0.99,
"learning_rate": 3.519159150858703e-05,
"loss": 0.5108,
"step": 206
},
{
"epoch": 1.0,
"learning_rate": 3.514058279650231e-05,
"loss": 0.6038,
"step": 207
},
{
"epoch": 1.0,
"learning_rate": 3.5089342339246526e-05,
"loss": 0.5471,
"step": 208
},
{
"epoch": 1.01,
"learning_rate": 3.503787092111768e-05,
"loss": 0.5146,
"step": 209
},
{
"epoch": 1.01,
"learning_rate": 3.498616932994888e-05,
"loss": 0.5266,
"step": 210
},
{
"epoch": 1.02,
"learning_rate": 3.493423835709634e-05,
"loss": 0.47,
"step": 211
},
{
"epoch": 1.02,
"learning_rate": 3.488207879742722e-05,
"loss": 0.5524,
"step": 212
},
{
"epoch": 1.03,
"learning_rate": 3.482969144930749e-05,
"loss": 0.4221,
"step": 213
},
{
"epoch": 1.03,
"learning_rate": 3.47770771145897e-05,
"loss": 0.5545,
"step": 214
},
{
"epoch": 1.04,
"learning_rate": 3.4724236598600725e-05,
"loss": 0.5427,
"step": 215
},
{
"epoch": 1.04,
"learning_rate": 3.467117071012938e-05,
"loss": 0.4692,
"step": 216
},
{
"epoch": 1.04,
"learning_rate": 3.461788026141414e-05,
"loss": 0.4469,
"step": 217
},
{
"epoch": 1.05,
"learning_rate": 3.456436606813059e-05,
"loss": 0.4594,
"step": 218
},
{
"epoch": 1.05,
"learning_rate": 3.451062894937905e-05,
"loss": 0.5229,
"step": 219
},
{
"epoch": 1.06,
"learning_rate": 3.4456669727671944e-05,
"loss": 0.4865,
"step": 220
},
{
"epoch": 1.06,
"learning_rate": 3.440248922892129e-05,
"loss": 0.4027,
"step": 221
},
{
"epoch": 1.07,
"learning_rate": 3.4348088282426e-05,
"loss": 0.5003,
"step": 222
},
{
"epoch": 1.07,
"learning_rate": 3.429346772085923e-05,
"loss": 0.5518,
"step": 223
},
{
"epoch": 1.08,
"learning_rate": 3.423862838025557e-05,
"loss": 0.453,
"step": 224
},
{
"epoch": 1.08,
"learning_rate": 3.4183571099998355e-05,
"loss": 0.4592,
"step": 225
},
{
"epoch": 1.09,
"learning_rate": 3.4128296722806694e-05,
"loss": 0.4911,
"step": 226
},
{
"epoch": 1.09,
"learning_rate": 3.407280609472267e-05,
"loss": 0.4616,
"step": 227
},
{
"epoch": 1.1,
"learning_rate": 3.4017100065098334e-05,
"loss": 0.5109,
"step": 228
},
{
"epoch": 1.1,
"learning_rate": 3.3961179486582724e-05,
"loss": 0.4871,
"step": 229
},
{
"epoch": 1.11,
"learning_rate": 3.390504521510882e-05,
"loss": 0.4952,
"step": 230
},
{
"epoch": 1.11,
"learning_rate": 3.384869810988041e-05,
"loss": 0.3892,
"step": 231
},
{
"epoch": 1.12,
"learning_rate": 3.379213903335899e-05,
"loss": 0.511,
"step": 232
},
{
"epoch": 1.12,
"learning_rate": 3.373536885125053e-05,
"loss": 0.4913,
"step": 233
},
{
"epoch": 1.13,
"learning_rate": 3.367838843249222e-05,
"loss": 0.4147,
"step": 234
},
{
"epoch": 1.13,
"learning_rate": 3.362119864923918e-05,
"loss": 0.4271,
"step": 235
},
{
"epoch": 1.14,
"learning_rate": 3.356380037685113e-05,
"loss": 0.5301,
"step": 236
},
{
"epoch": 1.14,
"learning_rate": 3.350619449387895e-05,
"loss": 0.4934,
"step": 237
},
{
"epoch": 1.15,
"learning_rate": 3.344838188205126e-05,
"loss": 0.495,
"step": 238
},
{
"epoch": 1.15,
"learning_rate": 3.3390363426260925e-05,
"loss": 0.5223,
"step": 239
},
{
"epoch": 1.16,
"learning_rate": 3.333214001455149e-05,
"loss": 0.456,
"step": 240
},
{
"epoch": 1.16,
"learning_rate": 3.3273712538103625e-05,
"loss": 0.521,
"step": 241
},
{
"epoch": 1.17,
"learning_rate": 3.3215081891221444e-05,
"loss": 0.4192,
"step": 242
},
{
"epoch": 1.17,
"learning_rate": 3.315624897131883e-05,
"loss": 0.433,
"step": 243
},
{
"epoch": 1.17,
"learning_rate": 3.309721467890571e-05,
"loss": 0.4189,
"step": 244
},
{
"epoch": 1.18,
"learning_rate": 3.303797991757425e-05,
"loss": 0.4392,
"step": 245
},
{
"epoch": 1.18,
"learning_rate": 3.297854559398507e-05,
"loss": 0.4689,
"step": 246
},
{
"epoch": 1.19,
"learning_rate": 3.2918912617853323e-05,
"loss": 0.4116,
"step": 247
},
{
"epoch": 1.19,
"learning_rate": 3.285908190193474e-05,
"loss": 0.4349,
"step": 248
},
{
"epoch": 1.2,
"learning_rate": 3.279905436201178e-05,
"loss": 0.4447,
"step": 249
},
{
"epoch": 1.2,
"learning_rate": 3.273883091687946e-05,
"loss": 0.3817,
"step": 250
},
{
"epoch": 1.21,
"learning_rate": 3.267841248833141e-05,
"loss": 0.5276,
"step": 251
},
{
"epoch": 1.21,
"learning_rate": 3.26178000011457e-05,
"loss": 0.4973,
"step": 252
},
{
"epoch": 1.22,
"learning_rate": 3.255699438307073e-05,
"loss": 0.4345,
"step": 253
},
{
"epoch": 1.22,
"learning_rate": 3.249599656481097e-05,
"loss": 0.4391,
"step": 254
},
{
"epoch": 1.23,
"learning_rate": 3.243480748001278e-05,
"loss": 0.4006,
"step": 255
},
{
"epoch": 1.23,
"learning_rate": 3.237342806525007e-05,
"loss": 0.3827,
"step": 256
},
{
"epoch": 1.24,
"learning_rate": 3.231185926000999e-05,
"loss": 0.5182,
"step": 257
},
{
"epoch": 1.24,
"learning_rate": 3.2250102006678553e-05,
"loss": 0.4304,
"step": 258
},
{
"epoch": 1.25,
"learning_rate": 3.218815725052617e-05,
"loss": 0.3919,
"step": 259
},
{
"epoch": 1.25,
"learning_rate": 3.212602593969325e-05,
"loss": 0.4645,
"step": 260
},
{
"epoch": 1.26,
"learning_rate": 3.206370902517562e-05,
"loss": 0.4707,
"step": 261
},
{
"epoch": 1.26,
"learning_rate": 3.200120746081003e-05,
"loss": 0.4003,
"step": 262
},
{
"epoch": 1.27,
"learning_rate": 3.193852220325949e-05,
"loss": 0.4671,
"step": 263
},
{
"epoch": 1.27,
"learning_rate": 3.187565421199868e-05,
"loss": 0.4404,
"step": 264
},
{
"epoch": 1.28,
"learning_rate": 3.181260444929923e-05,
"loss": 0.4389,
"step": 265
},
{
"epoch": 1.28,
"learning_rate": 3.174937388021501e-05,
"loss": 0.4845,
"step": 266
},
{
"epoch": 1.29,
"learning_rate": 3.168596347256737e-05,
"loss": 0.487,
"step": 267
},
{
"epoch": 1.29,
"learning_rate": 3.162237419693028e-05,
"loss": 0.3587,
"step": 268
},
{
"epoch": 1.3,
"learning_rate": 3.155860702661553e-05,
"loss": 0.2996,
"step": 269
},
{
"epoch": 1.3,
"learning_rate": 3.149466293765778e-05,
"loss": 0.4116,
"step": 270
},
{
"epoch": 1.3,
"learning_rate": 3.143054290879966e-05,
"loss": 0.4242,
"step": 271
},
{
"epoch": 1.31,
"learning_rate": 3.136624792147679e-05,
"loss": 0.4106,
"step": 272
},
{
"epoch": 1.31,
"learning_rate": 3.130177895980271e-05,
"loss": 0.4634,
"step": 273
},
{
"epoch": 1.32,
"learning_rate": 3.1237137010553896e-05,
"loss": 0.3338,
"step": 274
},
{
"epoch": 1.32,
"learning_rate": 3.117232306315456e-05,
"loss": 0.4786,
"step": 275
},
{
"epoch": 1.33,
"learning_rate": 3.110733810966161e-05,
"loss": 0.5054,
"step": 276
},
{
"epoch": 1.33,
"learning_rate": 3.104218314474936e-05,
"loss": 0.4239,
"step": 277
},
{
"epoch": 1.34,
"learning_rate": 3.097685916569439e-05,
"loss": 0.479,
"step": 278
},
{
"epoch": 1.34,
"learning_rate": 3.0911367172360234e-05,
"loss": 0.3421,
"step": 279
},
{
"epoch": 1.35,
"learning_rate": 3.08457081671821e-05,
"loss": 0.3319,
"step": 280
},
{
"epoch": 1.35,
"learning_rate": 3.0779883155151506e-05,
"loss": 0.39,
"step": 281
},
{
"epoch": 1.36,
"learning_rate": 3.071389314380093e-05,
"loss": 0.4444,
"step": 282
},
{
"epoch": 1.36,
"learning_rate": 3.064773914318833e-05,
"loss": 0.4337,
"step": 283
},
{
"epoch": 1.37,
"learning_rate": 3.0581422165881765e-05,
"loss": 0.3625,
"step": 284
},
{
"epoch": 1.37,
"learning_rate": 3.0514943226943816e-05,
"loss": 0.3672,
"step": 285
},
{
"epoch": 1.38,
"learning_rate": 3.044830334391609e-05,
"loss": 0.3519,
"step": 286
},
{
"epoch": 1.38,
"learning_rate": 3.038150353680367e-05,
"loss": 0.4319,
"step": 287
},
{
"epoch": 1.39,
"learning_rate": 3.031454482805944e-05,
"loss": 0.4481,
"step": 288
},
{
"epoch": 1.39,
"learning_rate": 3.024742824256848e-05,
"loss": 0.32,
"step": 289
},
{
"epoch": 1.4,
"learning_rate": 3.018015480763236e-05,
"loss": 0.4338,
"step": 290
},
{
"epoch": 1.4,
"learning_rate": 3.011272555295344e-05,
"loss": 0.3395,
"step": 291
},
{
"epoch": 1.41,
"learning_rate": 3.0045141510619084e-05,
"loss": 0.3989,
"step": 292
},
{
"epoch": 1.41,
"learning_rate": 2.9977403715085865e-05,
"loss": 0.3522,
"step": 293
},
{
"epoch": 1.42,
"learning_rate": 2.990951320316374e-05,
"loss": 0.302,
"step": 294
},
{
"epoch": 1.42,
"learning_rate": 2.984147101400018e-05,
"loss": 0.4202,
"step": 295
},
{
"epoch": 1.43,
"learning_rate": 2.9773278189064257e-05,
"loss": 0.3644,
"step": 296
},
{
"epoch": 1.43,
"learning_rate": 2.9704935772130715e-05,
"loss": 0.4119,
"step": 297
},
{
"epoch": 1.43,
"learning_rate": 2.9636444809263985e-05,
"loss": 0.3488,
"step": 298
},
{
"epoch": 1.44,
"learning_rate": 2.9567806348802172e-05,
"loss": 0.3844,
"step": 299
},
{
"epoch": 1.44,
"learning_rate": 2.9499021441341012e-05,
"loss": 0.3997,
"step": 300
},
{
"epoch": 1.45,
"learning_rate": 2.943009113971779e-05,
"loss": 0.3986,
"step": 301
},
{
"epoch": 1.45,
"learning_rate": 2.9361016498995243e-05,
"loss": 0.4919,
"step": 302
},
{
"epoch": 1.46,
"learning_rate": 2.9291798576445373e-05,
"loss": 0.4761,
"step": 303
},
{
"epoch": 1.46,
"learning_rate": 2.92224384315333e-05,
"loss": 0.365,
"step": 304
},
{
"epoch": 1.47,
"learning_rate": 2.915293712590102e-05,
"loss": 0.3406,
"step": 305
},
{
"epoch": 1.47,
"learning_rate": 2.9083295723351185e-05,
"loss": 0.3183,
"step": 306
},
{
"epoch": 1.48,
"learning_rate": 2.9013515289830785e-05,
"loss": 0.3819,
"step": 307
},
{
"epoch": 1.48,
"learning_rate": 2.8943596893414853e-05,
"loss": 0.4082,
"step": 308
},
{
"epoch": 1.49,
"learning_rate": 2.887354160429012e-05,
"loss": 0.4097,
"step": 309
},
{
"epoch": 1.49,
"learning_rate": 2.8803350494738615e-05,
"loss": 0.3797,
"step": 310
},
{
"epoch": 1.5,
"learning_rate": 2.8733024639121283e-05,
"loss": 0.3632,
"step": 311
},
{
"epoch": 1.5,
"learning_rate": 2.8662565113861506e-05,
"loss": 0.3771,
"step": 312
},
{
"epoch": 1.51,
"learning_rate": 2.859197299742865e-05,
"loss": 0.3052,
"step": 313
},
{
"epoch": 1.51,
"learning_rate": 2.8521249370321556e-05,
"loss": 0.3176,
"step": 314
},
{
"epoch": 1.52,
"learning_rate": 2.845039531505199e-05,
"loss": 0.339,
"step": 315
},
{
"epoch": 1.52,
"learning_rate": 2.8379411916128083e-05,
"loss": 0.2996,
"step": 316
},
{
"epoch": 1.53,
"learning_rate": 2.8308300260037734e-05,
"loss": 0.3259,
"step": 317
},
{
"epoch": 1.53,
"learning_rate": 2.8237061435231964e-05,
"loss": 0.3557,
"step": 318
},
{
"epoch": 1.54,
"learning_rate": 2.8165696532108272e-05,
"loss": 0.3995,
"step": 319
},
{
"epoch": 1.54,
"learning_rate": 2.8094206642993955e-05,
"loss": 0.3834,
"step": 320
},
{
"epoch": 1.55,
"learning_rate": 2.802259286212935e-05,
"loss": 0.39,
"step": 321
},
{
"epoch": 1.55,
"learning_rate": 2.7950856285651124e-05,
"loss": 0.3714,
"step": 322
},
{
"epoch": 1.56,
"learning_rate": 2.787899801157547e-05,
"loss": 0.2646,
"step": 323
},
{
"epoch": 1.56,
"learning_rate": 2.7807019139781326e-05,
"loss": 0.3543,
"step": 324
},
{
"epoch": 1.56,
"learning_rate": 2.773492077199351e-05,
"loss": 0.3026,
"step": 325
},
{
"epoch": 1.57,
"learning_rate": 2.7662704011765894e-05,
"loss": 0.2986,
"step": 326
},
{
"epoch": 1.57,
"learning_rate": 2.7590369964464468e-05,
"loss": 0.3504,
"step": 327
},
{
"epoch": 1.58,
"learning_rate": 2.7517919737250465e-05,
"loss": 0.3483,
"step": 328
},
{
"epoch": 1.58,
"learning_rate": 2.7445354439063368e-05,
"loss": 0.3629,
"step": 329
},
{
"epoch": 1.59,
"learning_rate": 2.7372675180603994e-05,
"loss": 0.3209,
"step": 330
},
{
"epoch": 1.59,
"learning_rate": 2.729988307431744e-05,
"loss": 0.3807,
"step": 331
},
{
"epoch": 1.6,
"learning_rate": 2.722697923437608e-05,
"loss": 0.2818,
"step": 332
},
{
"epoch": 1.6,
"learning_rate": 2.7153964776662517e-05,
"loss": 0.2724,
"step": 333
},
{
"epoch": 1.61,
"learning_rate": 2.7080840818752475e-05,
"loss": 0.29,
"step": 334
},
{
"epoch": 1.61,
"learning_rate": 2.700760847989775e-05,
"loss": 0.3543,
"step": 335
},
{
"epoch": 1.62,
"learning_rate": 2.6934268881008994e-05,
"loss": 0.3971,
"step": 336
},
{
"epoch": 1.62,
"learning_rate": 2.6860823144638646e-05,
"loss": 0.414,
"step": 337
},
{
"epoch": 1.63,
"learning_rate": 2.6787272394963684e-05,
"loss": 0.4116,
"step": 338
},
{
"epoch": 1.63,
"learning_rate": 2.671361775776846e-05,
"loss": 0.3259,
"step": 339
},
{
"epoch": 1.64,
"learning_rate": 2.6639860360427426e-05,
"loss": 0.3429,
"step": 340
},
{
"epoch": 1.64,
"learning_rate": 2.6566001331887945e-05,
"loss": 0.4411,
"step": 341
},
{
"epoch": 1.65,
"learning_rate": 2.649204180265293e-05,
"loss": 0.3466,
"step": 342
},
{
"epoch": 1.65,
"learning_rate": 2.6417982904763597e-05,
"loss": 0.3531,
"step": 343
},
{
"epoch": 1.66,
"learning_rate": 2.6343825771782125e-05,
"loss": 0.2261,
"step": 344
},
{
"epoch": 1.66,
"learning_rate": 2.6269571538774294e-05,
"loss": 0.3678,
"step": 345
},
{
"epoch": 1.67,
"learning_rate": 2.6195221342292126e-05,
"loss": 0.3564,
"step": 346
},
{
"epoch": 1.67,
"learning_rate": 2.6120776320356473e-05,
"loss": 0.3306,
"step": 347
},
{
"epoch": 1.68,
"learning_rate": 2.604623761243962e-05,
"loss": 0.3551,
"step": 348
},
{
"epoch": 1.68,
"learning_rate": 2.5971606359447806e-05,
"loss": 0.3244,
"step": 349
},
{
"epoch": 1.69,
"learning_rate": 2.589688370370382e-05,
"loss": 0.3598,
"step": 350
},
{
"epoch": 1.69,
"learning_rate": 2.5822070788929448e-05,
"loss": 0.3207,
"step": 351
},
{
"epoch": 1.69,
"learning_rate": 2.5747168760228024e-05,
"loss": 0.3626,
"step": 352
},
{
"epoch": 1.7,
"learning_rate": 2.567217876406688e-05,
"loss": 0.3048,
"step": 353
},
{
"epoch": 1.7,
"learning_rate": 2.5597101948259777e-05,
"loss": 0.2911,
"step": 354
},
{
"epoch": 1.71,
"learning_rate": 2.5521939461949384e-05,
"loss": 0.2598,
"step": 355
},
{
"epoch": 1.71,
"learning_rate": 2.5446692455589633e-05,
"loss": 0.3651,
"step": 356
},
{
"epoch": 1.72,
"learning_rate": 2.5371362080928162e-05,
"loss": 0.272,
"step": 357
},
{
"epoch": 1.72,
"learning_rate": 2.529594949098866e-05,
"loss": 0.3887,
"step": 358
},
{
"epoch": 1.73,
"learning_rate": 2.5220455840053217e-05,
"loss": 0.1976,
"step": 359
},
{
"epoch": 1.73,
"learning_rate": 2.5144882283644644e-05,
"loss": 0.2853,
"step": 360
},
{
"epoch": 1.74,
"learning_rate": 2.5069229978508845e-05,
"loss": 0.3242,
"step": 361
},
{
"epoch": 1.74,
"learning_rate": 2.499350008259703e-05,
"loss": 0.3711,
"step": 362
},
{
"epoch": 1.75,
"learning_rate": 2.491769375504805e-05,
"loss": 0.2618,
"step": 363
},
{
"epoch": 1.75,
"learning_rate": 2.4841812156170632e-05,
"loss": 0.2531,
"step": 364
},
{
"epoch": 1.76,
"learning_rate": 2.4765856447425614e-05,
"loss": 0.3616,
"step": 365
},
{
"epoch": 1.76,
"learning_rate": 2.4689827791408198e-05,
"loss": 0.239,
"step": 366
},
{
"epoch": 1.77,
"learning_rate": 2.4613727351830115e-05,
"loss": 0.3474,
"step": 367
},
{
"epoch": 1.77,
"learning_rate": 2.453755629350183e-05,
"loss": 0.3964,
"step": 368
},
{
"epoch": 1.78,
"learning_rate": 2.446131578231473e-05,
"loss": 0.3069,
"step": 369
},
{
"epoch": 1.78,
"learning_rate": 2.438500698522325e-05,
"loss": 0.3234,
"step": 370
},
{
"epoch": 1.79,
"learning_rate": 2.4308631070227025e-05,
"loss": 0.3984,
"step": 371
},
{
"epoch": 1.79,
"learning_rate": 2.4232189206353026e-05,
"loss": 0.3254,
"step": 372
},
{
"epoch": 1.8,
"learning_rate": 2.415568256363763e-05,
"loss": 0.2716,
"step": 373
},
{
"epoch": 1.8,
"learning_rate": 2.407911231310876e-05,
"loss": 0.36,
"step": 374
},
{
"epoch": 1.81,
"learning_rate": 2.4002479626767903e-05,
"loss": 0.2299,
"step": 375
},
{
"epoch": 1.81,
"learning_rate": 2.3925785677572232e-05,
"loss": 0.2841,
"step": 376
},
{
"epoch": 1.82,
"learning_rate": 2.38490316394166e-05,
"loss": 0.2167,
"step": 377
},
{
"epoch": 1.82,
"learning_rate": 2.3772218687115615e-05,
"loss": 0.3267,
"step": 378
},
{
"epoch": 1.82,
"learning_rate": 2.369534799638563e-05,
"loss": 0.2126,
"step": 379
},
{
"epoch": 1.83,
"learning_rate": 2.361842074382674e-05,
"loss": 0.3876,
"step": 380
},
{
"epoch": 1.83,
"learning_rate": 2.3541438106904816e-05,
"loss": 0.4048,
"step": 381
},
{
"epoch": 1.84,
"learning_rate": 2.3464401263933435e-05,
"loss": 0.2619,
"step": 382
},
{
"epoch": 1.84,
"learning_rate": 2.338731139405587e-05,
"loss": 0.2396,
"step": 383
},
{
"epoch": 1.85,
"learning_rate": 2.331016967722704e-05,
"loss": 0.4138,
"step": 384
},
{
"epoch": 1.85,
"learning_rate": 2.3232977294195437e-05,
"loss": 0.324,
"step": 385
},
{
"epoch": 1.86,
"learning_rate": 2.3155735426485065e-05,
"loss": 0.1834,
"step": 386
},
{
"epoch": 1.86,
"learning_rate": 2.307844525637735e-05,
"loss": 0.3739,
"step": 387
},
{
"epoch": 1.87,
"learning_rate": 2.3001107966893054e-05,
"loss": 0.3552,
"step": 388
},
{
"epoch": 1.87,
"learning_rate": 2.2923724741774153e-05,
"loss": 0.2042,
"step": 389
},
{
"epoch": 1.88,
"learning_rate": 2.2846296765465708e-05,
"loss": 0.2872,
"step": 390
},
{
"epoch": 1.88,
"learning_rate": 2.2768825223097775e-05,
"loss": 0.3826,
"step": 391
},
{
"epoch": 1.89,
"learning_rate": 2.269131130046724e-05,
"loss": 0.1668,
"step": 392
},
{
"epoch": 1.89,
"learning_rate": 2.2613756184019656e-05,
"loss": 0.324,
"step": 393
},
{
"epoch": 1.9,
"learning_rate": 2.2536161060831114e-05,
"loss": 0.4264,
"step": 394
},
{
"epoch": 1.9,
"learning_rate": 2.245852711859004e-05,
"loss": 0.2733,
"step": 395
},
{
"epoch": 1.91,
"learning_rate": 2.238085554557906e-05,
"loss": 0.3542,
"step": 396
},
{
"epoch": 1.91,
"learning_rate": 2.2303147530656748e-05,
"loss": 0.3048,
"step": 397
},
{
"epoch": 1.92,
"learning_rate": 2.2225404263239493e-05,
"loss": 0.2757,
"step": 398
},
{
"epoch": 1.92,
"learning_rate": 2.2147626933283265e-05,
"loss": 0.2164,
"step": 399
},
{
"epoch": 1.93,
"learning_rate": 2.206981673126539e-05,
"loss": 0.3575,
"step": 400
},
{
"epoch": 1.93,
"learning_rate": 2.1991974848166356e-05,
"loss": 0.1491,
"step": 401
},
{
"epoch": 1.94,
"learning_rate": 2.191410247545155e-05,
"loss": 0.2321,
"step": 402
},
{
"epoch": 1.94,
"learning_rate": 2.1836200805053065e-05,
"loss": 0.2292,
"step": 403
},
{
"epoch": 1.95,
"learning_rate": 2.175827102935141e-05,
"loss": 0.2479,
"step": 404
},
{
"epoch": 1.95,
"learning_rate": 2.168031434115729e-05,
"loss": 0.2909,
"step": 405
},
{
"epoch": 1.95,
"learning_rate": 2.1602331933693325e-05,
"loss": 0.235,
"step": 406
},
{
"epoch": 1.96,
"learning_rate": 2.152432500057583e-05,
"loss": 0.2568,
"step": 407
},
{
"epoch": 1.96,
"learning_rate": 2.1446294735796486e-05,
"loss": 0.2387,
"step": 408
},
{
"epoch": 1.97,
"learning_rate": 2.13682423337041e-05,
"loss": 0.3835,
"step": 409
},
{
"epoch": 1.97,
"learning_rate": 2.1290168988986332e-05,
"loss": 0.3197,
"step": 410
},
{
"epoch": 1.98,
"learning_rate": 2.1212075896651373e-05,
"loss": 0.2987,
"step": 411
},
{
"epoch": 1.98,
"learning_rate": 2.1133964252009696e-05,
"loss": 0.2288,
"step": 412
},
{
"epoch": 1.99,
"learning_rate": 2.105583525065572e-05,
"loss": 0.2994,
"step": 413
},
{
"epoch": 1.99,
"learning_rate": 2.097769008844955e-05,
"loss": 0.2969,
"step": 414
},
{
"epoch": 2.0,
"learning_rate": 2.0899529961498633e-05,
"loss": 0.2523,
"step": 415
},
{
"epoch": 2.0,
"learning_rate": 2.082135606613949e-05,
"loss": 0.233,
"step": 416
},
{
"epoch": 2.01,
"learning_rate": 2.074316959891937e-05,
"loss": 0.1857,
"step": 417
},
{
"epoch": 2.01,
"learning_rate": 2.0664971756577953e-05,
"loss": 0.2529,
"step": 418
},
{
"epoch": 2.02,
"learning_rate": 2.058676373602902e-05,
"loss": 0.2896,
"step": 419
},
{
"epoch": 2.02,
"learning_rate": 2.050854673434217e-05,
"loss": 0.225,
"step": 420
},
{
"epoch": 2.03,
"learning_rate": 2.0430321948724447e-05,
"loss": 0.1983,
"step": 421
},
{
"epoch": 2.03,
"learning_rate": 2.035209057650203e-05,
"loss": 0.2276,
"step": 422
},
{
"epoch": 2.04,
"learning_rate": 2.0273853815101937e-05,
"loss": 0.2311,
"step": 423
},
{
"epoch": 2.04,
"learning_rate": 2.019561286203365e-05,
"loss": 0.1648,
"step": 424
},
{
"epoch": 2.05,
"learning_rate": 2.0117368914870838e-05,
"loss": 0.1401,
"step": 425
},
{
"epoch": 2.05,
"learning_rate": 2.003912317123297e-05,
"loss": 0.1672,
"step": 426
},
{
"epoch": 2.06,
"learning_rate": 1.9960876828767036e-05,
"loss": 0.261,
"step": 427
},
{
"epoch": 2.06,
"learning_rate": 1.9882631085129166e-05,
"loss": 0.1689,
"step": 428
},
{
"epoch": 2.07,
"learning_rate": 1.9804387137966354e-05,
"loss": 0.2404,
"step": 429
},
{
"epoch": 2.07,
"learning_rate": 1.9726146184898066e-05,
"loss": 0.2547,
"step": 430
},
{
"epoch": 2.08,
"learning_rate": 1.9647909423497974e-05,
"loss": 0.2642,
"step": 431
},
{
"epoch": 2.08,
"learning_rate": 1.956967805127556e-05,
"loss": 0.2206,
"step": 432
},
{
"epoch": 2.08,
"learning_rate": 1.9491453265657826e-05,
"loss": 0.279,
"step": 433
},
{
"epoch": 2.09,
"learning_rate": 1.9413236263970978e-05,
"loss": 0.2637,
"step": 434
},
{
"epoch": 2.09,
"learning_rate": 1.933502824342205e-05,
"loss": 0.2329,
"step": 435
},
{
"epoch": 2.1,
"learning_rate": 1.9256830401080635e-05,
"loss": 0.2355,
"step": 436
},
{
"epoch": 2.1,
"learning_rate": 1.917864393386051e-05,
"loss": 0.1518,
"step": 437
},
{
"epoch": 2.11,
"learning_rate": 1.9100470038501377e-05,
"loss": 0.2514,
"step": 438
},
{
"epoch": 2.11,
"learning_rate": 1.902230991155046e-05,
"loss": 0.1823,
"step": 439
},
{
"epoch": 2.12,
"learning_rate": 1.894416474934429e-05,
"loss": 0.2212,
"step": 440
},
{
"epoch": 2.12,
"learning_rate": 1.8866035747990318e-05,
"loss": 0.1689,
"step": 441
},
{
"epoch": 2.13,
"learning_rate": 1.8787924103348637e-05,
"loss": 0.2263,
"step": 442
},
{
"epoch": 2.13,
"learning_rate": 1.8709831011013678e-05,
"loss": 0.2164,
"step": 443
},
{
"epoch": 2.14,
"learning_rate": 1.8631757666295905e-05,
"loss": 0.2228,
"step": 444
},
{
"epoch": 2.14,
"learning_rate": 1.855370526420352e-05,
"loss": 0.1823,
"step": 445
},
{
"epoch": 2.15,
"learning_rate": 1.8475674999424175e-05,
"loss": 0.2087,
"step": 446
},
{
"epoch": 2.15,
"learning_rate": 1.839766806630668e-05,
"loss": 0.1859,
"step": 447
},
{
"epoch": 2.16,
"learning_rate": 1.8319685658842717e-05,
"loss": 0.2363,
"step": 448
},
{
"epoch": 2.16,
"learning_rate": 1.82417289706486e-05,
"loss": 0.2573,
"step": 449
},
{
"epoch": 2.17,
"learning_rate": 1.8163799194946938e-05,
"loss": 0.166,
"step": 450
},
{
"epoch": 2.17,
"learning_rate": 1.8085897524548454e-05,
"loss": 0.2676,
"step": 451
},
{
"epoch": 2.18,
"learning_rate": 1.800802515183365e-05,
"loss": 0.2502,
"step": 452
},
{
"epoch": 2.18,
"learning_rate": 1.7930183268734613e-05,
"loss": 0.1509,
"step": 453
},
{
"epoch": 2.19,
"learning_rate": 1.785237306671674e-05,
"loss": 0.1553,
"step": 454
},
{
"epoch": 2.19,
"learning_rate": 1.777459573676051e-05,
"loss": 0.2557,
"step": 455
},
{
"epoch": 2.2,
"learning_rate": 1.769685246934326e-05,
"loss": 0.2229,
"step": 456
},
{
"epoch": 2.2,
"learning_rate": 1.7619144454420944e-05,
"loss": 0.2122,
"step": 457
},
{
"epoch": 2.21,
"learning_rate": 1.7541472881409957e-05,
"loss": 0.1877,
"step": 458
},
{
"epoch": 2.21,
"learning_rate": 1.7463838939168886e-05,
"loss": 0.1889,
"step": 459
},
{
"epoch": 2.21,
"learning_rate": 1.7386243815980354e-05,
"loss": 0.2506,
"step": 460
},
{
"epoch": 2.22,
"learning_rate": 1.7308688699532767e-05,
"loss": 0.1552,
"step": 461
},
{
"epoch": 2.22,
"learning_rate": 1.7231174776902232e-05,
"loss": 0.1788,
"step": 462
},
{
"epoch": 2.23,
"learning_rate": 1.7153703234534302e-05,
"loss": 0.2183,
"step": 463
},
{
"epoch": 2.23,
"learning_rate": 1.707627525822586e-05,
"loss": 0.1812,
"step": 464
},
{
"epoch": 2.24,
"learning_rate": 1.699889203310695e-05,
"loss": 0.2299,
"step": 465
},
{
"epoch": 2.24,
"learning_rate": 1.6921554743622654e-05,
"loss": 0.2191,
"step": 466
},
{
"epoch": 2.25,
"learning_rate": 1.684426457351494e-05,
"loss": 0.2227,
"step": 467
},
{
"epoch": 2.25,
"learning_rate": 1.676702270580457e-05,
"loss": 0.2325,
"step": 468
},
{
"epoch": 2.26,
"learning_rate": 1.668983032277297e-05,
"loss": 0.2563,
"step": 469
},
{
"epoch": 2.26,
"learning_rate": 1.6612688605944133e-05,
"loss": 0.2716,
"step": 470
},
{
"epoch": 2.27,
"learning_rate": 1.6535598736066575e-05,
"loss": 0.1982,
"step": 471
},
{
"epoch": 2.27,
"learning_rate": 1.645856189309519e-05,
"loss": 0.2051,
"step": 472
},
{
"epoch": 2.28,
"learning_rate": 1.6381579256173265e-05,
"loss": 0.1689,
"step": 473
},
{
"epoch": 2.28,
"learning_rate": 1.630465200361438e-05,
"loss": 0.3489,
"step": 474
},
{
"epoch": 2.29,
"learning_rate": 1.6227781312884388e-05,
"loss": 0.2629,
"step": 475
},
{
"epoch": 2.29,
"learning_rate": 1.6150968360583404e-05,
"loss": 0.18,
"step": 476
},
{
"epoch": 2.3,
"learning_rate": 1.607421432242777e-05,
"loss": 0.1822,
"step": 477
},
{
"epoch": 2.3,
"learning_rate": 1.59975203732321e-05,
"loss": 0.1745,
"step": 478
},
{
"epoch": 2.31,
"learning_rate": 1.5920887686891245e-05,
"loss": 0.2919,
"step": 479
},
{
"epoch": 2.31,
"learning_rate": 1.584431743636237e-05,
"loss": 0.1981,
"step": 480
},
{
"epoch": 2.32,
"learning_rate": 1.5767810793646974e-05,
"loss": 0.2054,
"step": 481
},
{
"epoch": 2.32,
"learning_rate": 1.5691368929772975e-05,
"loss": 0.205,
"step": 482
},
{
"epoch": 2.33,
"learning_rate": 1.561499301477676e-05,
"loss": 0.2093,
"step": 483
},
{
"epoch": 2.33,
"learning_rate": 1.5538684217685282e-05,
"loss": 0.2318,
"step": 484
},
{
"epoch": 2.34,
"learning_rate": 1.5462443706498178e-05,
"loss": 0.2029,
"step": 485
},
{
"epoch": 2.34,
"learning_rate": 1.5386272648169898e-05,
"loss": 0.248,
"step": 486
},
{
"epoch": 2.34,
"learning_rate": 1.531017220859181e-05,
"loss": 0.1729,
"step": 487
},
{
"epoch": 2.35,
"learning_rate": 1.5234143552574393e-05,
"loss": 0.1975,
"step": 488
},
{
"epoch": 2.35,
"learning_rate": 1.5158187843829375e-05,
"loss": 0.2119,
"step": 489
},
{
"epoch": 2.36,
"learning_rate": 1.5082306244951956e-05,
"loss": 0.1371,
"step": 490
},
{
"epoch": 2.36,
"learning_rate": 1.5006499917402977e-05,
"loss": 0.2504,
"step": 491
},
{
"epoch": 2.37,
"learning_rate": 1.493077002149116e-05,
"loss": 0.2672,
"step": 492
},
{
"epoch": 2.37,
"learning_rate": 1.4855117716355358e-05,
"loss": 0.2934,
"step": 493
},
{
"epoch": 2.38,
"learning_rate": 1.4779544159946793e-05,
"loss": 0.1987,
"step": 494
},
{
"epoch": 2.38,
"learning_rate": 1.4704050509011345e-05,
"loss": 0.2354,
"step": 495
},
{
"epoch": 2.39,
"learning_rate": 1.462863791907184e-05,
"loss": 0.176,
"step": 496
},
{
"epoch": 2.39,
"learning_rate": 1.4553307544410373e-05,
"loss": 0.1183,
"step": 497
},
{
"epoch": 2.4,
"learning_rate": 1.4478060538050622e-05,
"loss": 0.0902,
"step": 498
},
{
"epoch": 2.4,
"learning_rate": 1.4402898051740227e-05,
"loss": 0.1498,
"step": 499
},
{
"epoch": 2.41,
"learning_rate": 1.4327821235933126e-05,
"loss": 0.2515,
"step": 500
},
{
"epoch": 2.41,
"learning_rate": 1.4252831239771974e-05,
"loss": 0.2061,
"step": 501
},
{
"epoch": 2.42,
"learning_rate": 1.4177929211070556e-05,
"loss": 0.1439,
"step": 502
},
{
"epoch": 2.42,
"learning_rate": 1.4103116296296185e-05,
"loss": 0.1817,
"step": 503
},
{
"epoch": 2.43,
"learning_rate": 1.4028393640552195e-05,
"loss": 0.1519,
"step": 504
},
{
"epoch": 2.43,
"learning_rate": 1.3953762387560392e-05,
"loss": 0.1722,
"step": 505
},
{
"epoch": 2.44,
"learning_rate": 1.3879223679643535e-05,
"loss": 0.2072,
"step": 506
},
{
"epoch": 2.44,
"learning_rate": 1.380477865770788e-05,
"loss": 0.1248,
"step": 507
},
{
"epoch": 2.45,
"learning_rate": 1.3730428461225716e-05,
"loss": 0.2491,
"step": 508
},
{
"epoch": 2.45,
"learning_rate": 1.3656174228217883e-05,
"loss": 0.1744,
"step": 509
},
{
"epoch": 2.46,
"learning_rate": 1.3582017095236413e-05,
"loss": 0.1792,
"step": 510
},
{
"epoch": 2.46,
"learning_rate": 1.3507958197347078e-05,
"loss": 0.1393,
"step": 511
},
{
"epoch": 2.47,
"learning_rate": 1.3433998668112062e-05,
"loss": 0.2205,
"step": 512
},
{
"epoch": 2.47,
"learning_rate": 1.3360139639572575e-05,
"loss": 0.2203,
"step": 513
},
{
"epoch": 2.47,
"learning_rate": 1.3286382242231546e-05,
"loss": 0.1297,
"step": 514
},
{
"epoch": 2.48,
"learning_rate": 1.3212727605036319e-05,
"loss": 0.1953,
"step": 515
},
{
"epoch": 2.48,
"learning_rate": 1.3139176855361357e-05,
"loss": 0.2554,
"step": 516
},
{
"epoch": 2.49,
"learning_rate": 1.306573111899101e-05,
"loss": 0.1167,
"step": 517
},
{
"epoch": 2.49,
"learning_rate": 1.2992391520102256e-05,
"loss": 0.1432,
"step": 518
},
{
"epoch": 2.5,
"learning_rate": 1.2919159181247527e-05,
"loss": 0.2211,
"step": 519
},
{
"epoch": 2.5,
"learning_rate": 1.284603522333749e-05,
"loss": 0.2003,
"step": 520
},
{
"epoch": 2.51,
"learning_rate": 1.2773020765623926e-05,
"loss": 0.1954,
"step": 521
},
{
"epoch": 2.51,
"learning_rate": 1.2700116925682566e-05,
"loss": 0.2775,
"step": 522
},
{
"epoch": 2.52,
"learning_rate": 1.2627324819396008e-05,
"loss": 0.1936,
"step": 523
},
{
"epoch": 2.52,
"learning_rate": 1.2554645560936634e-05,
"loss": 0.2279,
"step": 524
},
{
"epoch": 2.53,
"learning_rate": 1.2482080262749538e-05,
"loss": 0.1451,
"step": 525
},
{
"epoch": 2.53,
"learning_rate": 1.2409630035535534e-05,
"loss": 0.1396,
"step": 526
},
{
"epoch": 2.54,
"learning_rate": 1.2337295988234115e-05,
"loss": 0.1255,
"step": 527
},
{
"epoch": 2.54,
"learning_rate": 1.2265079228006499e-05,
"loss": 0.2538,
"step": 528
},
{
"epoch": 2.55,
"learning_rate": 1.2192980860218686e-05,
"loss": 0.1268,
"step": 529
},
{
"epoch": 2.55,
"learning_rate": 1.2121001988424541e-05,
"loss": 0.2127,
"step": 530
},
{
"epoch": 2.56,
"learning_rate": 1.2049143714348886e-05,
"loss": 0.2451,
"step": 531
},
{
"epoch": 2.56,
"learning_rate": 1.1977407137870657e-05,
"loss": 0.2047,
"step": 532
},
{
"epoch": 2.57,
"learning_rate": 1.190579335700605e-05,
"loss": 0.2452,
"step": 533
},
{
"epoch": 2.57,
"learning_rate": 1.183430346789173e-05,
"loss": 0.1794,
"step": 534
},
{
"epoch": 2.58,
"learning_rate": 1.176293856476804e-05,
"loss": 0.1501,
"step": 535
},
{
"epoch": 2.58,
"learning_rate": 1.1691699739962275e-05,
"loss": 0.2022,
"step": 536
},
{
"epoch": 2.59,
"learning_rate": 1.1620588083871919e-05,
"loss": 0.274,
"step": 537
},
{
"epoch": 2.59,
"learning_rate": 1.1549604684948017e-05,
"loss": 0.2059,
"step": 538
},
{
"epoch": 2.6,
"learning_rate": 1.1478750629678453e-05,
"loss": 0.1917,
"step": 539
},
{
"epoch": 2.6,
"learning_rate": 1.1408027002571359e-05,
"loss": 0.1058,
"step": 540
},
{
"epoch": 2.6,
"learning_rate": 1.1337434886138499e-05,
"loss": 0.1933,
"step": 541
},
{
"epoch": 2.61,
"learning_rate": 1.1266975360878723e-05,
"loss": 0.1301,
"step": 542
},
{
"epoch": 2.61,
"learning_rate": 1.119664950526139e-05,
"loss": 0.1618,
"step": 543
},
{
"epoch": 2.62,
"learning_rate": 1.1126458395709889e-05,
"loss": 0.1565,
"step": 544
},
{
"epoch": 2.62,
"learning_rate": 1.1056403106585156e-05,
"loss": 0.2198,
"step": 545
},
{
"epoch": 2.63,
"learning_rate": 1.0986484710169218e-05,
"loss": 0.2848,
"step": 546
},
{
"epoch": 2.63,
"learning_rate": 1.0916704276648815e-05,
"loss": 0.2031,
"step": 547
},
{
"epoch": 2.64,
"learning_rate": 1.0847062874098981e-05,
"loss": 0.2038,
"step": 548
},
{
"epoch": 2.64,
"learning_rate": 1.0777561568466708e-05,
"loss": 0.1408,
"step": 549
},
{
"epoch": 2.65,
"learning_rate": 1.0708201423554634e-05,
"loss": 0.1861,
"step": 550
},
{
"epoch": 2.65,
"learning_rate": 1.0638983501004767e-05,
"loss": 0.1512,
"step": 551
},
{
"epoch": 2.66,
"learning_rate": 1.0569908860282218e-05,
"loss": 0.271,
"step": 552
},
{
"epoch": 2.66,
"learning_rate": 1.0500978558659001e-05,
"loss": 0.1945,
"step": 553
},
{
"epoch": 2.67,
"learning_rate": 1.0432193651197834e-05,
"loss": 0.2549,
"step": 554
},
{
"epoch": 2.67,
"learning_rate": 1.036355519073602e-05,
"loss": 0.1765,
"step": 555
},
{
"epoch": 2.68,
"learning_rate": 1.0295064227869292e-05,
"loss": 0.2626,
"step": 556
},
{
"epoch": 2.68,
"learning_rate": 1.022672181093575e-05,
"loss": 0.2532,
"step": 557
},
{
"epoch": 2.69,
"learning_rate": 1.0158528985999832e-05,
"loss": 0.1611,
"step": 558
},
{
"epoch": 2.69,
"learning_rate": 1.0090486796836263e-05,
"loss": 0.249,
"step": 559
},
{
"epoch": 2.7,
"learning_rate": 1.0022596284914138e-05,
"loss": 0.1412,
"step": 560
},
{
"epoch": 2.7,
"learning_rate": 9.95485848938092e-06,
"loss": 0.1555,
"step": 561
},
{
"epoch": 2.71,
"learning_rate": 9.887274447046564e-06,
"loss": 0.192,
"step": 562
},
{
"epoch": 2.71,
"learning_rate": 9.819845192367644e-06,
"loss": 0.226,
"step": 563
},
{
"epoch": 2.72,
"learning_rate": 9.752571757431528e-06,
"loss": 0.1235,
"step": 564
},
{
"epoch": 2.72,
"learning_rate": 9.685455171940567e-06,
"loss": 0.178,
"step": 565
},
{
"epoch": 2.73,
"learning_rate": 9.618496463196337e-06,
"loss": 0.1961,
"step": 566
},
{
"epoch": 2.73,
"learning_rate": 9.551696656083912e-06,
"loss": 0.1967,
"step": 567
},
{
"epoch": 2.73,
"learning_rate": 9.485056773056188e-06,
"loss": 0.2143,
"step": 568
},
{
"epoch": 2.74,
"learning_rate": 9.418577834118236e-06,
"loss": 0.2471,
"step": 569
},
{
"epoch": 2.74,
"learning_rate": 9.352260856811667e-06,
"loss": 0.1866,
"step": 570
},
{
"epoch": 2.75,
"learning_rate": 9.286106856199076e-06,
"loss": 0.1961,
"step": 571
},
{
"epoch": 2.75,
"learning_rate": 9.220116844848497e-06,
"loss": 0.1566,
"step": 572
},
{
"epoch": 2.76,
"learning_rate": 9.15429183281791e-06,
"loss": 0.1923,
"step": 573
},
{
"epoch": 2.76,
"learning_rate": 9.088632827639776e-06,
"loss": 0.1757,
"step": 574
},
{
"epoch": 2.77,
"learning_rate": 9.023140834305621e-06,
"loss": 0.1966,
"step": 575
},
{
"epoch": 2.77,
"learning_rate": 8.957816855250645e-06,
"loss": 0.2316,
"step": 576
},
{
"epoch": 2.78,
"learning_rate": 8.892661890338395e-06,
"loss": 0.1888,
"step": 577
},
{
"epoch": 2.78,
"learning_rate": 8.82767693684544e-06,
"loss": 0.1814,
"step": 578
},
{
"epoch": 2.79,
"learning_rate": 8.762862989446113e-06,
"loss": 0.2633,
"step": 579
},
{
"epoch": 2.79,
"learning_rate": 8.698221040197288e-06,
"loss": 0.1614,
"step": 580
},
{
"epoch": 2.8,
"learning_rate": 8.63375207852322e-06,
"loss": 0.1122,
"step": 581
},
{
"epoch": 2.8,
"learning_rate": 8.569457091200348e-06,
"loss": 0.1046,
"step": 582
},
{
"epoch": 2.81,
"learning_rate": 8.505337062342234e-06,
"loss": 0.1475,
"step": 583
},
{
"epoch": 2.81,
"learning_rate": 8.441392973384483e-06,
"loss": 0.2702,
"step": 584
},
{
"epoch": 2.82,
"learning_rate": 8.37762580306972e-06,
"loss": 0.1265,
"step": 585
},
{
"epoch": 2.82,
"learning_rate": 8.314036527432631e-06,
"loss": 0.2168,
"step": 586
},
{
"epoch": 2.83,
"learning_rate": 8.250626119784988e-06,
"loss": 0.2676,
"step": 587
},
{
"epoch": 2.83,
"learning_rate": 8.187395550700777e-06,
"loss": 0.2056,
"step": 588
},
{
"epoch": 2.84,
"learning_rate": 8.124345788001328e-06,
"loss": 0.1343,
"step": 589
},
{
"epoch": 2.84,
"learning_rate": 8.061477796740511e-06,
"loss": 0.2392,
"step": 590
},
{
"epoch": 2.85,
"learning_rate": 7.998792539189973e-06,
"loss": 0.2062,
"step": 591
},
{
"epoch": 2.85,
"learning_rate": 7.936290974824381e-06,
"loss": 0.1645,
"step": 592
},
{
"epoch": 2.86,
"learning_rate": 7.873974060306757e-06,
"loss": 0.2342,
"step": 593
},
{
"epoch": 2.86,
"learning_rate": 7.811842749473837e-06,
"loss": 0.2206,
"step": 594
},
{
"epoch": 2.86,
"learning_rate": 7.74989799332146e-06,
"loss": 0.1834,
"step": 595
},
{
"epoch": 2.87,
"learning_rate": 7.688140739990015e-06,
"loss": 0.1885,
"step": 596
},
{
"epoch": 2.87,
"learning_rate": 7.6265719347499376e-06,
"loss": 0.1864,
"step": 597
},
{
"epoch": 2.88,
"learning_rate": 7.565192519987223e-06,
"loss": 0.2092,
"step": 598
},
{
"epoch": 2.88,
"learning_rate": 7.5040034351890335e-06,
"loss": 0.2321,
"step": 599
},
{
"epoch": 2.89,
"learning_rate": 7.443005616929277e-06,
"loss": 0.2588,
"step": 600
},
{
"epoch": 2.89,
"learning_rate": 7.382199998854304e-06,
"loss": 0.1922,
"step": 601
},
{
"epoch": 2.9,
"learning_rate": 7.321587511668595e-06,
"loss": 0.1921,
"step": 602
},
{
"epoch": 2.9,
"learning_rate": 7.261169083120545e-06,
"loss": 0.1381,
"step": 603
},
{
"epoch": 2.91,
"learning_rate": 7.2009456379882285e-06,
"loss": 0.2228,
"step": 604
},
{
"epoch": 2.91,
"learning_rate": 7.1409180980652596e-06,
"loss": 0.1796,
"step": 605
},
{
"epoch": 2.92,
"learning_rate": 7.0810873821466875e-06,
"loss": 0.2165,
"step": 606
},
{
"epoch": 2.92,
"learning_rate": 7.021454406014925e-06,
"loss": 0.1416,
"step": 607
},
{
"epoch": 2.93,
"learning_rate": 6.962020082425749e-06,
"loss": 0.3069,
"step": 608
},
{
"epoch": 2.93,
"learning_rate": 6.902785321094301e-06,
"loss": 0.2353,
"step": 609
},
{
"epoch": 2.94,
"learning_rate": 6.843751028681178e-06,
"loss": 0.2902,
"step": 610
},
{
"epoch": 2.94,
"learning_rate": 6.784918108778562e-06,
"loss": 0.1831,
"step": 611
},
{
"epoch": 2.95,
"learning_rate": 6.7262874618963705e-06,
"loss": 0.232,
"step": 612
},
{
"epoch": 2.95,
"learning_rate": 6.667859985448506e-06,
"loss": 0.1839,
"step": 613
},
{
"epoch": 2.96,
"learning_rate": 6.60963657373908e-06,
"loss": 0.1795,
"step": 614
},
{
"epoch": 2.96,
"learning_rate": 6.551618117948746e-06,
"loss": 0.2648,
"step": 615
},
{
"epoch": 2.97,
"learning_rate": 6.493805506121051e-06,
"loss": 0.2029,
"step": 616
},
{
"epoch": 2.97,
"learning_rate": 6.436199623148877e-06,
"loss": 0.1655,
"step": 617
},
{
"epoch": 2.98,
"learning_rate": 6.3788013507608285e-06,
"loss": 0.2168,
"step": 618
},
{
"epoch": 2.98,
"learning_rate": 6.321611567507795e-06,
"loss": 0.1731,
"step": 619
},
{
"epoch": 2.99,
"learning_rate": 6.2646311487494785e-06,
"loss": 0.1859,
"step": 620
},
{
"epoch": 2.99,
"learning_rate": 6.207860966641015e-06,
"loss": 0.1892,
"step": 621
},
{
"epoch": 2.99,
"learning_rate": 6.151301890119598e-06,
"loss": 0.2486,
"step": 622
},
{
"epoch": 3.0,
"learning_rate": 6.094954784891192e-06,
"loss": 0.2159,
"step": 623
},
{
"epoch": 3.0,
"learning_rate": 6.038820513417274e-06,
"loss": 0.1465,
"step": 624
},
{
"epoch": 3.01,
"learning_rate": 5.982899934901667e-06,
"loss": 0.1306,
"step": 625
},
{
"epoch": 3.01,
"learning_rate": 5.927193905277333e-06,
"loss": 0.2354,
"step": 626
},
{
"epoch": 3.02,
"learning_rate": 5.87170327719331e-06,
"loss": 0.1698,
"step": 627
},
{
"epoch": 3.02,
"learning_rate": 5.816428900001656e-06,
"loss": 0.1054,
"step": 628
},
{
"epoch": 3.03,
"learning_rate": 5.761371619744431e-06,
"loss": 0.1032,
"step": 629
},
{
"epoch": 3.03,
"learning_rate": 5.706532279140782e-06,
"loss": 0.1025,
"step": 630
},
{
"epoch": 3.04,
"learning_rate": 5.651911717574004e-06,
"loss": 0.1438,
"step": 631
},
{
"epoch": 3.04,
"learning_rate": 5.597510771078716e-06,
"loss": 0.0904,
"step": 632
},
{
"epoch": 3.05,
"learning_rate": 5.5433302723280555e-06,
"loss": 0.0988,
"step": 633
},
{
"epoch": 3.05,
"learning_rate": 5.489371050620955e-06,
"loss": 0.1751,
"step": 634
},
{
"epoch": 3.06,
"learning_rate": 5.43563393186941e-06,
"loss": 0.139,
"step": 635
},
{
"epoch": 3.06,
"learning_rate": 5.382119738585865e-06,
"loss": 0.1487,
"step": 636
},
{
"epoch": 3.07,
"learning_rate": 5.328829289870621e-06,
"loss": 0.1103,
"step": 637
},
{
"epoch": 3.07,
"learning_rate": 5.275763401399279e-06,
"loss": 0.1027,
"step": 638
},
{
"epoch": 3.08,
"learning_rate": 5.222922885410304e-06,
"loss": 0.1083,
"step": 639
},
{
"epoch": 3.08,
"learning_rate": 5.1703085506925225e-06,
"loss": 0.1774,
"step": 640
},
{
"epoch": 3.09,
"learning_rate": 5.1179212025727935e-06,
"loss": 0.1255,
"step": 641
},
{
"epoch": 3.09,
"learning_rate": 5.065761642903666e-06,
"loss": 0.137,
"step": 642
},
{
"epoch": 3.1,
"learning_rate": 5.013830670051123e-06,
"loss": 0.1599,
"step": 643
},
{
"epoch": 3.1,
"learning_rate": 4.962129078882327e-06,
"loss": 0.0948,
"step": 644
},
{
"epoch": 3.11,
"learning_rate": 4.910657660753482e-06,
"loss": 0.1396,
"step": 645
},
{
"epoch": 3.11,
"learning_rate": 4.859417203497698e-06,
"loss": 0.1374,
"step": 646
},
{
"epoch": 3.12,
"learning_rate": 4.808408491412973e-06,
"loss": 0.0805,
"step": 647
},
{
"epoch": 3.12,
"learning_rate": 4.757632305250135e-06,
"loss": 0.0651,
"step": 648
},
{
"epoch": 3.12,
"learning_rate": 4.7070894222009325e-06,
"loss": 0.1521,
"step": 649
},
{
"epoch": 3.13,
"learning_rate": 4.6567806158861164e-06,
"loss": 0.1367,
"step": 650
},
{
"epoch": 3.13,
"learning_rate": 4.606706656343607e-06,
"loss": 0.132,
"step": 651
},
{
"epoch": 3.14,
"learning_rate": 4.556868310016715e-06,
"loss": 0.1112,
"step": 652
},
{
"epoch": 3.14,
"learning_rate": 4.507266339742396e-06,
"loss": 0.1835,
"step": 653
},
{
"epoch": 3.15,
"learning_rate": 4.457901504739579e-06,
"loss": 0.1406,
"step": 654
},
{
"epoch": 3.15,
"learning_rate": 4.408774560597544e-06,
"loss": 0.1474,
"step": 655
},
{
"epoch": 3.16,
"learning_rate": 4.3598862592643765e-06,
"loss": 0.181,
"step": 656
},
{
"epoch": 3.16,
"learning_rate": 4.311237349035422e-06,
"loss": 0.1478,
"step": 657
},
{
"epoch": 3.17,
"learning_rate": 4.262828574541862e-06,
"loss": 0.0812,
"step": 658
},
{
"epoch": 3.17,
"learning_rate": 4.214660676739304e-06,
"loss": 0.1407,
"step": 659
},
{
"epoch": 3.18,
"learning_rate": 4.166734392896438e-06,
"loss": 0.1228,
"step": 660
},
{
"epoch": 3.18,
"learning_rate": 4.119050456583775e-06,
"loss": 0.1246,
"step": 661
},
{
"epoch": 3.19,
"learning_rate": 4.071609597662371e-06,
"loss": 0.0836,
"step": 662
},
{
"epoch": 3.19,
"learning_rate": 4.024412542272706e-06,
"loss": 0.1724,
"step": 663
},
{
"epoch": 3.2,
"learning_rate": 3.977460012823538e-06,
"loss": 0.0742,
"step": 664
},
{
"epoch": 3.2,
"learning_rate": 3.9307527279808665e-06,
"loss": 0.115,
"step": 665
},
{
"epoch": 3.21,
"learning_rate": 3.884291402656912e-06,
"loss": 0.1291,
"step": 666
},
{
"epoch": 3.21,
"learning_rate": 3.8380767479991865e-06,
"loss": 0.0751,
"step": 667
},
{
"epoch": 3.22,
"learning_rate": 3.7921094713795993e-06,
"loss": 0.1531,
"step": 668
},
{
"epoch": 3.22,
"learning_rate": 3.7463902763836514e-06,
"loss": 0.147,
"step": 669
},
{
"epoch": 3.23,
"learning_rate": 3.700919862799639e-06,
"loss": 0.142,
"step": 670
},
{
"epoch": 3.23,
"learning_rate": 3.6556989266079512e-06,
"loss": 0.1227,
"step": 671
},
{
"epoch": 3.24,
"learning_rate": 3.6107281599704268e-06,
"loss": 0.0678,
"step": 672
},
{
"epoch": 3.24,
"learning_rate": 3.566008251219746e-06,
"loss": 0.1249,
"step": 673
},
{
"epoch": 3.25,
"learning_rate": 3.5215398848489167e-06,
"loss": 0.0824,
"step": 674
},
{
"epoch": 3.25,
"learning_rate": 3.4773237415007644e-06,
"loss": 0.1161,
"step": 675
},
{
"epoch": 3.25,
"learning_rate": 3.433360497957541e-06,
"loss": 0.1366,
"step": 676
},
{
"epoch": 3.26,
"learning_rate": 3.3896508271305527e-06,
"loss": 0.1228,
"step": 677
},
{
"epoch": 3.26,
"learning_rate": 3.346195398049876e-06,
"loss": 0.0878,
"step": 678
},
{
"epoch": 3.27,
"learning_rate": 3.302994875854093e-06,
"loss": 0.1498,
"step": 679
},
{
"epoch": 3.27,
"learning_rate": 3.2600499217801307e-06,
"loss": 0.1358,
"step": 680
},
{
"epoch": 3.28,
"learning_rate": 3.217361193153126e-06,
"loss": 0.1506,
"step": 681
},
{
"epoch": 3.28,
"learning_rate": 3.174929343376374e-06,
"loss": 0.0924,
"step": 682
},
{
"epoch": 3.29,
"learning_rate": 3.1327550219213365e-06,
"loss": 0.0812,
"step": 683
},
{
"epoch": 3.29,
"learning_rate": 3.0908388743176766e-06,
"loss": 0.12,
"step": 684
},
{
"epoch": 3.3,
"learning_rate": 3.0491815421433825e-06,
"loss": 0.1527,
"step": 685
},
{
"epoch": 3.3,
"learning_rate": 3.007783663014965e-06,
"loss": 0.131,
"step": 686
},
{
"epoch": 3.31,
"learning_rate": 2.9666458705777e-06,
"loss": 0.078,
"step": 687
},
{
"epoch": 3.31,
"learning_rate": 2.925768794495898e-06,
"loss": 0.1267,
"step": 688
},
{
"epoch": 3.32,
"learning_rate": 2.8851530604432933e-06,
"loss": 0.0991,
"step": 689
},
{
"epoch": 3.32,
"learning_rate": 2.8447992900934583e-06,
"loss": 0.2192,
"step": 690
},
{
"epoch": 3.33,
"learning_rate": 2.804708101110303e-06,
"loss": 0.1544,
"step": 691
},
{
"epoch": 3.33,
"learning_rate": 2.76488010713859e-06,
"loss": 0.148,
"step": 692
},
{
"epoch": 3.34,
"learning_rate": 2.7253159177945686e-06,
"loss": 0.1251,
"step": 693
},
{
"epoch": 3.34,
"learning_rate": 2.6860161386566353e-06,
"loss": 0.1152,
"step": 694
},
{
"epoch": 3.35,
"learning_rate": 2.6469813712560544e-06,
"loss": 0.0976,
"step": 695
},
{
"epoch": 3.35,
"learning_rate": 2.60821221306778e-06,
"loss": 0.154,
"step": 696
},
{
"epoch": 3.36,
"learning_rate": 2.5697092575012785e-06,
"loss": 0.1086,
"step": 697
},
{
"epoch": 3.36,
"learning_rate": 2.531473093891459e-06,
"loss": 0.151,
"step": 698
},
{
"epoch": 3.37,
"learning_rate": 2.4935043074896515e-06,
"loss": 0.1462,
"step": 699
},
{
"epoch": 3.37,
"learning_rate": 2.455803479454664e-06,
"loss": 0.1652,
"step": 700
},
{
"epoch": 3.38,
"learning_rate": 2.4183711868438577e-06,
"loss": 0.1231,
"step": 701
},
{
"epoch": 3.38,
"learning_rate": 2.3812080026043336e-06,
"loss": 0.1214,
"step": 702
},
{
"epoch": 3.38,
"learning_rate": 2.344314495564166e-06,
"loss": 0.1641,
"step": 703
},
{
"epoch": 3.39,
"learning_rate": 2.3076912304236788e-06,
"loss": 0.0408,
"step": 704
},
{
"epoch": 3.39,
"learning_rate": 2.2713387677468267e-06,
"loss": 0.1112,
"step": 705
},
{
"epoch": 3.4,
"learning_rate": 2.2352576639525926e-06,
"loss": 0.0829,
"step": 706
},
{
"epoch": 3.4,
"learning_rate": 2.199448471306467e-06,
"loss": 0.1211,
"step": 707
},
{
"epoch": 3.41,
"learning_rate": 2.1639117379120324e-06,
"loss": 0.125,
"step": 708
},
{
"epoch": 3.41,
"learning_rate": 2.1286480077025296e-06,
"loss": 0.1531,
"step": 709
},
{
"epoch": 3.42,
"learning_rate": 2.0936578204325575e-06,
"loss": 0.0729,
"step": 710
},
{
"epoch": 3.42,
"learning_rate": 2.0589417116698062e-06,
"loss": 0.1757,
"step": 711
},
{
"epoch": 3.43,
"learning_rate": 2.024500212786853e-06,
"loss": 0.1177,
"step": 712
},
{
"epoch": 3.43,
"learning_rate": 1.9903338509530455e-06,
"loss": 0.1055,
"step": 713
},
{
"epoch": 3.44,
"learning_rate": 1.9564431491264126e-06,
"loss": 0.1398,
"step": 714
},
{
"epoch": 3.44,
"learning_rate": 1.9228286260456673e-06,
"loss": 0.1022,
"step": 715
},
{
"epoch": 3.45,
"learning_rate": 1.8894907962222754e-06,
"loss": 0.1305,
"step": 716
},
{
"epoch": 3.45,
"learning_rate": 1.8564301699325682e-06,
"loss": 0.1074,
"step": 717
},
{
"epoch": 3.46,
"learning_rate": 1.8236472532099413e-06,
"loss": 0.1054,
"step": 718
},
{
"epoch": 3.46,
"learning_rate": 1.7911425478371059e-06,
"loss": 0.1758,
"step": 719
},
{
"epoch": 3.47,
"learning_rate": 1.7589165513383988e-06,
"loss": 0.211,
"step": 720
},
{
"epoch": 3.47,
"learning_rate": 1.7269697569721833e-06,
"loss": 0.1356,
"step": 721
},
{
"epoch": 3.48,
"learning_rate": 1.6953026537232942e-06,
"loss": 0.0777,
"step": 722
},
{
"epoch": 3.48,
"learning_rate": 1.6639157262955463e-06,
"loss": 0.121,
"step": 723
},
{
"epoch": 3.49,
"learning_rate": 1.6328094551043229e-06,
"loss": 0.0861,
"step": 724
},
{
"epoch": 3.49,
"learning_rate": 1.601984316269214e-06,
"loss": 0.1478,
"step": 725
},
{
"epoch": 3.5,
"learning_rate": 1.5714407816067368e-06,
"loss": 0.1783,
"step": 726
},
{
"epoch": 3.5,
"learning_rate": 1.5411793186231229e-06,
"loss": 0.1273,
"step": 727
},
{
"epoch": 3.51,
"learning_rate": 1.5112003905071393e-06,
"loss": 0.174,
"step": 728
},
{
"epoch": 3.51,
"learning_rate": 1.481504456123004e-06,
"loss": 0.0906,
"step": 729
},
{
"epoch": 3.51,
"learning_rate": 1.4520919700033864e-06,
"loss": 0.153,
"step": 730
},
{
"epoch": 3.52,
"learning_rate": 1.4229633823424261e-06,
"loss": 0.1374,
"step": 731
},
{
"epoch": 3.52,
"learning_rate": 1.394119138988843e-06,
"loss": 0.1047,
"step": 732
},
{
"epoch": 3.53,
"learning_rate": 1.3655596814391259e-06,
"loss": 0.1373,
"step": 733
},
{
"epoch": 3.53,
"learning_rate": 1.3372854468307627e-06,
"loss": 0.1155,
"step": 734
},
{
"epoch": 3.54,
"learning_rate": 1.3092968679355634e-06,
"loss": 0.1288,
"step": 735
},
{
"epoch": 3.54,
"learning_rate": 1.281594373153019e-06,
"loss": 0.1205,
"step": 736
},
{
"epoch": 3.55,
"learning_rate": 1.254178386503755e-06,
"loss": 0.1281,
"step": 737
},
{
"epoch": 3.55,
"learning_rate": 1.22704932762304e-06,
"loss": 0.123,
"step": 738
},
{
"epoch": 3.56,
"learning_rate": 1.200207611754356e-06,
"loss": 0.1312,
"step": 739
},
{
"epoch": 3.56,
"learning_rate": 1.1736536497430584e-06,
"loss": 0.1619,
"step": 740
},
{
"epoch": 3.57,
"learning_rate": 1.1473878480300726e-06,
"loss": 0.145,
"step": 741
},
{
"epoch": 3.57,
"learning_rate": 1.1214106086456722e-06,
"loss": 0.1658,
"step": 742
},
{
"epoch": 3.58,
"learning_rate": 1.0957223292033349e-06,
"loss": 0.0794,
"step": 743
},
{
"epoch": 3.58,
"learning_rate": 1.070323402893656e-06,
"loss": 0.1179,
"step": 744
},
{
"epoch": 3.59,
"learning_rate": 1.0452142184783232e-06,
"loss": 0.1105,
"step": 745
},
{
"epoch": 3.59,
"learning_rate": 1.0203951602841688e-06,
"loss": 0.1284,
"step": 746
},
{
"epoch": 3.6,
"learning_rate": 9.958666081972867e-07,
"loss": 0.1693,
"step": 747
},
{
"epoch": 3.6,
"learning_rate": 9.716289376572207e-07,
"loss": 0.1762,
"step": 748
},
{
"epoch": 3.61,
"learning_rate": 9.476825196512207e-07,
"loss": 0.0992,
"step": 749
},
{
"epoch": 3.61,
"learning_rate": 9.240277207085557e-07,
"loss": 0.1589,
"step": 750
},
{
"epoch": 3.62,
"learning_rate": 9.006649028948966e-07,
"loss": 0.0858,
"step": 751
},
{
"epoch": 3.62,
"learning_rate": 8.775944238068046e-07,
"loss": 0.1141,
"step": 752
},
{
"epoch": 3.63,
"learning_rate": 8.548166365662314e-07,
"loss": 0.1144,
"step": 753
},
{
"epoch": 3.63,
"learning_rate": 8.323318898151144e-07,
"loss": 0.1336,
"step": 754
},
{
"epoch": 3.64,
"learning_rate": 8.101405277100549e-07,
"loss": 0.1304,
"step": 755
},
{
"epoch": 3.64,
"learning_rate": 7.882428899170369e-07,
"loss": 0.146,
"step": 756
},
{
"epoch": 3.64,
"learning_rate": 7.666393116062432e-07,
"loss": 0.1025,
"step": 757
},
{
"epoch": 3.65,
"learning_rate": 7.453301234469101e-07,
"loss": 0.0893,
"step": 758
},
{
"epoch": 3.65,
"learning_rate": 7.243156516022675e-07,
"loss": 0.0994,
"step": 759
},
{
"epoch": 3.66,
"learning_rate": 7.035962177245536e-07,
"loss": 0.0949,
"step": 760
},
{
"epoch": 3.66,
"learning_rate": 6.831721389500989e-07,
"loss": 0.1267,
"step": 761
},
{
"epoch": 3.67,
"learning_rate": 6.630437278944501e-07,
"loss": 0.1325,
"step": 762
},
{
"epoch": 3.67,
"learning_rate": 6.432112926476009e-07,
"loss": 0.1182,
"step": 763
},
{
"epoch": 3.68,
"learning_rate": 6.236751367692706e-07,
"loss": 0.1319,
"step": 764
},
{
"epoch": 3.68,
"learning_rate": 6.044355592842644e-07,
"loss": 0.1414,
"step": 765
},
{
"epoch": 3.69,
"learning_rate": 5.854928546778915e-07,
"loss": 0.0943,
"step": 766
},
{
"epoch": 3.69,
"learning_rate": 5.668473128914542e-07,
"loss": 0.0809,
"step": 767
},
{
"epoch": 3.7,
"learning_rate": 5.484992193178152e-07,
"loss": 0.0972,
"step": 768
},
{
"epoch": 3.7,
"learning_rate": 5.304488547970254e-07,
"loss": 0.1651,
"step": 769
},
{
"epoch": 3.71,
"learning_rate": 5.126964956120351e-07,
"loss": 0.1,
"step": 770
},
{
"epoch": 3.71,
"learning_rate": 4.952424134844491e-07,
"loss": 0.1077,
"step": 771
},
{
"epoch": 3.72,
"learning_rate": 4.78086875570385e-07,
"loss": 0.1625,
"step": 772
},
{
"epoch": 3.72,
"learning_rate": 4.6123014445636605e-07,
"loss": 0.1763,
"step": 773
},
{
"epoch": 3.73,
"learning_rate": 4.446724781553191e-07,
"loss": 0.1455,
"step": 774
},
{
"epoch": 3.73,
"learning_rate": 4.2841413010261456e-07,
"loss": 0.1195,
"step": 775
},
{
"epoch": 3.74,
"learning_rate": 4.1245534915218767e-07,
"loss": 0.1473,
"step": 776
},
{
"epoch": 3.74,
"learning_rate": 3.96796379572737e-07,
"loss": 0.1436,
"step": 777
},
{
"epoch": 3.75,
"learning_rate": 3.8143746104397637e-07,
"loss": 0.1168,
"step": 778
},
{
"epoch": 3.75,
"learning_rate": 3.663788286529757e-07,
"loss": 0.1102,
"step": 779
},
{
"epoch": 3.76,
"learning_rate": 3.5162071289055245e-07,
"loss": 0.1873,
"step": 780
},
{
"epoch": 3.76,
"learning_rate": 3.371633396477525e-07,
"loss": 0.1206,
"step": 781
},
{
"epoch": 3.77,
"learning_rate": 3.2300693021238394e-07,
"loss": 0.1399,
"step": 782
},
{
"epoch": 3.77,
"learning_rate": 3.0915170126564643e-07,
"loss": 0.1813,
"step": 783
},
{
"epoch": 3.77,
"learning_rate": 2.9559786487878716e-07,
"loss": 0.191,
"step": 784
},
{
"epoch": 3.78,
"learning_rate": 2.8234562850988356e-07,
"loss": 0.1644,
"step": 785
},
{
"epoch": 3.78,
"learning_rate": 2.6939519500065015e-07,
"loss": 0.116,
"step": 786
},
{
"epoch": 3.79,
"learning_rate": 2.5674676257333884e-07,
"loss": 0.1104,
"step": 787
},
{
"epoch": 3.79,
"learning_rate": 2.444005248277059e-07,
"loss": 0.158,
"step": 788
},
{
"epoch": 3.8,
"learning_rate": 2.3235667073804979e-07,
"loss": 0.1445,
"step": 789
},
{
"epoch": 3.8,
"learning_rate": 2.2061538465031117e-07,
"loss": 0.1268,
"step": 790
},
{
"epoch": 3.81,
"learning_rate": 2.0917684627926204e-07,
"loss": 0.1376,
"step": 791
},
{
"epoch": 3.81,
"learning_rate": 1.9804123070574556e-07,
"loss": 0.0521,
"step": 792
},
{
"epoch": 3.82,
"learning_rate": 1.8720870837400263e-07,
"loss": 0.2584,
"step": 793
},
{
"epoch": 3.82,
"learning_rate": 1.76679445089063e-07,
"loss": 0.1103,
"step": 794
},
{
"epoch": 3.83,
"learning_rate": 1.6645360201420046e-07,
"loss": 0.1743,
"step": 795
},
{
"epoch": 3.83,
"learning_rate": 1.5653133566847278e-07,
"loss": 0.1284,
"step": 796
},
{
"epoch": 3.84,
"learning_rate": 1.4691279792432345e-07,
"loss": 0.1151,
"step": 797
},
{
"epoch": 3.84,
"learning_rate": 1.3759813600525695e-07,
"loss": 0.1216,
"step": 798
},
{
"epoch": 3.85,
"learning_rate": 1.2858749248358948e-07,
"loss": 0.1659,
"step": 799
},
{
"epoch": 3.85,
"learning_rate": 1.198810052782595e-07,
"loss": 0.1303,
"step": 800
},
{
"epoch": 3.86,
"learning_rate": 1.114788076527229e-07,
"loss": 0.0962,
"step": 801
},
{
"epoch": 3.86,
"learning_rate": 1.0338102821290552e-07,
"loss": 0.1392,
"step": 802
},
{
"epoch": 3.87,
"learning_rate": 9.558779090524717e-08,
"loss": 0.1358,
"step": 803
},
{
"epoch": 3.87,
"learning_rate": 8.809921501479413e-08,
"loss": 0.1733,
"step": 804
},
{
"epoch": 3.88,
"learning_rate": 8.091541516337398e-08,
"loss": 0.1873,
"step": 805
},
{
"epoch": 3.88,
"learning_rate": 7.403650130784368e-08,
"loss": 0.1007,
"step": 806
},
{
"epoch": 3.89,
"learning_rate": 6.746257873841311e-08,
"loss": 0.1789,
"step": 807
},
{
"epoch": 3.89,
"learning_rate": 6.119374807701972e-08,
"loss": 0.1187,
"step": 808
},
{
"epoch": 3.9,
"learning_rate": 5.523010527579642e-08,
"loss": 0.1016,
"step": 809
},
{
"epoch": 3.9,
"learning_rate": 4.957174161560607e-08,
"loss": 0.0997,
"step": 810
},
{
"epoch": 3.9,
"learning_rate": 4.421874370463597e-08,
"loss": 0.1401,
"step": 811
},
{
"epoch": 3.91,
"learning_rate": 3.917119347707887e-08,
"loss": 0.1245,
"step": 812
},
{
"epoch": 3.91,
"learning_rate": 3.442916819187403e-08,
"loss": 0.0768,
"step": 813
},
{
"epoch": 3.92,
"learning_rate": 2.999274043153033e-08,
"loss": 0.079,
"step": 814
},
{
"epoch": 3.92,
"learning_rate": 2.5861978101009433e-08,
"loss": 0.0857,
"step": 815
},
{
"epoch": 3.93,
"learning_rate": 2.203694442669102e-08,
"loss": 0.0859,
"step": 816
},
{
"epoch": 3.93,
"learning_rate": 1.851769795540026e-08,
"loss": 0.1111,
"step": 817
},
{
"epoch": 3.94,
"learning_rate": 1.5304292553519616e-08,
"loss": 0.1393,
"step": 818
},
{
"epoch": 3.94,
"learning_rate": 1.2396777406153971e-08,
"loss": 0.1039,
"step": 819
},
{
"epoch": 3.95,
"learning_rate": 9.795197016384538e-09,
"loss": 0.1019,
"step": 820
},
{
"epoch": 3.95,
"learning_rate": 7.49959120458943e-09,
"loss": 0.175,
"step": 821
},
{
"epoch": 3.96,
"learning_rate": 5.509995107824129e-09,
"loss": 0.1495,
"step": 822
},
{
"epoch": 3.96,
"learning_rate": 3.826439179299701e-09,
"loss": 0.0748,
"step": 823
},
{
"epoch": 3.97,
"learning_rate": 2.4489491878987303e-09,
"loss": 0.1622,
"step": 824
},
{
"epoch": 3.97,
"learning_rate": 1.3775462177956222e-09,
"loss": 0.0846,
"step": 825
},
{
"epoch": 3.98,
"learning_rate": 6.122466681235395e-10,
"loss": 0.0982,
"step": 826
},
{
"epoch": 3.98,
"learning_rate": 1.530622527323722e-10,
"loss": 0.2389,
"step": 827
},
{
"epoch": 3.99,
"learning_rate": 0.0,
"loss": 0.0402,
"step": 828
},
{
"epoch": 3.99,
"step": 828,
"total_flos": 3.096773545255011e+21,
"train_loss": 0.37214213893141435,
"train_runtime": 13339.7222,
"train_samples_per_second": 2.989,
"train_steps_per_second": 0.062
}
],
"logging_steps": 1.0,
"max_steps": 828,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 200,
"total_flos": 3.096773545255011e+21,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}