aseratus1 committed · Commit 3b83b74 · verified · 1 Parent(s): d058485

Training in progress, step 1650, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c3c5ece6319df12b42a1321ab23cc0d598cb195389f44ad963eb2a55611fa1bb
+ oid sha256:38ff59f65b26f1e229a4f3d4c73b0fefd04c74898553389ea45285c7c03e44fb
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:38d317eefa2b55406d7080b244f8c8276671d3ad992293aa7b453dbb94ceedb8
+ oid sha256:0e68c2fd3fc08f26f68cc6ea31f89eb8432170046fa27022f45200b45830bbfd
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8432027dbbc8dab44e5086e964f9cdb0667714c28870c25714f6895b3ab20859
+ oid sha256:ac19d5d69213bd6ec3ac71c1dc1e56a89e4422a2146959318340f8687a25d0a0
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2e81835a72dc0c964cd591ffc9309a23b625c25e3c788c837ad74942ba4e6af0
+ oid sha256:f26152095eec0fefdbf64011d027c8b7130d36103664f806cba7c1c0501a24f2
  size 1064
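
The four blocks above are Git LFS pointer files: each records only the sha256 oid and byte size of the real checkpoint artifact, so the commit swaps the pointers while the binaries live in LFS storage. Below is a minimal sketch for checking a locally downloaded file against its pointer; the oid and path are taken from this commit, while the helper names (sha256_of, verify_lfs_pointer) are illustrative and not part of any library.

import hashlib
from pathlib import Path

def sha256_of(path, chunk_size=1 << 20):
    # Hash the file in chunks so large checkpoints are not read into memory at once.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

def verify_lfs_pointer(pointer_text, file_path):
    # An LFS pointer has three lines: "version ...", "oid sha256:<hash>", "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])
    return (Path(file_path).stat().st_size == expected_size
            and sha256_of(file_path) == expected_oid)

# Pointer contents copied from this commit's adapter_model.safetensors entry.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:38ff59f65b26f1e229a4f3d4c73b0fefd04c74898553389ea45285c7c03e44fb
size 671149168"""
print(verify_lfs_pointer(pointer, "last-checkpoint/adapter_model.safetensors"))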
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.43063661456108093,
- "best_model_checkpoint": "miner_id_24/checkpoint-1500",
- "epoch": 0.5687203791469194,
+ "best_metric": 0.4205625653266907,
+ "best_model_checkpoint": "miner_id_24/checkpoint-1650",
+ "epoch": 0.6255924170616114,
  "eval_steps": 150,
- "global_step": 1500,
+ "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1145,6 +1145,119 @@
  "eval_samples_per_second": 21.012,
  "eval_steps_per_second": 5.255,
  "step": 1500
+ },
+ {
+ "epoch": 0.5725118483412323,
+ "grad_norm": 0.7785443067550659,
+ "learning_rate": 3.999261602853686e-05,
+ "loss": 0.6699,
+ "step": 1510
+ },
+ {
+ "epoch": 0.576303317535545,
+ "grad_norm": 0.6982720494270325,
+ "learning_rate": 3.93986954553998e-05,
+ "loss": 0.4564,
+ "step": 1520
+ },
+ {
+ "epoch": 0.5800947867298578,
+ "grad_norm": 0.7283275127410889,
+ "learning_rate": 3.880633704106066e-05,
+ "loss": 0.4075,
+ "step": 1530
+ },
+ {
+ "epoch": 0.5838862559241706,
+ "grad_norm": 0.7250307202339172,
+ "learning_rate": 3.821562807269284e-05,
+ "loss": 0.3547,
+ "step": 1540
+ },
+ {
+ "epoch": 0.5876777251184834,
+ "grad_norm": 0.8150220513343811,
+ "learning_rate": 3.762665559441513e-05,
+ "loss": 0.2715,
+ "step": 1550
+ },
+ {
+ "epoch": 0.5914691943127962,
+ "grad_norm": 0.6675704717636108,
+ "learning_rate": 3.703950639446525e-05,
+ "loss": 0.6556,
+ "step": 1560
+ },
+ {
+ "epoch": 0.595260663507109,
+ "grad_norm": 0.7834139466285706,
+ "learning_rate": 3.6454266992411084e-05,
+ "loss": 0.4595,
+ "step": 1570
+ },
+ {
+ "epoch": 0.5990521327014218,
+ "grad_norm": 0.6742093563079834,
+ "learning_rate": 3.5871023626401605e-05,
+ "loss": 0.3832,
+ "step": 1580
+ },
+ {
+ "epoch": 0.6028436018957346,
+ "grad_norm": 0.6419682502746582,
+ "learning_rate": 3.5289862240459255e-05,
+ "loss": 0.3495,
+ "step": 1590
+ },
+ {
+ "epoch": 0.6066350710900474,
+ "grad_norm": 0.6275917291641235,
+ "learning_rate": 3.4710868471815586e-05,
+ "loss": 0.2515,
+ "step": 1600
+ },
+ {
+ "epoch": 0.6104265402843602,
+ "grad_norm": 0.7566835284233093,
+ "learning_rate": 3.413412763829218e-05,
+ "loss": 0.6413,
+ "step": 1610
+ },
+ {
+ "epoch": 0.614218009478673,
+ "grad_norm": 0.6755979657173157,
+ "learning_rate": 3.355972472572859e-05,
+ "loss": 0.4296,
+ "step": 1620
+ },
+ {
+ "epoch": 0.6180094786729858,
+ "grad_norm": 0.7301715612411499,
+ "learning_rate": 3.298774437545924e-05,
+ "loss": 0.4115,
+ "step": 1630
+ },
+ {
+ "epoch": 0.6218009478672986,
+ "grad_norm": 0.6284864544868469,
+ "learning_rate": 3.241827087184112e-05,
+ "loss": 0.3517,
+ "step": 1640
+ },
+ {
+ "epoch": 0.6255924170616114,
+ "grad_norm": 0.6837667226791382,
+ "learning_rate": 3.185138812983393e-05,
+ "loss": 0.2677,
+ "step": 1650
+ },
+ {
+ "epoch": 0.6255924170616114,
+ "eval_loss": 0.4205625653266907,
+ "eval_runtime": 211.4182,
+ "eval_samples_per_second": 21.01,
+ "eval_steps_per_second": 5.255,
+ "step": 1650
  }
  ],
  "logging_steps": 10,
@@ -1173,7 +1286,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.3314517974405612e+18,
+ "total_flos": 1.464578120764883e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null