Training in progress, step 25000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 959732416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2808ef6d7c1ec76f205bfc3bf60e8896b8d012108d053710f2b9d7309d7f42b2
|
3 |
size 959732416
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1915006400
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93f4aafd5e55920f58c0febce193ec74760cdf950639df7ad73eefccd9da8ec1
|
3 |
size 1915006400
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe1ca75804c729ecd6274811b801cee592417281624e7bdb93722530ee68ca62
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:348961f7e743fe4e2fc3f96e9872ae30fee7c5dae2b7050fcbf673c342e559b9
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -326,6 +326,89 @@
|
|
326 |
"learning_rate": 1.6459835653194702e-05,
|
327 |
"loss": 0.0453,
|
328 |
"step": 20000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
}
|
330 |
],
|
331 |
"logging_steps": 500,
|
@@ -345,7 +428,7 @@
|
|
345 |
"attributes": {}
|
346 |
}
|
347 |
},
|
348 |
-
"total_flos":
|
349 |
"train_batch_size": 2,
|
350 |
"trial_name": null,
|
351 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.192520543350662,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 25000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
326 |
"learning_rate": 1.6459835653194702e-05,
|
327 |
"loss": 0.0453,
|
328 |
"step": 20000
|
329 |
+
},
|
330 |
+
{
|
331 |
+
"epoch": 3.437866845547543,
|
332 |
+
"grad_norm": 0.4968201816082001,
|
333 |
+
"learning_rate": 1.562133154452457e-05,
|
334 |
+
"loss": 0.0443,
|
335 |
+
"step": 20500
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"epoch": 3.5217172564145565,
|
339 |
+
"grad_norm": 0.4099849760532379,
|
340 |
+
"learning_rate": 1.4782827435854438e-05,
|
341 |
+
"loss": 0.0468,
|
342 |
+
"step": 21000
|
343 |
+
},
|
344 |
+
{
|
345 |
+
"epoch": 3.6055676672815697,
|
346 |
+
"grad_norm": 0.5295602083206177,
|
347 |
+
"learning_rate": 1.3944323327184303e-05,
|
348 |
+
"loss": 0.0473,
|
349 |
+
"step": 21500
|
350 |
+
},
|
351 |
+
{
|
352 |
+
"epoch": 3.689418078148583,
|
353 |
+
"grad_norm": 0.5218081474304199,
|
354 |
+
"learning_rate": 1.3105819218514171e-05,
|
355 |
+
"loss": 0.0488,
|
356 |
+
"step": 22000
|
357 |
+
},
|
358 |
+
{
|
359 |
+
"epoch": 3.7732684890155963,
|
360 |
+
"grad_norm": 0.2999401390552521,
|
361 |
+
"learning_rate": 1.226731510984404e-05,
|
362 |
+
"loss": 0.0418,
|
363 |
+
"step": 22500
|
364 |
+
},
|
365 |
+
{
|
366 |
+
"epoch": 3.8571188998826096,
|
367 |
+
"grad_norm": 0.6840993762016296,
|
368 |
+
"learning_rate": 1.1428811001173906e-05,
|
369 |
+
"loss": 0.0431,
|
370 |
+
"step": 23000
|
371 |
+
},
|
372 |
+
{
|
373 |
+
"epoch": 3.9409693107496224,
|
374 |
+
"grad_norm": 0.33584555983543396,
|
375 |
+
"learning_rate": 1.0590306892503774e-05,
|
376 |
+
"loss": 0.0417,
|
377 |
+
"step": 23500
|
378 |
+
},
|
379 |
+
{
|
380 |
+
"epoch": 4.0,
|
381 |
+
"eval_gen_len": 100.0,
|
382 |
+
"eval_loss": 0.07122652977705002,
|
383 |
+
"eval_rouge1": 2.9034,
|
384 |
+
"eval_rouge2": 1.9182,
|
385 |
+
"eval_rougeL": 2.9223,
|
386 |
+
"eval_rougeLsum": 2.9168,
|
387 |
+
"eval_runtime": 1624.6424,
|
388 |
+
"eval_samples_per_second": 0.816,
|
389 |
+
"eval_steps_per_second": 0.408,
|
390 |
+
"step": 23852
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"epoch": 4.024819721616636,
|
394 |
+
"grad_norm": 0.2943115532398224,
|
395 |
+
"learning_rate": 9.751802783833641e-06,
|
396 |
+
"loss": 0.0393,
|
397 |
+
"step": 24000
|
398 |
+
},
|
399 |
+
{
|
400 |
+
"epoch": 4.108670132483649,
|
401 |
+
"grad_norm": 0.4018648564815521,
|
402 |
+
"learning_rate": 8.913298675163509e-06,
|
403 |
+
"loss": 0.0321,
|
404 |
+
"step": 24500
|
405 |
+
},
|
406 |
+
{
|
407 |
+
"epoch": 4.192520543350662,
|
408 |
+
"grad_norm": 0.31552907824516296,
|
409 |
+
"learning_rate": 8.074794566493377e-06,
|
410 |
+
"loss": 0.0309,
|
411 |
+
"step": 25000
|
412 |
}
|
413 |
],
|
414 |
"logging_steps": 500,
|
|
|
428 |
"attributes": {}
|
429 |
}
|
430 |
},
|
431 |
+
"total_flos": 9.02247270044192e+18,
|
432 |
"train_batch_size": 2,
|
433 |
"trial_name": null,
|
434 |
"trial_params": null
|