jeo053 commited on
Commit
554815e
·
verified ·
1 Parent(s): 82783a1

Training in progress, step 25000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18e226bfabb4fe37953d03ddbd4d68cb6331b76339a65c4652c13e68da7c732e
3
  size 959732416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2808ef6d7c1ec76f205bfc3bf60e8896b8d012108d053710f2b9d7309d7f42b2
3
  size 959732416
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f46b965df64512eadca15157f363bb6b9d956cdb921d1cf19f79f3e3de5b89f3
3
  size 1915006400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f4aafd5e55920f58c0febce193ec74760cdf950639df7ad73eefccd9da8ec1
3
  size 1915006400
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f10ca53587facccfd8ce6c38c8ee3d38f84b1cd52bf1939f663991a4aa3ab223
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe1ca75804c729ecd6274811b801cee592417281624e7bdb93722530ee68ca62
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6a37679919f879e26f31535286e088a72dcdededa90c5c4ca97536707dfcd97
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:348961f7e743fe4e2fc3f96e9872ae30fee7c5dae2b7050fcbf673c342e559b9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.35401643468053,
5
  "eval_steps": 500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -326,6 +326,89 @@
326
  "learning_rate": 1.6459835653194702e-05,
327
  "loss": 0.0453,
328
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  }
330
  ],
331
  "logging_steps": 500,
@@ -345,7 +428,7 @@
345
  "attributes": {}
346
  }
347
  },
348
- "total_flos": 7.21801425313176e+18,
349
  "train_batch_size": 2,
350
  "trial_name": null,
351
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.192520543350662,
5
  "eval_steps": 500,
6
+ "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
326
  "learning_rate": 1.6459835653194702e-05,
327
  "loss": 0.0453,
328
  "step": 20000
329
+ },
330
+ {
331
+ "epoch": 3.437866845547543,
332
+ "grad_norm": 0.4968201816082001,
333
+ "learning_rate": 1.562133154452457e-05,
334
+ "loss": 0.0443,
335
+ "step": 20500
336
+ },
337
+ {
338
+ "epoch": 3.5217172564145565,
339
+ "grad_norm": 0.4099849760532379,
340
+ "learning_rate": 1.4782827435854438e-05,
341
+ "loss": 0.0468,
342
+ "step": 21000
343
+ },
344
+ {
345
+ "epoch": 3.6055676672815697,
346
+ "grad_norm": 0.5295602083206177,
347
+ "learning_rate": 1.3944323327184303e-05,
348
+ "loss": 0.0473,
349
+ "step": 21500
350
+ },
351
+ {
352
+ "epoch": 3.689418078148583,
353
+ "grad_norm": 0.5218081474304199,
354
+ "learning_rate": 1.3105819218514171e-05,
355
+ "loss": 0.0488,
356
+ "step": 22000
357
+ },
358
+ {
359
+ "epoch": 3.7732684890155963,
360
+ "grad_norm": 0.2999401390552521,
361
+ "learning_rate": 1.226731510984404e-05,
362
+ "loss": 0.0418,
363
+ "step": 22500
364
+ },
365
+ {
366
+ "epoch": 3.8571188998826096,
367
+ "grad_norm": 0.6840993762016296,
368
+ "learning_rate": 1.1428811001173906e-05,
369
+ "loss": 0.0431,
370
+ "step": 23000
371
+ },
372
+ {
373
+ "epoch": 3.9409693107496224,
374
+ "grad_norm": 0.33584555983543396,
375
+ "learning_rate": 1.0590306892503774e-05,
376
+ "loss": 0.0417,
377
+ "step": 23500
378
+ },
379
+ {
380
+ "epoch": 4.0,
381
+ "eval_gen_len": 100.0,
382
+ "eval_loss": 0.07122652977705002,
383
+ "eval_rouge1": 2.9034,
384
+ "eval_rouge2": 1.9182,
385
+ "eval_rougeL": 2.9223,
386
+ "eval_rougeLsum": 2.9168,
387
+ "eval_runtime": 1624.6424,
388
+ "eval_samples_per_second": 0.816,
389
+ "eval_steps_per_second": 0.408,
390
+ "step": 23852
391
+ },
392
+ {
393
+ "epoch": 4.024819721616636,
394
+ "grad_norm": 0.2943115532398224,
395
+ "learning_rate": 9.751802783833641e-06,
396
+ "loss": 0.0393,
397
+ "step": 24000
398
+ },
399
+ {
400
+ "epoch": 4.108670132483649,
401
+ "grad_norm": 0.4018648564815521,
402
+ "learning_rate": 8.913298675163509e-06,
403
+ "loss": 0.0321,
404
+ "step": 24500
405
+ },
406
+ {
407
+ "epoch": 4.192520543350662,
408
+ "grad_norm": 0.31552907824516296,
409
+ "learning_rate": 8.074794566493377e-06,
410
+ "loss": 0.0309,
411
+ "step": 25000
412
  }
413
  ],
414
  "logging_steps": 500,
 
428
  "attributes": {}
429
  }
430
  },
431
+ "total_flos": 9.02247270044192e+18,
432
  "train_batch_size": 2,
433
  "trial_name": null,
434
  "trial_params": null