aseratus1 commited on
Commit
acd474f
·
verified ·
1 Parent(s): b2c5850

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:412e51cdc7d726870ae8f6204741d3d1562e0102dcabde12df717ac957963803
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23e6c68395835c095227904b39332d10c7d34ed81b5571602677e6ef5575a13c
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a05b3409edce260bc3ca9025cc2358ecfe3019c73e1cafa162b1fd3ffdb415d
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:730cb00d49078d662595d6151979453a01918923d37aa2ef0eb475c5ee4d0078
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:babd590d0742fc32628cbe9db2b8cd70b83c9e903b16900f03ac2712f2748c4b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8da0edca726df8f38c4ca0e916639bf4307c82f3e2c93b50d1be5fc1f26a49b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3db5842dcea39e4b17fa9381afbdc2276af054161f761dc7841162f8c80751ca
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5ee946f0f01c37e2608b520c9577a5f5f81875a92165f89f7b804fa093c4404
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.40772125124931335,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1950",
4
- "epoch": 0.7393364928909952,
5
  "eval_steps": 150,
6
- "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1484,6 +1484,119 @@
1484
  "eval_samples_per_second": 20.994,
1485
  "eval_steps_per_second": 5.251,
1486
  "step": 1950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1487
  }
1488
  ],
1489
  "logging_steps": 10,
@@ -1512,7 +1625,7 @@
1512
  "attributes": {}
1513
  }
1514
  },
1515
- "total_flos": 1.731207895808213e+18,
1516
  "train_batch_size": 8,
1517
  "trial_name": null,
1518
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.40230637788772583,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-2100",
4
+ "epoch": 0.7962085308056872,
5
  "eval_steps": 150,
6
+ "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1484
  "eval_samples_per_second": 20.994,
1485
  "eval_steps_per_second": 5.251,
1486
  "step": 1950
1487
+ },
1488
+ {
1489
+ "epoch": 0.7431279620853081,
1490
+ "grad_norm": 0.7177047729492188,
1491
+ "learning_rate": 1.5999838850740295e-05,
1492
+ "loss": 0.5998,
1493
+ "step": 1960
1494
+ },
1495
+ {
1496
+ "epoch": 0.7469194312796209,
1497
+ "grad_norm": 0.7692488431930542,
1498
+ "learning_rate": 1.555733080046378e-05,
1499
+ "loss": 0.4557,
1500
+ "step": 1970
1501
+ },
1502
+ {
1503
+ "epoch": 0.7507109004739336,
1504
+ "grad_norm": 0.7131124138832092,
1505
+ "learning_rate": 1.5119898061295162e-05,
1506
+ "loss": 0.3787,
1507
+ "step": 1980
1508
+ },
1509
+ {
1510
+ "epoch": 0.7545023696682465,
1511
+ "grad_norm": 0.7252326011657715,
1512
+ "learning_rate": 1.468760509128288e-05,
1513
+ "loss": 0.3477,
1514
+ "step": 1990
1515
+ },
1516
+ {
1517
+ "epoch": 0.7582938388625592,
1518
+ "grad_norm": 0.6361256241798401,
1519
+ "learning_rate": 1.4260515591103002e-05,
1520
+ "loss": 0.2685,
1521
+ "step": 2000
1522
+ },
1523
+ {
1524
+ "epoch": 0.762085308056872,
1525
+ "grad_norm": 0.741794228553772,
1526
+ "learning_rate": 1.3838692494672462e-05,
1527
+ "loss": 0.6092,
1528
+ "step": 2010
1529
+ },
1530
+ {
1531
+ "epoch": 0.7658767772511849,
1532
+ "grad_norm": 0.7934831976890564,
1533
+ "learning_rate": 1.3422197959875615e-05,
1534
+ "loss": 0.4335,
1535
+ "step": 2020
1536
+ },
1537
+ {
1538
+ "epoch": 0.7696682464454976,
1539
+ "grad_norm": 0.7226603031158447,
1540
+ "learning_rate": 1.3011093359404725e-05,
1541
+ "loss": 0.3835,
1542
+ "step": 2030
1543
+ },
1544
+ {
1545
+ "epoch": 0.7734597156398104,
1546
+ "grad_norm": 0.7291485667228699,
1547
+ "learning_rate": 1.2605439271716517e-05,
1548
+ "loss": 0.3368,
1549
+ "step": 2040
1550
+ },
1551
+ {
1552
+ "epoch": 0.7772511848341233,
1553
+ "grad_norm": 0.47125428915023804,
1554
+ "learning_rate": 1.220529547210556e-05,
1555
+ "loss": 0.239,
1556
+ "step": 2050
1557
+ },
1558
+ {
1559
+ "epoch": 0.781042654028436,
1560
+ "grad_norm": 0.7346218824386597,
1561
+ "learning_rate": 1.181072092389608e-05,
1562
+ "loss": 0.5992,
1563
+ "step": 2060
1564
+ },
1565
+ {
1566
+ "epoch": 0.7848341232227488,
1567
+ "grad_norm": 0.7117639780044556,
1568
+ "learning_rate": 1.1421773769753386e-05,
1569
+ "loss": 0.4201,
1570
+ "step": 2070
1571
+ },
1572
+ {
1573
+ "epoch": 0.7886255924170616,
1574
+ "grad_norm": 0.7014518976211548,
1575
+ "learning_rate": 1.1038511323116208e-05,
1576
+ "loss": 0.387,
1577
+ "step": 2080
1578
+ },
1579
+ {
1580
+ "epoch": 0.7924170616113744,
1581
+ "grad_norm": 0.6080646514892578,
1582
+ "learning_rate": 1.066099005975132e-05,
1583
+ "loss": 0.3142,
1584
+ "step": 2090
1585
+ },
1586
+ {
1587
+ "epoch": 0.7962085308056872,
1588
+ "grad_norm": 0.7223226428031921,
1589
+ "learning_rate": 1.0289265609431536e-05,
1590
+ "loss": 0.2375,
1591
+ "step": 2100
1592
+ },
1593
+ {
1594
+ "epoch": 0.7962085308056872,
1595
+ "eval_loss": 0.40230637788772583,
1596
+ "eval_runtime": 211.0367,
1597
+ "eval_samples_per_second": 21.048,
1598
+ "eval_steps_per_second": 5.264,
1599
+ "step": 2100
1600
  }
1601
  ],
1602
  "logging_steps": 10,
 
1625
  "attributes": {}
1626
  }
1627
  },
1628
+ "total_flos": 1.8643342191325348e+18,
1629
  "train_batch_size": 8,
1630
  "trial_name": null,
1631
  "trial_params": null