aseratus1 committed on
Commit
49533a9
·
verified ·
1 Parent(s): f757b70

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23e6c68395835c095227904b39332d10c7d34ed81b5571602677e6ef5575a13c
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20330ea8ceb7989cdeaed0b70028a7280cf91c4a40d7a3dbf16716f4013eb76e
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:730cb00d49078d662595d6151979453a01918923d37aa2ef0eb475c5ee4d0078
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f930ca3c8cd907232edcf81606166f98bf8373c658b9735840ca38b7917e86d3
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8da0edca726df8f38c4ca0e916639bf4307c82f3e2c93b50d1be5fc1f26a49b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe84c11be1cdc087ef0194266840b7ef5f06c05444f30ed4990e9108cb84807
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5ee946f0f01c37e2608b520c9577a5f5f81875a92165f89f7b804fa093c4404
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c383fc6cb5e030b465886a85a32551c8ea17044bece28f4ebe77147ab2b20f37
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.40230637788772583,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-2100",
4
- "epoch": 0.7962085308056872,
5
  "eval_steps": 150,
6
- "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1597,6 +1597,119 @@
1597
  "eval_samples_per_second": 21.048,
1598
  "eval_steps_per_second": 5.264,
1599
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1600
  }
1601
  ],
1602
  "logging_steps": 10,
@@ -1625,7 +1738,7 @@
1625
  "attributes": {}
1626
  }
1627
  },
1628
- "total_flos": 1.8643342191325348e+18,
1629
  "train_batch_size": 8,
1630
  "trial_name": null,
1631
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.39911842346191406,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-2250",
4
+ "epoch": 0.8530805687203792,
5
  "eval_steps": 150,
6
+ "global_step": 2250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1597
  "eval_samples_per_second": 21.048,
1598
  "eval_steps_per_second": 5.264,
1599
  "step": 2100
1600
+ },
1601
+ {
1602
+ "epoch": 0.8,
1603
+ "grad_norm": 0.7154859900474548,
1604
+ "learning_rate": 9.923392747738264e-06,
1605
+ "loss": 0.6162,
1606
+ "step": 2110
1607
+ },
1608
+ {
1609
+ "epoch": 0.8037914691943128,
1610
+ "grad_norm": 0.7427191734313965,
1611
+ "learning_rate": 9.563425387990149e-06,
1612
+ "loss": 0.4395,
1613
+ "step": 2120
1614
+ },
1615
+ {
1616
+ "epoch": 0.8075829383886256,
1617
+ "grad_norm": 0.6783840656280518,
1618
+ "learning_rate": 9.209416573298567e-06,
1619
+ "loss": 0.3985,
1620
+ "step": 2130
1621
+ },
1622
+ {
1623
+ "epoch": 0.8113744075829384,
1624
+ "grad_norm": 0.7031378149986267,
1625
+ "learning_rate": 8.861418468751542e-06,
1626
+ "loss": 0.3239,
1627
+ "step": 2140
1628
+ },
1629
+ {
1630
+ "epoch": 0.8151658767772512,
1631
+ "grad_norm": 0.7050457000732422,
1632
+ "learning_rate": 8.51948235372686e-06,
1633
+ "loss": 0.2472,
1634
+ "step": 2150
1635
+ },
1636
+ {
1637
+ "epoch": 0.818957345971564,
1638
+ "grad_norm": 0.7675678133964539,
1639
+ "learning_rate": 8.183658614335754e-06,
1640
+ "loss": 0.6221,
1641
+ "step": 2160
1642
+ },
1643
+ {
1644
+ "epoch": 0.8227488151658767,
1645
+ "grad_norm": 0.7096587419509888,
1646
+ "learning_rate": 7.853996735998353e-06,
1647
+ "loss": 0.404,
1648
+ "step": 2170
1649
+ },
1650
+ {
1651
+ "epoch": 0.8265402843601896,
1652
+ "grad_norm": 0.7109266519546509,
1653
+ "learning_rate": 7.530545296151642e-06,
1654
+ "loss": 0.3859,
1655
+ "step": 2180
1656
+ },
1657
+ {
1658
+ "epoch": 0.8303317535545024,
1659
+ "grad_norm": 0.6554428339004517,
1660
+ "learning_rate": 7.213351957091369e-06,
1661
+ "loss": 0.3142,
1662
+ "step": 2190
1663
+ },
1664
+ {
1665
+ "epoch": 0.8341232227488151,
1666
+ "grad_norm": 0.5105351805686951,
1667
+ "learning_rate": 6.902463458948738e-06,
1668
+ "loss": 0.2497,
1669
+ "step": 2200
1670
+ },
1671
+ {
1672
+ "epoch": 0.837914691943128,
1673
+ "grad_norm": 0.8070971965789795,
1674
+ "learning_rate": 6.597925612802969e-06,
1675
+ "loss": 0.6438,
1676
+ "step": 2210
1677
+ },
1678
+ {
1679
+ "epoch": 0.8417061611374408,
1680
+ "grad_norm": 0.8927067518234253,
1681
+ "learning_rate": 6.299783293930844e-06,
1682
+ "loss": 0.4307,
1683
+ "step": 2220
1684
+ },
1685
+ {
1686
+ "epoch": 0.8454976303317535,
1687
+ "grad_norm": 0.6909337043762207,
1688
+ "learning_rate": 6.008080435194074e-06,
1689
+ "loss": 0.3953,
1690
+ "step": 2230
1691
+ },
1692
+ {
1693
+ "epoch": 0.8492890995260663,
1694
+ "grad_norm": 0.7040910720825195,
1695
+ "learning_rate": 5.722860020565551e-06,
1696
+ "loss": 0.3385,
1697
+ "step": 2240
1698
+ },
1699
+ {
1700
+ "epoch": 0.8530805687203792,
1701
+ "grad_norm": 0.5680156350135803,
1702
+ "learning_rate": 5.444164078795444e-06,
1703
+ "loss": 0.247,
1704
+ "step": 2250
1705
+ },
1706
+ {
1707
+ "epoch": 0.8530805687203792,
1708
+ "eval_loss": 0.39911842346191406,
1709
+ "eval_runtime": 211.2832,
1710
+ "eval_samples_per_second": 21.024,
1711
+ "eval_steps_per_second": 5.258,
1712
+ "step": 2250
1713
  }
1714
  ],
1715
  "logging_steps": 10,
 
1738
  "attributes": {}
1739
  }
1740
  },
1741
+ "total_flos": 1.9997233128249754e+18,
1742
  "train_batch_size": 8,
1743
  "trial_name": null,
1744
  "trial_params": null