Training in progress, step 2250, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 671149168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20330ea8ceb7989cdeaed0b70028a7280cf91c4a40d7a3dbf16716f4013eb76e
|
3 |
size 671149168
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 341314644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f930ca3c8cd907232edcf81606166f98bf8373c658b9735840ca38b7917e86d3
|
3 |
size 341314644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afe84c11be1cdc087ef0194266840b7ef5f06c05444f30ed4990e9108cb84807
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c383fc6cb5e030b465886a85a32551c8ea17044bece28f4ebe77147ab2b20f37
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 150,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1597,6 +1597,119 @@
|
|
1597 |
"eval_samples_per_second": 21.048,
|
1598 |
"eval_steps_per_second": 5.264,
|
1599 |
"step": 2100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1600 |
}
|
1601 |
],
|
1602 |
"logging_steps": 10,
|
@@ -1625,7 +1738,7 @@
|
|
1625 |
"attributes": {}
|
1626 |
}
|
1627 |
},
|
1628 |
-
"total_flos": 1.
|
1629 |
"train_batch_size": 8,
|
1630 |
"trial_name": null,
|
1631 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.39911842346191406,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-2250",
|
4 |
+
"epoch": 0.8530805687203792,
|
5 |
"eval_steps": 150,
|
6 |
+
"global_step": 2250,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1597 |
"eval_samples_per_second": 21.048,
|
1598 |
"eval_steps_per_second": 5.264,
|
1599 |
"step": 2100
|
1600 |
+
},
|
1601 |
+
{
|
1602 |
+
"epoch": 0.8,
|
1603 |
+
"grad_norm": 0.7154859900474548,
|
1604 |
+
"learning_rate": 9.923392747738264e-06,
|
1605 |
+
"loss": 0.6162,
|
1606 |
+
"step": 2110
|
1607 |
+
},
|
1608 |
+
{
|
1609 |
+
"epoch": 0.8037914691943128,
|
1610 |
+
"grad_norm": 0.7427191734313965,
|
1611 |
+
"learning_rate": 9.563425387990149e-06,
|
1612 |
+
"loss": 0.4395,
|
1613 |
+
"step": 2120
|
1614 |
+
},
|
1615 |
+
{
|
1616 |
+
"epoch": 0.8075829383886256,
|
1617 |
+
"grad_norm": 0.6783840656280518,
|
1618 |
+
"learning_rate": 9.209416573298567e-06,
|
1619 |
+
"loss": 0.3985,
|
1620 |
+
"step": 2130
|
1621 |
+
},
|
1622 |
+
{
|
1623 |
+
"epoch": 0.8113744075829384,
|
1624 |
+
"grad_norm": 0.7031378149986267,
|
1625 |
+
"learning_rate": 8.861418468751542e-06,
|
1626 |
+
"loss": 0.3239,
|
1627 |
+
"step": 2140
|
1628 |
+
},
|
1629 |
+
{
|
1630 |
+
"epoch": 0.8151658767772512,
|
1631 |
+
"grad_norm": 0.7050457000732422,
|
1632 |
+
"learning_rate": 8.51948235372686e-06,
|
1633 |
+
"loss": 0.2472,
|
1634 |
+
"step": 2150
|
1635 |
+
},
|
1636 |
+
{
|
1637 |
+
"epoch": 0.818957345971564,
|
1638 |
+
"grad_norm": 0.7675678133964539,
|
1639 |
+
"learning_rate": 8.183658614335754e-06,
|
1640 |
+
"loss": 0.6221,
|
1641 |
+
"step": 2160
|
1642 |
+
},
|
1643 |
+
{
|
1644 |
+
"epoch": 0.8227488151658767,
|
1645 |
+
"grad_norm": 0.7096587419509888,
|
1646 |
+
"learning_rate": 7.853996735998353e-06,
|
1647 |
+
"loss": 0.404,
|
1648 |
+
"step": 2170
|
1649 |
+
},
|
1650 |
+
{
|
1651 |
+
"epoch": 0.8265402843601896,
|
1652 |
+
"grad_norm": 0.7109266519546509,
|
1653 |
+
"learning_rate": 7.530545296151642e-06,
|
1654 |
+
"loss": 0.3859,
|
1655 |
+
"step": 2180
|
1656 |
+
},
|
1657 |
+
{
|
1658 |
+
"epoch": 0.8303317535545024,
|
1659 |
+
"grad_norm": 0.6554428339004517,
|
1660 |
+
"learning_rate": 7.213351957091369e-06,
|
1661 |
+
"loss": 0.3142,
|
1662 |
+
"step": 2190
|
1663 |
+
},
|
1664 |
+
{
|
1665 |
+
"epoch": 0.8341232227488151,
|
1666 |
+
"grad_norm": 0.5105351805686951,
|
1667 |
+
"learning_rate": 6.902463458948738e-06,
|
1668 |
+
"loss": 0.2497,
|
1669 |
+
"step": 2200
|
1670 |
+
},
|
1671 |
+
{
|
1672 |
+
"epoch": 0.837914691943128,
|
1673 |
+
"grad_norm": 0.8070971965789795,
|
1674 |
+
"learning_rate": 6.597925612802969e-06,
|
1675 |
+
"loss": 0.6438,
|
1676 |
+
"step": 2210
|
1677 |
+
},
|
1678 |
+
{
|
1679 |
+
"epoch": 0.8417061611374408,
|
1680 |
+
"grad_norm": 0.8927067518234253,
|
1681 |
+
"learning_rate": 6.299783293930844e-06,
|
1682 |
+
"loss": 0.4307,
|
1683 |
+
"step": 2220
|
1684 |
+
},
|
1685 |
+
{
|
1686 |
+
"epoch": 0.8454976303317535,
|
1687 |
+
"grad_norm": 0.6909337043762207,
|
1688 |
+
"learning_rate": 6.008080435194074e-06,
|
1689 |
+
"loss": 0.3953,
|
1690 |
+
"step": 2230
|
1691 |
+
},
|
1692 |
+
{
|
1693 |
+
"epoch": 0.8492890995260663,
|
1694 |
+
"grad_norm": 0.7040910720825195,
|
1695 |
+
"learning_rate": 5.722860020565551e-06,
|
1696 |
+
"loss": 0.3385,
|
1697 |
+
"step": 2240
|
1698 |
+
},
|
1699 |
+
{
|
1700 |
+
"epoch": 0.8530805687203792,
|
1701 |
+
"grad_norm": 0.5680156350135803,
|
1702 |
+
"learning_rate": 5.444164078795444e-06,
|
1703 |
+
"loss": 0.247,
|
1704 |
+
"step": 2250
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"epoch": 0.8530805687203792,
|
1708 |
+
"eval_loss": 0.39911842346191406,
|
1709 |
+
"eval_runtime": 211.2832,
|
1710 |
+
"eval_samples_per_second": 21.024,
|
1711 |
+
"eval_steps_per_second": 5.258,
|
1712 |
+
"step": 2250
|
1713 |
}
|
1714 |
],
|
1715 |
"logging_steps": 10,
|
|
|
1738 |
"attributes": {}
|
1739 |
}
|
1740 |
},
|
1741 |
+
"total_flos": 1.9997233128249754e+18,
|
1742 |
"train_batch_size": 8,
|
1743 |
"trial_name": null,
|
1744 |
"trial_params": null
|