aseratus1 committed on
Commit
8b8bad0
·
verified ·
1 Parent(s): c09b51a

Training in progress, step 2550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:712502ca776105404151dfeac958174c3eed08f2aa25261a982b2f9aa888fe9f
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc7a32ae1a260d232561f8bfa7cadee6bbcd47aca2968958089976f95afd9512
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c223af06d71ce7871dc7649f6cfdee15585b3a08252ccf3975c3b7c0d9857b12
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4630961c792966353518425385bb6bdec4ee01ada3092767c85b23a263ff78d
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ceaa3f2b731557edf0d8aff0b45a00a84d15db8edf47bd49778d983e9466b85
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df359f92a67934fe621c77a319e3203ed8bf9f11020a6732c84063a23dd6bca
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:334bfcab64a1c8be0f196dcc75ffd76422b0488c7c39f2cac1326be5a0bdb87a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f216ea40a4443b0f449133ae4ca79e6899c677cb6c40f87fbb71204a9e2a38ba
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.39714062213897705,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-2400",
4
- "epoch": 0.909952606635071,
5
  "eval_steps": 150,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1823,6 +1823,119 @@
1823
  "eval_samples_per_second": 21.01,
1824
  "eval_steps_per_second": 5.255,
1825
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1826
  }
1827
  ],
1828
  "logging_steps": 10,
@@ -1851,7 +1964,7 @@
1851
  "attributes": {}
1852
  }
1853
  },
1854
- "total_flos": 2.1324725077546107e+18,
1855
  "train_batch_size": 8,
1856
  "trial_name": null,
1857
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.396539568901062,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-2550",
4
+ "epoch": 0.966824644549763,
5
  "eval_steps": 150,
6
+ "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1823
  "eval_samples_per_second": 21.01,
1824
  "eval_steps_per_second": 5.255,
1825
  "step": 2400
1826
+ },
1827
+ {
1828
+ "epoch": 0.9137440758293839,
1829
+ "grad_norm": 0.7534606456756592,
1830
+ "learning_rate": 1.9028598748504767e-06,
1831
+ "loss": 0.631,
1832
+ "step": 2410
1833
+ },
1834
+ {
1835
+ "epoch": 0.9175355450236967,
1836
+ "grad_norm": 0.7881170511245728,
1837
+ "learning_rate": 1.7405569831232704e-06,
1838
+ "loss": 0.4308,
1839
+ "step": 2420
1840
+ },
1841
+ {
1842
+ "epoch": 0.9213270142180094,
1843
+ "grad_norm": 0.7743288278579712,
1844
+ "learning_rate": 1.5853653778583666e-06,
1845
+ "loss": 0.3928,
1846
+ "step": 2430
1847
+ },
1848
+ {
1849
+ "epoch": 0.9251184834123223,
1850
+ "grad_norm": 0.6832749247550964,
1851
+ "learning_rate": 1.437307927366971e-06,
1852
+ "loss": 0.3336,
1853
+ "step": 2440
1854
+ },
1855
+ {
1856
+ "epoch": 0.9289099526066351,
1857
+ "grad_norm": 0.6585612297058105,
1858
+ "learning_rate": 1.2964064487045236e-06,
1859
+ "loss": 0.2391,
1860
+ "step": 2450
1861
+ },
1862
+ {
1863
+ "epoch": 0.9327014218009478,
1864
+ "grad_norm": 0.7330191135406494,
1865
+ "learning_rate": 1.162681704455798e-06,
1866
+ "loss": 0.6028,
1867
+ "step": 2460
1868
+ },
1869
+ {
1870
+ "epoch": 0.9364928909952607,
1871
+ "grad_norm": 0.8107926249504089,
1872
+ "learning_rate": 1.036153399675488e-06,
1873
+ "loss": 0.4511,
1874
+ "step": 2470
1875
+ },
1876
+ {
1877
+ "epoch": 0.9402843601895735,
1878
+ "grad_norm": 0.7075092792510986,
1879
+ "learning_rate": 9.168401789845183e-07,
1880
+ "loss": 0.3733,
1881
+ "step": 2480
1882
+ },
1883
+ {
1884
+ "epoch": 0.9440758293838862,
1885
+ "grad_norm": 0.6632401347160339,
1886
+ "learning_rate": 8.04759623822654e-07,
1887
+ "loss": 0.3243,
1888
+ "step": 2490
1889
+ },
1890
+ {
1891
+ "epoch": 0.9478672985781991,
1892
+ "grad_norm": 0.6346734166145325,
1893
+ "learning_rate": 6.999282498578174e-07,
1894
+ "loss": 0.2299,
1895
+ "step": 2500
1896
+ },
1897
+ {
1898
+ "epoch": 0.9516587677725118,
1899
+ "grad_norm": 0.7297029495239258,
1900
+ "learning_rate": 6.023615045523844e-07,
1901
+ "loss": 0.6426,
1902
+ "step": 2510
1903
+ },
1904
+ {
1905
+ "epoch": 0.9554502369668246,
1906
+ "grad_norm": 0.6962762475013733,
1907
+ "learning_rate": 5.120737648869389e-07,
1908
+ "loss": 0.4379,
1909
+ "step": 2520
1910
+ },
1911
+ {
1912
+ "epoch": 0.9592417061611375,
1913
+ "grad_norm": 0.7509403824806213,
1914
+ "learning_rate": 4.290783352417338e-07,
1915
+ "loss": 0.3845,
1916
+ "step": 2530
1917
+ },
1918
+ {
1919
+ "epoch": 0.9630331753554502,
1920
+ "grad_norm": 0.7338268160820007,
1921
+ "learning_rate": 3.5338744543622627e-07,
1922
+ "loss": 0.332,
1923
+ "step": 2540
1924
+ },
1925
+ {
1926
+ "epoch": 0.966824644549763,
1927
+ "grad_norm": 0.5980384945869446,
1928
+ "learning_rate": 2.8501224892695245e-07,
1929
+ "loss": 0.2342,
1930
+ "step": 2550
1931
+ },
1932
+ {
1933
+ "epoch": 0.966824644549763,
1934
+ "eval_loss": 0.396539568901062,
1935
+ "eval_runtime": 211.4226,
1936
+ "eval_samples_per_second": 21.01,
1937
+ "eval_steps_per_second": 5.255,
1938
+ "step": 2550
1939
  }
1940
  ],
1941
  "logging_steps": 10,
 
1964
  "attributes": {}
1965
  }
1966
  },
1967
+ "total_flos": 2.2657873952762757e+18,
1968
  "train_batch_size": 8,
1969
  "trial_name": null,
1970
  "trial_params": null