Training in progress, step 2550, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:dc7a32ae1a260d232561f8bfa7cadee6bbcd47aca2968958089976f95afd9512
 size 671149168
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e4630961c792966353518425385bb6bdec4ee01ada3092767c85b23a263ff78d
 size 341314644
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0df359f92a67934fe621c77a319e3203ed8bf9f11020a6732c84063a23dd6bca
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f216ea40a4443b0f449133ae4ca79e6899c677cb6c40f87fbb71204a9e2a38ba
 size 1064
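The four binary files above are tracked with Git LFS, so the commit only touches their pointer files, each made of three key/value lines (version, oid, size). A minimal, hypothetical Python sketch for reading such a pointer (the helper name is illustrative and not part of this repository):

# Minimal sketch (illustrative only): parse a Git LFS pointer file into a dict,
# e.g. {"version": "https://git-lfs.github.com/spec/v1", "oid": "sha256:...", "size": "671149168"}.
def read_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields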
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-"best_metric": 0.
-"best_model_checkpoint": "miner_id_24/checkpoint-
-"epoch": 0.
+"best_metric": 0.396539568901062,
+"best_model_checkpoint": "miner_id_24/checkpoint-2550",
+"epoch": 0.966824644549763,
 "eval_steps": 150,
-"global_step":
+"global_step": 2550,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -1823,6 +1823,119 @@
 "eval_samples_per_second": 21.01,
 "eval_steps_per_second": 5.255,
 "step": 2400
+},
+{
+"epoch": 0.9137440758293839,
+"grad_norm": 0.7534606456756592,
+"learning_rate": 1.9028598748504767e-06,
+"loss": 0.631,
+"step": 2410
+},
+{
+"epoch": 0.9175355450236967,
+"grad_norm": 0.7881170511245728,
+"learning_rate": 1.7405569831232704e-06,
+"loss": 0.4308,
+"step": 2420
+},
+{
+"epoch": 0.9213270142180094,
+"grad_norm": 0.7743288278579712,
+"learning_rate": 1.5853653778583666e-06,
+"loss": 0.3928,
+"step": 2430
+},
+{
+"epoch": 0.9251184834123223,
+"grad_norm": 0.6832749247550964,
+"learning_rate": 1.437307927366971e-06,
+"loss": 0.3336,
+"step": 2440
+},
+{
+"epoch": 0.9289099526066351,
+"grad_norm": 0.6585612297058105,
+"learning_rate": 1.2964064487045236e-06,
+"loss": 0.2391,
+"step": 2450
+},
+{
+"epoch": 0.9327014218009478,
+"grad_norm": 0.7330191135406494,
+"learning_rate": 1.162681704455798e-06,
+"loss": 0.6028,
+"step": 2460
+},
+{
+"epoch": 0.9364928909952607,
+"grad_norm": 0.8107926249504089,
+"learning_rate": 1.036153399675488e-06,
+"loss": 0.4511,
+"step": 2470
+},
+{
+"epoch": 0.9402843601895735,
+"grad_norm": 0.7075092792510986,
+"learning_rate": 9.168401789845183e-07,
+"loss": 0.3733,
+"step": 2480
+},
+{
+"epoch": 0.9440758293838862,
+"grad_norm": 0.6632401347160339,
+"learning_rate": 8.04759623822654e-07,
+"loss": 0.3243,
+"step": 2490
+},
+{
+"epoch": 0.9478672985781991,
+"grad_norm": 0.6346734166145325,
+"learning_rate": 6.999282498578174e-07,
+"loss": 0.2299,
+"step": 2500
+},
+{
+"epoch": 0.9516587677725118,
+"grad_norm": 0.7297029495239258,
+"learning_rate": 6.023615045523844e-07,
+"loss": 0.6426,
+"step": 2510
+},
+{
+"epoch": 0.9554502369668246,
+"grad_norm": 0.6962762475013733,
+"learning_rate": 5.120737648869389e-07,
+"loss": 0.4379,
+"step": 2520
+},
+{
+"epoch": 0.9592417061611375,
+"grad_norm": 0.7509403824806213,
+"learning_rate": 4.290783352417338e-07,
+"loss": 0.3845,
+"step": 2530
+},
+{
+"epoch": 0.9630331753554502,
+"grad_norm": 0.7338268160820007,
+"learning_rate": 3.5338744543622627e-07,
+"loss": 0.332,
+"step": 2540
+},
+{
+"epoch": 0.966824644549763,
+"grad_norm": 0.5980384945869446,
+"learning_rate": 2.8501224892695245e-07,
+"loss": 0.2342,
+"step": 2550
+},
+{
+"epoch": 0.966824644549763,
+"eval_loss": 0.396539568901062,
+"eval_runtime": 211.4226,
+"eval_samples_per_second": 21.01,
+"eval_steps_per_second": 5.255,
+"step": 2550
 }
 ],
 "logging_steps": 10,
@@ -1851,7 +1964,7 @@
 "attributes": {}
 }
 },
-"total_flos": 2.
+"total_flos": 2.2657873952762757e+18,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
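For orientation, the updated trainer_state.json can be inspected directly once the checkpoint files are downloaded. A minimal sketch, assuming the files sit under last-checkpoint/ as in this commit (values in the comments are taken from the diff above):

# Minimal sketch (illustrative only): read the values recorded at step 2550.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.396539568901062
print(state["best_model_checkpoint"])  # miner_id_24/checkpoint-2550
print(state["global_step"])            # 2550

# log_history holds one dict per logged training/eval step, as in the hunk above.
last_eval = [e for e in state["log_history"] if "eval_loss" in e][-1]
print(last_eval["eval_loss"], last_eval["step"])   # 0.396539568901062 2550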