masatochi commited on
Commit
0fb84d4
·
verified ·
1 Parent(s): 763b1cc

Training in progress, step 260, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d469916d9bc99ab8690c2ad5963f520f6c7aebca89a0f0fac62a70421794a8e3
3
  size 48679352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b28c46bd32ec7687d71dcd8f92b07ccf8622a23ced323d5d17a73cc1d2d7c90
3
  size 48679352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:061e88c1730d0f3889058eaef8233b4d1a8ec193e7d2cf0deaf2c21b4bce6960
3
- size 25152500
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50f836283767652e9ac7d754244fd5074d2f19b6e5aa2726705730c0809d5111
3
+ size 25152884
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa476934231ac34b5459f284f02dd625d474b4000e9d8f8d25797737ca876934
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f8d57acea1e2f327c0a900d89663e7e6946a8381191b0de303acbea96b525fd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3373dd77cba0e4415af66a32e16afdc4a8cd4fd649cb8e376d68b5558a8a0e8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17d1b21cc84b70a525916759b0723a1fe102004e6a7697742d2db971361ed1d7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1843817787418655,
5
  "eval_steps": 692,
6
- "global_step": 255,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1800,6 +1800,41 @@
1800
  "learning_rate": 1.997448376186836e-05,
1801
  "loss": 1.1043,
1802
  "step": 255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1803
  }
1804
  ],
1805
  "logging_steps": 1,
@@ -1819,7 +1854,7 @@
1819
  "attributes": {}
1820
  }
1821
  },
1822
- "total_flos": 5.677100052774912e+17,
1823
  "train_batch_size": 4,
1824
  "trial_name": null,
1825
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.18799710773680406,
5
  "eval_steps": 692,
6
+ "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1800
  "learning_rate": 1.997448376186836e-05,
1801
  "loss": 1.1043,
1802
  "step": 255
1803
+ },
1804
+ {
1805
+ "epoch": 0.18510484454085321,
1806
+ "grad_norm": 0.28662002086639404,
1807
+ "learning_rate": 1.997415360037498e-05,
1808
+ "loss": 1.0743,
1809
+ "step": 256
1810
+ },
1811
+ {
1812
+ "epoch": 0.18582791033984092,
1813
+ "grad_norm": 0.266493022441864,
1814
+ "learning_rate": 1.9973821319329625e-05,
1815
+ "loss": 0.9767,
1816
+ "step": 257
1817
+ },
1818
+ {
1819
+ "epoch": 0.18655097613882862,
1820
+ "grad_norm": 0.34035131335258484,
1821
+ "learning_rate": 1.9973486918802912e-05,
1822
+ "loss": 1.2844,
1823
+ "step": 258
1824
+ },
1825
+ {
1826
+ "epoch": 0.18727404193781635,
1827
+ "grad_norm": 0.34165364503860474,
1828
+ "learning_rate": 1.9973150398865908e-05,
1829
+ "loss": 1.2808,
1830
+ "step": 259
1831
+ },
1832
+ {
1833
+ "epoch": 0.18799710773680406,
1834
+ "grad_norm": 0.3018459975719452,
1835
+ "learning_rate": 1.9972811759590117e-05,
1836
+ "loss": 0.9891,
1837
+ "step": 260
1838
  }
1839
  ],
1840
  "logging_steps": 1,
 
1854
  "attributes": {}
1855
  }
1856
  },
1857
+ "total_flos": 5.788415740084224e+17,
1858
  "train_batch_size": 4,
1859
  "trial_name": null,
1860
  "trial_params": null