aseratus1 committed on
Commit
c6ee6a4
·
verified ·
1 Parent(s): 3e20a39

Training in progress, step 2638, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc7a32ae1a260d232561f8bfa7cadee6bbcd47aca2968958089976f95afd9512
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2d9a8d6795192f637c7fd56cae539e7d3120cfadfde689a0e8f65910f3e46c
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4630961c792966353518425385bb6bdec4ee01ada3092767c85b23a263ff78d
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d52f666587ac029c213c632acf57f59dae1c5a9631fcd09bc194b4473efa6bd1
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0df359f92a67934fe621c77a319e3203ed8bf9f11020a6732c84063a23dd6bca
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f996ce44acaa379abe63ccd766fc8a9cc3ce72bbc46d7af068e024d3c0760985
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f216ea40a4443b0f449133ae4ca79e6899c677cb6c40f87fbb71204a9e2a38ba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8936bfdedeac688941b6f290fca3503d9a0d3f36a293d20a5b181c2f84403d43
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.396539568901062,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2550",
4
- "epoch": 0.966824644549763,
5
  "eval_steps": 150,
6
- "global_step": 2550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1936,6 +1936,62 @@
1936
  "eval_samples_per_second": 21.01,
1937
  "eval_steps_per_second": 5.255,
1938
  "step": 2550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1939
  }
1940
  ],
1941
  "logging_steps": 10,
@@ -1959,12 +2015,12 @@
1959
  "should_evaluate": false,
1960
  "should_log": false,
1961
  "should_save": true,
1962
- "should_training_stop": false
1963
  },
1964
  "attributes": {}
1965
  }
1966
  },
1967
- "total_flos": 2.2657873952762757e+18,
1968
  "train_batch_size": 8,
1969
  "trial_name": null,
1970
  "trial_params": null
 
1
  {
2
  "best_metric": 0.396539568901062,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2550",
4
+ "epoch": 1.0001895734597157,
5
  "eval_steps": 150,
6
+ "global_step": 2638,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1936
  "eval_samples_per_second": 21.01,
1937
  "eval_steps_per_second": 5.255,
1938
  "step": 2550
1939
+ },
1940
+ {
1941
+ "epoch": 0.9706161137440759,
1942
+ "grad_norm": 0.7466861605644226,
1943
+ "learning_rate": 2.239628211639977e-07,
1944
+ "loss": 0.6012,
1945
+ "step": 2560
1946
+ },
1947
+ {
1948
+ "epoch": 0.9744075829383886,
1949
+ "grad_norm": 0.7521525621414185,
1950
+ "learning_rate": 1.7024815810632865e-07,
1951
+ "loss": 0.4468,
1952
+ "step": 2570
1953
+ },
1954
+ {
1955
+ "epoch": 0.9781990521327014,
1956
+ "grad_norm": 0.7282313704490662,
1957
+ "learning_rate": 1.2387617489619253e-07,
1958
+ "loss": 0.3767,
1959
+ "step": 2580
1960
+ },
1961
+ {
1962
+ "epoch": 0.9819905213270143,
1963
+ "grad_norm": 0.6949441432952881,
1964
+ "learning_rate": 8.485370469277776e-08,
1965
+ "loss": 0.3192,
1966
+ "step": 2590
1967
+ },
1968
+ {
1969
+ "epoch": 0.985781990521327,
1970
+ "grad_norm": 0.6286324262619019,
1971
+ "learning_rate": 5.3186497665308474e-08,
1972
+ "loss": 0.2562,
1973
+ "step": 2600
1974
+ },
1975
+ {
1976
+ "epoch": 0.9895734597156398,
1977
+ "grad_norm": 0.6858911514282227,
1978
+ "learning_rate": 2.8879220145727748e-08,
1979
+ "loss": 0.5689,
1980
+ "step": 2610
1981
+ },
1982
+ {
1983
+ "epoch": 0.9933649289099526,
1984
+ "grad_norm": 0.7098546624183655,
1985
+ "learning_rate": 1.193545394109763e-08,
1986
+ "loss": 0.3939,
1987
+ "step": 2620
1988
+ },
1989
+ {
1990
+ "epoch": 0.9971563981042654,
1991
+ "grad_norm": 0.6426143050193787,
1992
+ "learning_rate": 2.3576958057880406e-09,
1993
+ "loss": 0.3271,
1994
+ "step": 2630
1995
  }
1996
  ],
1997
  "logging_steps": 10,
 
2015
  "should_evaluate": false,
2016
  "should_log": false,
2017
  "should_save": true,
2018
+ "should_training_stop": true
2019
  },
2020
  "attributes": {}
2021
  }
2022
  },
2023
+ "total_flos": 2.3448665055370936e+18,
2024
  "train_batch_size": 8,
2025
  "trial_name": null,
2026
  "trial_params": null