cimol committed
Commit 7039bb1 · verified · 1 Parent(s): f18b315

Training in progress, step 100, checkpoint

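Context note: the training script itself is not part of this commit, but the fields visible in the trainer_state.json diff below (eval_steps 50, logging_steps 5, train_batch_size 2, a run that stops at global_step 100, checkpoints under miner_id_24/) are consistent with a transformers TrainingArguments setup roughly like the sketch that follows. Anything not logged in trainer_state.json (learning_rate, warmup_steps, save_steps, output_dir, strategy names) is inferred or assumed, not read from the commit.

# Hypothetical reconstruction, not the author's actual configuration.
# Values marked "logged" appear in trainer_state.json in this commit;
# everything else is an assumption or an inference from the logged numbers.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",        # assumed from "best_model_checkpoint": "miner_id_24/checkpoint-100"
    per_device_train_batch_size=2,   # logged: "train_batch_size": 2
    max_steps=100,                   # logged: "should_training_stop" flips to true at step 100
    logging_steps=5,                 # logged: "logging_steps": 5
    eval_strategy="steps",           # spelled evaluation_strategy in older transformers releases
    eval_steps=50,                   # logged: "eval_steps": 50
    save_strategy="steps",
    save_steps=50,                   # assumed: both checkpoint-50 and checkpoint-100 exist
    lr_scheduler_type="cosine",      # inferred from the learning-rate values logged below
    learning_rate=7e-5,              # inferred: the logged LR at step 55 is exactly 3.5e-5
    warmup_steps=10,                 # inferred: the cosine tail fits a 10-step warmup
)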
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe2ada04e1b192dc4b6d8ad3c4b523c2af0bce420c134c25c3687d18cee6a771
+oid sha256:758d8936028f971c6a1dbfd6ce41c233b7bb12c2f09f96a6ae802c17eebd542e
 size 231448
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35e410464659180c0d09d9999217f9fc4a4f42fa12177a77632fa7586713ce6b
+oid sha256:42e3466f52f2dcc5a641090d4992a8bbdbfb0e594ba8e245aebbcabd2a9514d0
 size 254576
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4fa8cff5400f5c41601ed4e23d86bd647b9ecd79cbccf4a05ab9b3c387518cc
+oid sha256:8e9d350457f4e3fa2819a39d91d2925669e3fb708dae5b483527de8c6e49d2d3
 size 14448
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08a7d2c17ed9294e1527e5a9bde2ad23f23e2f25d5162f4a4bdc1d38d3e802d7
+oid sha256:e99c7bce084b3b5f753ff6dd6c4c3531fa235b1767de5e6ba78e4fc50f95a755
 size 14448
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c77ba85d98911ad9cf0002f91e1d1e8461a37560e5cecd2480c44d2118b826e
+oid sha256:cee25becd0d944312ef6a737028fa37979bb421c6508dd51dcf308113ec99459
 size 1064
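Each of the five files above is tracked with Git LFS, so the commit only rewrites the pointer file (its "oid sha256" and "size" fields); the binary blobs themselves live in LFS storage. Below is a minimal sketch for checking a downloaded blob against its new pointer, assuming the file has already been pulled to the local path shown (the path and usage are illustrative, only the pointer semantics are standard).

import hashlib, os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    # The LFS "oid sha256:" field is the SHA-256 of the file contents,
    # and "size" is its length in bytes.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

# New pointer values for last-checkpoint/adapter_model.safetensors in this commit:
print(verify_lfs_object(
    "last-checkpoint/adapter_model.safetensors",
    "758d8936028f971c6a1dbfd6ce41c233b7bb12c2f09f96a6ae802c17eebd542e",
    231448,
))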
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.318482398986816,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0016684463427656167,
+  "best_metric": 10.305120468139648,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0033368926855312335,
   "eval_steps": 50,
-  "global_step": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -93,6 +93,84 @@
       "eval_samples_per_second": 140.32,
       "eval_steps_per_second": 17.548,
       "step": 50
+    },
+    {
+      "epoch": 0.0018352909770421783,
+      "grad_norm": 0.3079063296318054,
+      "learning_rate": 3.5e-05,
+      "loss": 10.3132,
+      "step": 55
+    },
+    {
+      "epoch": 0.00200213561131874,
+      "grad_norm": 0.28845730423927307,
+      "learning_rate": 2.8922313781657438e-05,
+      "loss": 10.3096,
+      "step": 60
+    },
+    {
+      "epoch": 0.0021689802455953015,
+      "grad_norm": 0.25424379110336304,
+      "learning_rate": 2.3029294983601594e-05,
+      "loss": 10.3097,
+      "step": 65
+    },
+    {
+      "epoch": 0.0023358248798718635,
+      "grad_norm": 0.24924302101135254,
+      "learning_rate": 1.7500000000000005e-05,
+      "loss": 10.3121,
+      "step": 70
+    },
+    {
+      "epoch": 0.002502669514148425,
+      "grad_norm": 0.2865857481956482,
+      "learning_rate": 1.2502433660971121e-05,
+      "loss": 10.3141,
+      "step": 75
+    },
+    {
+      "epoch": 0.0026695141484249867,
+      "grad_norm": 0.24225687980651855,
+      "learning_rate": 8.188444490835772e-06,
+      "loss": 10.3006,
+      "step": 80
+    },
+    {
+      "epoch": 0.0028363587827015483,
+      "grad_norm": 0.2341487854719162,
+      "learning_rate": 4.689110867544645e-06,
+      "loss": 10.3027,
+      "step": 85
+    },
+    {
+      "epoch": 0.00300320341697811,
+      "grad_norm": 0.23641282320022583,
+      "learning_rate": 2.1107582724932087e-06,
+      "loss": 10.3043,
+      "step": 90
+    },
+    {
+      "epoch": 0.0031700480512546715,
+      "grad_norm": 0.23910833895206451,
+      "learning_rate": 5.317286445727193e-07,
+      "loss": 10.3077,
+      "step": 95
+    },
+    {
+      "epoch": 0.0033368926855312335,
+      "grad_norm": 0.2778906524181366,
+      "learning_rate": 0.0,
+      "loss": 10.3015,
+      "step": 100
+    },
+    {
+      "epoch": 0.0033368926855312335,
+      "eval_loss": 10.305120468139648,
+      "eval_runtime": 90.3868,
+      "eval_samples_per_second": 139.6,
+      "eval_steps_per_second": 17.458,
+      "step": 100
     }
   ],
   "logging_steps": 5,
@@ -116,12 +194,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 4870317932544.0,
+  "total_flos": 9699936829440.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null