nbroad commited on
Commit
dd1aa39
·
verified ·
1 Parent(s): 8464743

Training in progress, step 500

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:691508b9afe4a5a54306c4c2dd44c926c6e9505491dc565038253f6d1dffc648
3
  size 174655536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ad3eb7b0b94dc689a1899a722c5ebdaf844b0b2852f08dd0577b7772ca3f743
3
  size 174655536
wandb/run-20250201_230729-f0utp5v4/files/output.log CHANGED
@@ -213,3 +213,54 @@ The model is not an instance of PreTrainedModel. No liger kernels will be applie
213
  {'loss': 0.1094, 'grad_norm': 1.8449875116348267, 'learning_rate': 1.360816959797193e-05, 'epoch': 0.72}
214
  {'loss': 0.1214, 'grad_norm': 1.983917474746704, 'learning_rate': 1.3293731505354372e-05, 'epoch': 0.72}
215
  {'eval_loss': 0.11013749986886978, 'eval_runtime': 29.5522, 'eval_samples_per_second': 16.919, 'eval_steps_per_second': 2.132, 'epoch': 0.72}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  {'loss': 0.1094, 'grad_norm': 1.8449875116348267, 'learning_rate': 1.360816959797193e-05, 'epoch': 0.72}
214
  {'loss': 0.1214, 'grad_norm': 1.983917474746704, 'learning_rate': 1.3293731505354372e-05, 'epoch': 0.72}
215
  {'eval_loss': 0.11013749986886978, 'eval_runtime': 29.5522, 'eval_samples_per_second': 16.919, 'eval_steps_per_second': 2.132, 'epoch': 0.72}
216
+ {'loss': 0.0823, 'grad_norm': 1.5258815288543701, 'learning_rate': 1.298193152619731e-05, 'epoch': 0.72}
217
+ {'loss': 0.1236, 'grad_norm': 3.4213144779205322, 'learning_rate': 1.2672818897331979e-05, 'epoch': 0.73}
218
+ {'loss': 0.108, 'grad_norm': 1.5221023559570312, 'learning_rate': 1.2366442431225809e-05, 'epoch': 0.73}
219
+ {'loss': 0.0905, 'grad_norm': 1.9923921823501587, 'learning_rate': 1.2062850508274445e-05, 'epoch': 0.73}
220
+ {'loss': 0.1252, 'grad_norm': 2.036670446395874, 'learning_rate': 1.1762091069161845e-05, 'epoch': 0.74}
221
+ {'loss': 0.1135, 'grad_norm': 2.5935540199279785, 'learning_rate': 1.1464211607289975e-05, 'epoch': 0.74}
222
+ {'loss': 0.1373, 'grad_norm': 2.882824420928955, 'learning_rate': 1.116925916127899e-05, 'epoch': 0.74}
223
+ {'loss': 0.148, 'grad_norm': 2.0103445053100586, 'learning_rate': 1.0877280307539308e-05, 'epoch': 0.75}
224
+ {'loss': 0.0915, 'grad_norm': 1.70180344581604, 'learning_rate': 1.058832115291668e-05, 'epoch': 0.75}
225
+ {'loss': 0.1477, 'grad_norm': 2.8699634075164795, 'learning_rate': 1.0302427327411311e-05, 'epoch': 0.76}
226
+ {'loss': 0.1234, 'grad_norm': 1.696622610092163, 'learning_rate': 1.0019643976972454e-05, 'epoch': 0.76}
227
+ {'loss': 0.0802, 'grad_norm': 1.2792108058929443, 'learning_rate': 9.740015756369266e-06, 'epoch': 0.76}
228
+ {'loss': 0.1025, 'grad_norm': 1.7940163612365723, 'learning_rate': 9.463586822139339e-06, 'epoch': 0.77}
229
+ {'loss': 0.1111, 'grad_norm': 2.0700793266296387, 'learning_rate': 9.190400825615856e-06, 'epoch': 0.77}
230
+ {'loss': 0.1408, 'grad_norm': 2.9282584190368652, 'learning_rate': 8.920500906034529e-06, 'epoch': 0.77}
231
+ {'loss': 0.1124, 'grad_norm': 2.419184923171997, 'learning_rate': 8.65392968372144e-06, 'epoch': 0.78}
232
+ {'loss': 0.1264, 'grad_norm': 3.9152793884277344, 'learning_rate': 8.390729253362793e-06, 'epoch': 0.78}
233
+ {'loss': 0.1009, 'grad_norm': 1.5509711503982544, 'learning_rate': 8.13094117735766e-06, 'epoch': 0.78}
234
+ {'loss': 0.1185, 'grad_norm': 3.02478289604187, 'learning_rate': 7.874606479254786e-06, 'epoch': 0.79}
235
+ {'loss': 0.1245, 'grad_norm': 1.7570151090621948, 'learning_rate': 7.621765637274537e-06, 'epoch': 0.79}
236
+ {'loss': 0.114, 'grad_norm': 1.6860665082931519, 'learning_rate': 7.372458577916884e-06, 'epoch': 0.79}
237
+ {'loss': 0.0903, 'grad_norm': 2.6216647624969482, 'learning_rate': 7.12672466965656e-06, 'epoch': 0.8}
238
+ {'loss': 0.1071, 'grad_norm': 2.3235995769500732, 'learning_rate': 6.884602716726326e-06, 'epoch': 0.8}
239
+ {'loss': 0.1233, 'grad_norm': 1.6174989938735962, 'learning_rate': 6.6461309529893e-06, 'epoch': 0.81}
240
+ {'loss': 0.1144, 'grad_norm': 1.7587708234786987, 'learning_rate': 6.411347035901432e-06, 'epoch': 0.81}
241
+ {'loss': 0.1043, 'grad_norm': 1.5560749769210815, 'learning_rate': 6.1802880405649385e-06, 'epoch': 0.81}
242
+ {'loss': 0.1287, 'grad_norm': 2.0114247798919678, 'learning_rate': 5.952990453873698e-06, 'epoch': 0.82}
243
+ {'loss': 0.1079, 'grad_norm': 1.77411687374115, 'learning_rate': 5.72949016875158e-06, 'epoch': 0.82}
244
+ {'loss': 0.124, 'grad_norm': 1.6035161018371582, 'learning_rate': 5.509822478484482e-06, 'epoch': 0.82}
245
+ {'loss': 0.0856, 'grad_norm': 2.345869779586792, 'learning_rate': 5.294022071147177e-06, 'epoch': 0.83}
246
+ {'loss': 0.1366, 'grad_norm': 2.6405136585235596, 'learning_rate': 5.082123024125616e-06, 'epoch': 0.83}
247
+ {'loss': 0.108, 'grad_norm': 1.511285424232483, 'learning_rate': 4.87415879873575e-06, 'epoch': 0.83}
248
+ {'loss': 0.1356, 'grad_norm': 3.0424914360046387, 'learning_rate': 4.670162234939554e-06, 'epoch': 0.84}
249
+ {'loss': 0.1129, 'grad_norm': 2.102984666824341, 'learning_rate': 4.470165546159248e-06, 'epoch': 0.84}
250
+ {'loss': 0.1101, 'grad_norm': 2.232029676437378, 'learning_rate': 4.274200314190437e-06, 'epoch': 0.84}
251
+ {'loss': 0.1322, 'grad_norm': 2.306741237640381, 'learning_rate': 4.082297484214942e-06, 'epoch': 0.85}
252
+ {'loss': 0.1135, 'grad_norm': 2.2724695205688477, 'learning_rate': 3.894487359914229e-06, 'epoch': 0.85}
253
+ {'loss': 0.0969, 'grad_norm': 1.9427849054336548, 'learning_rate': 3.710799598684099e-06, 'epoch': 0.86}
254
+ {'loss': 0.1525, 'grad_norm': 2.483985424041748, 'learning_rate': 3.531263206951405e-06, 'epoch': 0.86}
255
+ {'loss': 0.1077, 'grad_norm': 1.8495820760726929, 'learning_rate': 3.355906535593666e-06, 'epoch': 0.86}
256
+ {'loss': 0.1095, 'grad_norm': 2.190869092941284, 'learning_rate': 3.1847572754620867e-06, 'epoch': 0.87}
257
+ {'loss': 0.1069, 'grad_norm': 1.6702929735183716, 'learning_rate': 3.017842453008868e-06, 'epoch': 0.87}
258
+ {'loss': 0.1171, 'grad_norm': 1.8601711988449097, 'learning_rate': 2.8551884260194194e-06, 'epoch': 0.87}
259
+ {'loss': 0.1291, 'grad_norm': 1.8374691009521484, 'learning_rate': 2.6968208794501305e-06, 'epoch': 0.88}
260
+ {'loss': 0.0966, 'grad_norm': 1.2284519672393799, 'learning_rate': 2.542764821372474e-06, 'epoch': 0.88}
261
+ {'loss': 0.1233, 'grad_norm': 2.5712902545928955, 'learning_rate': 2.3930445790238887e-06, 'epoch': 0.88}
262
+ {'loss': 0.1153, 'grad_norm': 1.797639012336731, 'learning_rate': 2.247683794966262e-06, 'epoch': 0.89}
263
+ {'loss': 0.1209, 'grad_norm': 1.7352319955825806, 'learning_rate': 2.1067054233524597e-06, 'epoch': 0.89}
264
+ {'loss': 0.1121, 'grad_norm': 2.025880813598633, 'learning_rate': 1.9701317263016395e-06, 'epoch': 0.9}
265
+ {'loss': 0.1215, 'grad_norm': 1.783371925354004, 'learning_rate': 1.8379842703837778e-06, 'epoch': 0.9}
266
+ {'eval_loss': 0.09919534623622894, 'eval_runtime': 29.508, 'eval_samples_per_second': 16.945, 'eval_steps_per_second': 2.135, 'epoch': 0.9}
wandb/run-20250201_230729-f0utp5v4/run-f0utp5v4.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04d02a27f7cbadabe8b49867d2ab66a7721e2e2cc1e19abcf27f6350a822b5e1
3
- size 917504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa3e5916c559d9097cf20e16e3ea4182e4ae193b1eb93b59c6182e62e6063ca8
3
+ size 1146880