nbroad commited on
Commit
03aa40c
·
verified ·
1 Parent(s): dd1aa39

Training in progress, step 556

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ad3eb7b0b94dc689a1899a722c5ebdaf844b0b2852f08dd0577b7772ca3f743
3
  size 174655536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e93418ab0ec9789f1f061bc7bbbc1cda6d53e361a8bd5356d480afe6088887b
3
  size 174655536
wandb/run-20250201_230729-f0utp5v4/files/output.log CHANGED
@@ -264,3 +264,32 @@ The model is not an instance of PreTrainedModel. No liger kernels will be applie
264
  {'loss': 0.1121, 'grad_norm': 2.025880813598633, 'learning_rate': 1.9701317263016395e-06, 'epoch': 0.9}
265
  {'loss': 0.1215, 'grad_norm': 1.783371925354004, 'learning_rate': 1.8379842703837778e-06, 'epoch': 0.9}
266
  {'eval_loss': 0.09919534623622894, 'eval_runtime': 29.508, 'eval_samples_per_second': 16.945, 'eval_steps_per_second': 2.135, 'epoch': 0.9}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  {'loss': 0.1121, 'grad_norm': 2.025880813598633, 'learning_rate': 1.9701317263016395e-06, 'epoch': 0.9}
265
  {'loss': 0.1215, 'grad_norm': 1.783371925354004, 'learning_rate': 1.8379842703837778e-06, 'epoch': 0.9}
266
  {'eval_loss': 0.09919534623622894, 'eval_runtime': 29.508, 'eval_samples_per_second': 16.945, 'eval_steps_per_second': 2.135, 'epoch': 0.9}
267
+ {'loss': 0.0834, 'grad_norm': 2.043714761734009, 'learning_rate': 1.7102839232140643e-06, 'epoch': 0.9}
268
+ {'loss': 0.1209, 'grad_norm': 1.7978439331054688, 'learning_rate': 1.587050850157673e-06, 'epoch': 0.91}
269
+ {'loss': 0.1003, 'grad_norm': 1.8997597694396973, 'learning_rate': 1.4683045111453942e-06, 'epoch': 0.91}
270
+ {'loss': 0.1457, 'grad_norm': 1.7464087009429932, 'learning_rate': 1.3540636576007114e-06, 'epoch': 0.91}
271
+ {'loss': 0.109, 'grad_norm': 2.095072031021118, 'learning_rate': 1.2443463294787215e-06, 'epoch': 0.92}
272
+ {'loss': 0.1198, 'grad_norm': 2.3129279613494873, 'learning_rate': 1.139169852417422e-06, 'epoch': 0.92}
273
+ {'loss': 0.1088, 'grad_norm': 2.199665069580078, 'learning_rate': 1.0385508350017836e-06, 'epoch': 0.92}
274
+ {'loss': 0.098, 'grad_norm': 1.9769052267074585, 'learning_rate': 9.425051661410677e-07, 'epoch': 0.93}
275
+ {'loss': 0.1172, 'grad_norm': 2.117598295211792, 'learning_rate': 8.510480125597809e-07, 'epoch': 0.93}
276
+ {'loss': 0.1381, 'grad_norm': 1.9077558517456055, 'learning_rate': 7.641938164026829e-07, 'epoch': 0.93}
277
+ {'loss': 0.1273, 'grad_norm': 2.680253744125366, 'learning_rate': 6.819562929541956e-07, 'epoch': 0.94}
278
+ {'loss': 0.092, 'grad_norm': 1.4617819786071777, 'learning_rate': 6.043484284725942e-07, 'epoch': 0.94}
279
+ {'loss': 0.1122, 'grad_norm': 1.4491819143295288, 'learning_rate': 5.313824781393417e-07, 'epoch': 0.95}
280
+ {'loss': 0.1137, 'grad_norm': 3.7938284873962402, 'learning_rate': 4.630699641238401e-07, 'epoch': 0.95}
281
+ {'loss': 0.1021, 'grad_norm': 1.7520281076431274, 'learning_rate': 3.9942167376395886e-07, 'epoch': 0.95}
282
+ {'loss': 0.0842, 'grad_norm': 1.836514949798584, 'learning_rate': 3.404476578625815e-07, 'epoch': 0.96}
283
+ {'loss': 0.1288, 'grad_norm': 2.900033950805664, 'learning_rate': 2.861572291004644e-07, 'epoch': 0.96}
284
+ {'loss': 0.0961, 'grad_norm': 1.456766128540039, 'learning_rate': 2.3655896056566706e-07, 'epoch': 0.96}
285
+ {'loss': 0.113, 'grad_norm': 2.9401018619537354, 'learning_rate': 1.9166068439974815e-07, 'epoch': 0.97}
286
+ {'loss': 0.1173, 'grad_norm': 1.9628756046295166, 'learning_rate': 1.5146949056099479e-07, 'epoch': 0.97}
287
+ {'loss': 0.136, 'grad_norm': 2.926396608352661, 'learning_rate': 1.1599172570482486e-07, 'epoch': 0.97}
288
+ {'loss': 0.1254, 'grad_norm': 1.7841299772262573, 'learning_rate': 8.523299218158198e-08, 'epoch': 0.98}
289
+ {'loss': 0.0775, 'grad_norm': 1.310727596282959, 'learning_rate': 5.919814715185323e-08, 'epoch': 0.98}
290
+ {'loss': 0.1036, 'grad_norm': 2.0527987480163574, 'learning_rate': 3.789130181947598e-08, 'epoch': 0.99}
291
+ {'loss': 0.1269, 'grad_norm': 3.3416221141815186, 'learning_rate': 2.1315820782323858e-08, 'epoch': 0.99}
292
+ {'loss': 0.1051, 'grad_norm': 1.6027116775512695, 'learning_rate': 9.474321501001804e-09, 'epoch': 0.99}
293
+ {'loss': 0.0901, 'grad_norm': 1.4576125144958496, 'learning_rate': 2.368673885516648e-09, 'epoch': 1.0}
294
+ {'loss': 0.152, 'grad_norm': 2.3267993927001953, 'learning_rate': 0.0, 'epoch': 1.0}
295
+ {'train_runtime': 6239.1919, 'train_samples_per_second': 5.705, 'train_steps_per_second': 0.089, 'train_loss': 0.14814304485381077, 'epoch': 1.0}
wandb/run-20250201_230729-f0utp5v4/run-f0utp5v4.wandb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa3e5916c559d9097cf20e16e3ea4182e4ae193b1eb93b59c6182e62e6063ca8
3
- size 1146880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fcb3b6d4b6e9b835ee5e15b2ea579970826d1b8e1b3e8234853ca29cbda2b7d
3
+ size 1277952