louislu9911 commited on
Commit
38701ed
·
verified ·
1 Parent(s): d52e137

Training in progress, epoch 0

Browse files
all_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.69,
3
+ "train_loss": 1.3989773021803962,
4
+ "train_runtime": 2686.7823,
5
+ "train_samples_per_second": 215.019,
6
+ "train_steps_per_second": 0.268
7
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65d8af14b76b88a72dba26c9f42886b968beebc64a154174afd93b4d1db2e341
3
  size 241501816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb537367d710dc552253241b93ccdfd63866346a17353f9e474d22fb8771a021
3
  size 241501816
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.69,
3
+ "train_loss": 1.3989773021803962,
4
+ "train_runtime": 2686.7823,
5
+ "train_samples_per_second": 215.019,
6
+ "train_steps_per_second": 0.268
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,732 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8032710280373832,
3
+ "best_model_checkpoint": "resnet-152-finetuned-cassava-leaf-disease/checkpoint-703",
4
+ "epoch": 29.690721649484537,
5
+ "eval_steps": 500,
6
+ "global_step": 720,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.41,
13
+ "learning_rate": 6.944444444444445e-06,
14
+ "loss": 7.3336,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.82,
19
+ "learning_rate": 1.388888888888889e-05,
20
+ "loss": 7.2158,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.99,
25
+ "eval_accuracy": 0.0014018691588785046,
26
+ "eval_loss": 6.815145015716553,
27
+ "eval_runtime": 8.158,
28
+ "eval_samples_per_second": 262.318,
29
+ "eval_steps_per_second": 1.348,
30
+ "step": 24
31
+ },
32
+ {
33
+ "epoch": 1.24,
34
+ "learning_rate": 2.0833333333333336e-05,
35
+ "loss": 6.9635,
36
+ "step": 30
37
+ },
38
+ {
39
+ "epoch": 1.65,
40
+ "learning_rate": 2.777777777777778e-05,
41
+ "loss": 6.6136,
42
+ "step": 40
43
+ },
44
+ {
45
+ "epoch": 1.98,
46
+ "eval_accuracy": 0.046261682242990654,
47
+ "eval_loss": 5.699448108673096,
48
+ "eval_runtime": 8.3972,
49
+ "eval_samples_per_second": 254.846,
50
+ "eval_steps_per_second": 1.31,
51
+ "step": 48
52
+ },
53
+ {
54
+ "epoch": 2.06,
55
+ "learning_rate": 3.472222222222222e-05,
56
+ "loss": 6.1712,
57
+ "step": 50
58
+ },
59
+ {
60
+ "epoch": 2.47,
61
+ "learning_rate": 4.166666666666667e-05,
62
+ "loss": 5.5789,
63
+ "step": 60
64
+ },
65
+ {
66
+ "epoch": 2.89,
67
+ "learning_rate": 4.8611111111111115e-05,
68
+ "loss": 4.7064,
69
+ "step": 70
70
+ },
71
+ {
72
+ "epoch": 2.97,
73
+ "eval_accuracy": 0.45560747663551404,
74
+ "eval_loss": 3.352895975112915,
75
+ "eval_runtime": 7.9592,
76
+ "eval_samples_per_second": 268.872,
77
+ "eval_steps_per_second": 1.382,
78
+ "step": 72
79
+ },
80
+ {
81
+ "epoch": 3.3,
82
+ "learning_rate": 4.938271604938271e-05,
83
+ "loss": 3.8038,
84
+ "step": 80
85
+ },
86
+ {
87
+ "epoch": 3.71,
88
+ "learning_rate": 4.8611111111111115e-05,
89
+ "loss": 3.0888,
90
+ "step": 90
91
+ },
92
+ {
93
+ "epoch": 4.0,
94
+ "eval_accuracy": 0.5920560747663551,
95
+ "eval_loss": 2.1475517749786377,
96
+ "eval_runtime": 8.0136,
97
+ "eval_samples_per_second": 267.048,
98
+ "eval_steps_per_second": 1.373,
99
+ "step": 97
100
+ },
101
+ {
102
+ "epoch": 4.12,
103
+ "learning_rate": 4.783950617283951e-05,
104
+ "loss": 2.5659,
105
+ "step": 100
106
+ },
107
+ {
108
+ "epoch": 4.54,
109
+ "learning_rate": 4.70679012345679e-05,
110
+ "loss": 2.1887,
111
+ "step": 110
112
+ },
113
+ {
114
+ "epoch": 4.95,
115
+ "learning_rate": 4.62962962962963e-05,
116
+ "loss": 1.8458,
117
+ "step": 120
118
+ },
119
+ {
120
+ "epoch": 4.99,
121
+ "eval_accuracy": 0.6285046728971962,
122
+ "eval_loss": 1.588365077972412,
123
+ "eval_runtime": 8.4217,
124
+ "eval_samples_per_second": 254.105,
125
+ "eval_steps_per_second": 1.306,
126
+ "step": 121
127
+ },
128
+ {
129
+ "epoch": 5.36,
130
+ "learning_rate": 4.5524691358024696e-05,
131
+ "loss": 1.6147,
132
+ "step": 130
133
+ },
134
+ {
135
+ "epoch": 5.77,
136
+ "learning_rate": 4.4753086419753084e-05,
137
+ "loss": 1.4168,
138
+ "step": 140
139
+ },
140
+ {
141
+ "epoch": 5.98,
142
+ "eval_accuracy": 0.6588785046728972,
143
+ "eval_loss": 1.2460048198699951,
144
+ "eval_runtime": 7.9049,
145
+ "eval_samples_per_second": 270.718,
146
+ "eval_steps_per_second": 1.392,
147
+ "step": 145
148
+ },
149
+ {
150
+ "epoch": 6.19,
151
+ "learning_rate": 4.3981481481481486e-05,
152
+ "loss": 1.271,
153
+ "step": 150
154
+ },
155
+ {
156
+ "epoch": 6.6,
157
+ "learning_rate": 4.3209876543209875e-05,
158
+ "loss": 1.1951,
159
+ "step": 160
160
+ },
161
+ {
162
+ "epoch": 6.97,
163
+ "eval_accuracy": 0.6869158878504673,
164
+ "eval_loss": 1.0658469200134277,
165
+ "eval_runtime": 7.9886,
166
+ "eval_samples_per_second": 267.881,
167
+ "eval_steps_per_second": 1.377,
168
+ "step": 169
169
+ },
170
+ {
171
+ "epoch": 7.01,
172
+ "learning_rate": 4.243827160493827e-05,
173
+ "loss": 1.1222,
174
+ "step": 170
175
+ },
176
+ {
177
+ "epoch": 7.42,
178
+ "learning_rate": 4.166666666666667e-05,
179
+ "loss": 1.0719,
180
+ "step": 180
181
+ },
182
+ {
183
+ "epoch": 7.84,
184
+ "learning_rate": 4.089506172839506e-05,
185
+ "loss": 0.9871,
186
+ "step": 190
187
+ },
188
+ {
189
+ "epoch": 8.0,
190
+ "eval_accuracy": 0.7037383177570093,
191
+ "eval_loss": 0.9436482191085815,
192
+ "eval_runtime": 8.7539,
193
+ "eval_samples_per_second": 244.461,
194
+ "eval_steps_per_second": 1.257,
195
+ "step": 194
196
+ },
197
+ {
198
+ "epoch": 8.25,
199
+ "learning_rate": 4.012345679012346e-05,
200
+ "loss": 0.9461,
201
+ "step": 200
202
+ },
203
+ {
204
+ "epoch": 8.66,
205
+ "learning_rate": 3.935185185185186e-05,
206
+ "loss": 0.9247,
207
+ "step": 210
208
+ },
209
+ {
210
+ "epoch": 8.99,
211
+ "eval_accuracy": 0.7238317757009346,
212
+ "eval_loss": 0.8639922738075256,
213
+ "eval_runtime": 8.532,
214
+ "eval_samples_per_second": 250.821,
215
+ "eval_steps_per_second": 1.289,
216
+ "step": 218
217
+ },
218
+ {
219
+ "epoch": 9.07,
220
+ "learning_rate": 3.8580246913580246e-05,
221
+ "loss": 0.8806,
222
+ "step": 220
223
+ },
224
+ {
225
+ "epoch": 9.48,
226
+ "learning_rate": 3.780864197530865e-05,
227
+ "loss": 0.8467,
228
+ "step": 230
229
+ },
230
+ {
231
+ "epoch": 9.9,
232
+ "learning_rate": 3.7037037037037037e-05,
233
+ "loss": 0.826,
234
+ "step": 240
235
+ },
236
+ {
237
+ "epoch": 9.98,
238
+ "eval_accuracy": 0.7317757009345794,
239
+ "eval_loss": 0.7976669073104858,
240
+ "eval_runtime": 7.7765,
241
+ "eval_samples_per_second": 275.189,
242
+ "eval_steps_per_second": 1.415,
243
+ "step": 242
244
+ },
245
+ {
246
+ "epoch": 10.31,
247
+ "learning_rate": 3.626543209876543e-05,
248
+ "loss": 0.79,
249
+ "step": 250
250
+ },
251
+ {
252
+ "epoch": 10.72,
253
+ "learning_rate": 3.5493827160493834e-05,
254
+ "loss": 0.7964,
255
+ "step": 260
256
+ },
257
+ {
258
+ "epoch": 10.97,
259
+ "eval_accuracy": 0.7509345794392523,
260
+ "eval_loss": 0.7539175152778625,
261
+ "eval_runtime": 7.6095,
262
+ "eval_samples_per_second": 281.226,
263
+ "eval_steps_per_second": 1.446,
264
+ "step": 266
265
+ },
266
+ {
267
+ "epoch": 11.13,
268
+ "learning_rate": 3.472222222222222e-05,
269
+ "loss": 0.7405,
270
+ "step": 270
271
+ },
272
+ {
273
+ "epoch": 11.55,
274
+ "learning_rate": 3.395061728395062e-05,
275
+ "loss": 0.7471,
276
+ "step": 280
277
+ },
278
+ {
279
+ "epoch": 11.96,
280
+ "learning_rate": 3.317901234567901e-05,
281
+ "loss": 0.7168,
282
+ "step": 290
283
+ },
284
+ {
285
+ "epoch": 12.0,
286
+ "eval_accuracy": 0.7514018691588785,
287
+ "eval_loss": 0.7179247140884399,
288
+ "eval_runtime": 8.7034,
289
+ "eval_samples_per_second": 245.882,
290
+ "eval_steps_per_second": 1.264,
291
+ "step": 291
292
+ },
293
+ {
294
+ "epoch": 12.37,
295
+ "learning_rate": 3.240740740740741e-05,
296
+ "loss": 0.7166,
297
+ "step": 300
298
+ },
299
+ {
300
+ "epoch": 12.78,
301
+ "learning_rate": 3.16358024691358e-05,
302
+ "loss": 0.6893,
303
+ "step": 310
304
+ },
305
+ {
306
+ "epoch": 12.99,
307
+ "eval_accuracy": 0.7630841121495328,
308
+ "eval_loss": 0.6952459812164307,
309
+ "eval_runtime": 8.8433,
310
+ "eval_samples_per_second": 241.991,
311
+ "eval_steps_per_second": 1.244,
312
+ "step": 315
313
+ },
314
+ {
315
+ "epoch": 13.2,
316
+ "learning_rate": 3.08641975308642e-05,
317
+ "loss": 0.6942,
318
+ "step": 320
319
+ },
320
+ {
321
+ "epoch": 13.61,
322
+ "learning_rate": 3.0092592592592593e-05,
323
+ "loss": 0.687,
324
+ "step": 330
325
+ },
326
+ {
327
+ "epoch": 13.98,
328
+ "eval_accuracy": 0.764018691588785,
329
+ "eval_loss": 0.6812998652458191,
330
+ "eval_runtime": 8.1933,
331
+ "eval_samples_per_second": 261.19,
332
+ "eval_steps_per_second": 1.343,
333
+ "step": 339
334
+ },
335
+ {
336
+ "epoch": 14.02,
337
+ "learning_rate": 2.9320987654320992e-05,
338
+ "loss": 0.6621,
339
+ "step": 340
340
+ },
341
+ {
342
+ "epoch": 14.43,
343
+ "learning_rate": 2.8549382716049384e-05,
344
+ "loss": 0.6721,
345
+ "step": 350
346
+ },
347
+ {
348
+ "epoch": 14.85,
349
+ "learning_rate": 2.777777777777778e-05,
350
+ "loss": 0.6496,
351
+ "step": 360
352
+ },
353
+ {
354
+ "epoch": 14.97,
355
+ "eval_accuracy": 0.7691588785046729,
356
+ "eval_loss": 0.6577351689338684,
357
+ "eval_runtime": 7.8611,
358
+ "eval_samples_per_second": 272.227,
359
+ "eval_steps_per_second": 1.399,
360
+ "step": 363
361
+ },
362
+ {
363
+ "epoch": 15.26,
364
+ "learning_rate": 2.700617283950617e-05,
365
+ "loss": 0.6403,
366
+ "step": 370
367
+ },
368
+ {
369
+ "epoch": 15.67,
370
+ "learning_rate": 2.623456790123457e-05,
371
+ "loss": 0.6477,
372
+ "step": 380
373
+ },
374
+ {
375
+ "epoch": 16.0,
376
+ "eval_accuracy": 0.7813084112149533,
377
+ "eval_loss": 0.6433616280555725,
378
+ "eval_runtime": 7.6827,
379
+ "eval_samples_per_second": 278.548,
380
+ "eval_steps_per_second": 1.432,
381
+ "step": 388
382
+ },
383
+ {
384
+ "epoch": 16.08,
385
+ "learning_rate": 2.5462962962962965e-05,
386
+ "loss": 0.6452,
387
+ "step": 390
388
+ },
389
+ {
390
+ "epoch": 16.49,
391
+ "learning_rate": 2.4691358024691357e-05,
392
+ "loss": 0.6371,
393
+ "step": 400
394
+ },
395
+ {
396
+ "epoch": 16.91,
397
+ "learning_rate": 2.3919753086419755e-05,
398
+ "loss": 0.5992,
399
+ "step": 410
400
+ },
401
+ {
402
+ "epoch": 16.99,
403
+ "eval_accuracy": 0.7771028037383177,
404
+ "eval_loss": 0.6388670802116394,
405
+ "eval_runtime": 8.24,
406
+ "eval_samples_per_second": 259.708,
407
+ "eval_steps_per_second": 1.335,
408
+ "step": 412
409
+ },
410
+ {
411
+ "epoch": 17.32,
412
+ "learning_rate": 2.314814814814815e-05,
413
+ "loss": 0.6016,
414
+ "step": 420
415
+ },
416
+ {
417
+ "epoch": 17.73,
418
+ "learning_rate": 2.2376543209876542e-05,
419
+ "loss": 0.6059,
420
+ "step": 430
421
+ },
422
+ {
423
+ "epoch": 17.98,
424
+ "eval_accuracy": 0.7906542056074767,
425
+ "eval_loss": 0.623566746711731,
426
+ "eval_runtime": 8.516,
427
+ "eval_samples_per_second": 251.293,
428
+ "eval_steps_per_second": 1.292,
429
+ "step": 436
430
+ },
431
+ {
432
+ "epoch": 18.14,
433
+ "learning_rate": 2.1604938271604937e-05,
434
+ "loss": 0.6031,
435
+ "step": 440
436
+ },
437
+ {
438
+ "epoch": 18.56,
439
+ "learning_rate": 2.0833333333333336e-05,
440
+ "loss": 0.6126,
441
+ "step": 450
442
+ },
443
+ {
444
+ "epoch": 18.97,
445
+ "learning_rate": 2.006172839506173e-05,
446
+ "loss": 0.5993,
447
+ "step": 460
448
+ },
449
+ {
450
+ "epoch": 18.97,
451
+ "eval_accuracy": 0.7934579439252336,
452
+ "eval_loss": 0.6181445717811584,
453
+ "eval_runtime": 7.8023,
454
+ "eval_samples_per_second": 274.277,
455
+ "eval_steps_per_second": 1.41,
456
+ "step": 460
457
+ },
458
+ {
459
+ "epoch": 19.38,
460
+ "learning_rate": 1.9290123456790123e-05,
461
+ "loss": 0.5872,
462
+ "step": 470
463
+ },
464
+ {
465
+ "epoch": 19.79,
466
+ "learning_rate": 1.8518518518518518e-05,
467
+ "loss": 0.5802,
468
+ "step": 480
469
+ },
470
+ {
471
+ "epoch": 20.0,
472
+ "eval_accuracy": 0.7911214953271029,
473
+ "eval_loss": 0.611458957195282,
474
+ "eval_runtime": 8.4234,
475
+ "eval_samples_per_second": 254.054,
476
+ "eval_steps_per_second": 1.306,
477
+ "step": 485
478
+ },
479
+ {
480
+ "epoch": 20.21,
481
+ "learning_rate": 1.7746913580246917e-05,
482
+ "loss": 0.6008,
483
+ "step": 490
484
+ },
485
+ {
486
+ "epoch": 20.62,
487
+ "learning_rate": 1.697530864197531e-05,
488
+ "loss": 0.5905,
489
+ "step": 500
490
+ },
491
+ {
492
+ "epoch": 20.99,
493
+ "eval_accuracy": 0.7929906542056074,
494
+ "eval_loss": 0.6005003452301025,
495
+ "eval_runtime": 7.7149,
496
+ "eval_samples_per_second": 277.386,
497
+ "eval_steps_per_second": 1.426,
498
+ "step": 509
499
+ },
500
+ {
501
+ "epoch": 21.03,
502
+ "learning_rate": 1.6203703703703704e-05,
503
+ "loss": 0.5861,
504
+ "step": 510
505
+ },
506
+ {
507
+ "epoch": 21.44,
508
+ "learning_rate": 1.54320987654321e-05,
509
+ "loss": 0.5865,
510
+ "step": 520
511
+ },
512
+ {
513
+ "epoch": 21.86,
514
+ "learning_rate": 1.4660493827160496e-05,
515
+ "loss": 0.5825,
516
+ "step": 530
517
+ },
518
+ {
519
+ "epoch": 21.98,
520
+ "eval_accuracy": 0.7953271028037383,
521
+ "eval_loss": 0.5945016145706177,
522
+ "eval_runtime": 7.5452,
523
+ "eval_samples_per_second": 283.623,
524
+ "eval_steps_per_second": 1.458,
525
+ "step": 533
526
+ },
527
+ {
528
+ "epoch": 22.27,
529
+ "learning_rate": 1.388888888888889e-05,
530
+ "loss": 0.5732,
531
+ "step": 540
532
+ },
533
+ {
534
+ "epoch": 22.68,
535
+ "learning_rate": 1.3117283950617285e-05,
536
+ "loss": 0.5837,
537
+ "step": 550
538
+ },
539
+ {
540
+ "epoch": 22.97,
541
+ "eval_accuracy": 0.7981308411214953,
542
+ "eval_loss": 0.5882120132446289,
543
+ "eval_runtime": 8.1333,
544
+ "eval_samples_per_second": 263.116,
545
+ "eval_steps_per_second": 1.352,
546
+ "step": 557
547
+ },
548
+ {
549
+ "epoch": 23.09,
550
+ "learning_rate": 1.2345679012345678e-05,
551
+ "loss": 0.5791,
552
+ "step": 560
553
+ },
554
+ {
555
+ "epoch": 23.51,
556
+ "learning_rate": 1.1574074074074075e-05,
557
+ "loss": 0.5783,
558
+ "step": 570
559
+ },
560
+ {
561
+ "epoch": 23.92,
562
+ "learning_rate": 1.0802469135802469e-05,
563
+ "loss": 0.5501,
564
+ "step": 580
565
+ },
566
+ {
567
+ "epoch": 24.0,
568
+ "eval_accuracy": 0.8023364485981308,
569
+ "eval_loss": 0.5847517251968384,
570
+ "eval_runtime": 8.0927,
571
+ "eval_samples_per_second": 264.435,
572
+ "eval_steps_per_second": 1.359,
573
+ "step": 582
574
+ },
575
+ {
576
+ "epoch": 24.33,
577
+ "learning_rate": 1.0030864197530866e-05,
578
+ "loss": 0.5841,
579
+ "step": 590
580
+ },
581
+ {
582
+ "epoch": 24.74,
583
+ "learning_rate": 9.259259259259259e-06,
584
+ "loss": 0.5593,
585
+ "step": 600
586
+ },
587
+ {
588
+ "epoch": 24.99,
589
+ "eval_accuracy": 0.7962616822429907,
590
+ "eval_loss": 0.5861061811447144,
591
+ "eval_runtime": 7.5867,
592
+ "eval_samples_per_second": 282.074,
593
+ "eval_steps_per_second": 1.45,
594
+ "step": 606
595
+ },
596
+ {
597
+ "epoch": 25.15,
598
+ "learning_rate": 8.487654320987654e-06,
599
+ "loss": 0.5526,
600
+ "step": 610
601
+ },
602
+ {
603
+ "epoch": 25.57,
604
+ "learning_rate": 7.71604938271605e-06,
605
+ "loss": 0.5792,
606
+ "step": 620
607
+ },
608
+ {
609
+ "epoch": 25.98,
610
+ "learning_rate": 6.944444444444445e-06,
611
+ "loss": 0.5454,
612
+ "step": 630
613
+ },
614
+ {
615
+ "epoch": 25.98,
616
+ "eval_accuracy": 0.8014018691588785,
617
+ "eval_loss": 0.5776079893112183,
618
+ "eval_runtime": 7.665,
619
+ "eval_samples_per_second": 279.191,
620
+ "eval_steps_per_second": 1.435,
621
+ "step": 630
622
+ },
623
+ {
624
+ "epoch": 26.39,
625
+ "learning_rate": 6.172839506172839e-06,
626
+ "loss": 0.5477,
627
+ "step": 640
628
+ },
629
+ {
630
+ "epoch": 26.8,
631
+ "learning_rate": 5.401234567901234e-06,
632
+ "loss": 0.5656,
633
+ "step": 650
634
+ },
635
+ {
636
+ "epoch": 26.97,
637
+ "eval_accuracy": 0.7976635514018692,
638
+ "eval_loss": 0.579258143901825,
639
+ "eval_runtime": 7.9989,
640
+ "eval_samples_per_second": 267.536,
641
+ "eval_steps_per_second": 1.375,
642
+ "step": 654
643
+ },
644
+ {
645
+ "epoch": 27.22,
646
+ "learning_rate": 4.6296296296296296e-06,
647
+ "loss": 0.5546,
648
+ "step": 660
649
+ },
650
+ {
651
+ "epoch": 27.63,
652
+ "learning_rate": 3.858024691358025e-06,
653
+ "loss": 0.5708,
654
+ "step": 670
655
+ },
656
+ {
657
+ "epoch": 28.0,
658
+ "eval_accuracy": 0.8004672897196262,
659
+ "eval_loss": 0.5814595818519592,
660
+ "eval_runtime": 8.2466,
661
+ "eval_samples_per_second": 259.502,
662
+ "eval_steps_per_second": 1.334,
663
+ "step": 679
664
+ },
665
+ {
666
+ "epoch": 28.04,
667
+ "learning_rate": 3.0864197530864196e-06,
668
+ "loss": 0.5505,
669
+ "step": 680
670
+ },
671
+ {
672
+ "epoch": 28.45,
673
+ "learning_rate": 2.3148148148148148e-06,
674
+ "loss": 0.5565,
675
+ "step": 690
676
+ },
677
+ {
678
+ "epoch": 28.87,
679
+ "learning_rate": 1.5432098765432098e-06,
680
+ "loss": 0.5541,
681
+ "step": 700
682
+ },
683
+ {
684
+ "epoch": 28.99,
685
+ "eval_accuracy": 0.8032710280373832,
686
+ "eval_loss": 0.5749428868293762,
687
+ "eval_runtime": 8.685,
688
+ "eval_samples_per_second": 246.402,
689
+ "eval_steps_per_second": 1.267,
690
+ "step": 703
691
+ },
692
+ {
693
+ "epoch": 29.28,
694
+ "learning_rate": 7.716049382716049e-07,
695
+ "loss": 0.5511,
696
+ "step": 710
697
+ },
698
+ {
699
+ "epoch": 29.69,
700
+ "learning_rate": 0.0,
701
+ "loss": 0.5439,
702
+ "step": 720
703
+ },
704
+ {
705
+ "epoch": 29.69,
706
+ "eval_accuracy": 0.7995327102803739,
707
+ "eval_loss": 0.5757396221160889,
708
+ "eval_runtime": 7.6093,
709
+ "eval_samples_per_second": 281.236,
710
+ "eval_steps_per_second": 1.446,
711
+ "step": 720
712
+ },
713
+ {
714
+ "epoch": 29.69,
715
+ "step": 720,
716
+ "total_flos": 3.1088341164957254e+19,
717
+ "train_loss": 1.3989773021803962,
718
+ "train_runtime": 2686.7823,
719
+ "train_samples_per_second": 215.019,
720
+ "train_steps_per_second": 0.268
721
+ }
722
+ ],
723
+ "logging_steps": 10,
724
+ "max_steps": 720,
725
+ "num_input_tokens_seen": 0,
726
+ "num_train_epochs": 30,
727
+ "save_steps": 500,
728
+ "total_flos": 3.1088341164957254e+19,
729
+ "train_batch_size": 200,
730
+ "trial_name": null,
731
+ "trial_params": null
732
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffb5eb953c47a0ebf8616e69b5a76e67861971738a45ebc8366f452bc2a13476
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfddf1013031b1a74db418158b66097226c7d8c2e9ed6d9fa0551310fe72edac
3
  size 4728