alexrodpas commited on
Commit
7690249
·
1 Parent(s): dd1f686

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +547 -0
trainer_state.json ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 44368,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05,
13
+ "learning_rate": 1.9776866209880996e-05,
14
+ "loss": 1.6544,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.09,
19
+ "learning_rate": 1.9551478543094125e-05,
20
+ "loss": 1.4272,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.14,
25
+ "learning_rate": 1.932609087630725e-05,
26
+ "loss": 1.3783,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 0.18,
31
+ "learning_rate": 1.9100703209520376e-05,
32
+ "loss": 1.324,
33
+ "step": 2000
34
+ },
35
+ {
36
+ "epoch": 0.23,
37
+ "learning_rate": 1.8875315542733504e-05,
38
+ "loss": 1.2637,
39
+ "step": 2500
40
+ },
41
+ {
42
+ "epoch": 0.27,
43
+ "learning_rate": 1.864992787594663e-05,
44
+ "loss": 1.2255,
45
+ "step": 3000
46
+ },
47
+ {
48
+ "epoch": 0.32,
49
+ "learning_rate": 1.8424540209159756e-05,
50
+ "loss": 1.211,
51
+ "step": 3500
52
+ },
53
+ {
54
+ "epoch": 0.36,
55
+ "learning_rate": 1.8199603317706455e-05,
56
+ "loss": 1.1977,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.41,
61
+ "learning_rate": 1.7974215650919584e-05,
62
+ "loss": 1.1879,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.45,
67
+ "learning_rate": 1.774882798413271e-05,
68
+ "loss": 1.1487,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.5,
73
+ "learning_rate": 1.7523440317345835e-05,
74
+ "loss": 1.1427,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.54,
79
+ "learning_rate": 1.7298052650558964e-05,
80
+ "loss": 1.1066,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.59,
85
+ "learning_rate": 1.707266498377209e-05,
86
+ "loss": 1.1576,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.63,
91
+ "learning_rate": 1.6847277316985215e-05,
92
+ "loss": 1.0895,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 0.68,
97
+ "learning_rate": 1.662188965019834e-05,
98
+ "loss": 1.0893,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 0.72,
103
+ "learning_rate": 1.639650198341147e-05,
104
+ "loss": 1.0693,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 0.77,
109
+ "learning_rate": 1.6171114316624595e-05,
110
+ "loss": 1.0979,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 0.81,
115
+ "learning_rate": 1.5945726649837724e-05,
116
+ "loss": 1.0202,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 0.86,
121
+ "learning_rate": 1.572033898305085e-05,
122
+ "loss": 1.0574,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 0.9,
127
+ "learning_rate": 1.549540209159755e-05,
128
+ "loss": 0.9914,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 0.95,
133
+ "learning_rate": 1.5270014424810675e-05,
134
+ "loss": 1.0377,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 0.99,
139
+ "learning_rate": 1.5045077533357375e-05,
140
+ "loss": 0.9989,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 1.04,
145
+ "learning_rate": 1.4820140641904077e-05,
146
+ "loss": 0.8036,
147
+ "step": 11500
148
+ },
149
+ {
150
+ "epoch": 1.08,
151
+ "learning_rate": 1.4594752975117204e-05,
152
+ "loss": 0.7445,
153
+ "step": 12000
154
+ },
155
+ {
156
+ "epoch": 1.13,
157
+ "learning_rate": 1.436936530833033e-05,
158
+ "loss": 0.7566,
159
+ "step": 12500
160
+ },
161
+ {
162
+ "epoch": 1.17,
163
+ "learning_rate": 1.4143977641543456e-05,
164
+ "loss": 0.7529,
165
+ "step": 13000
166
+ },
167
+ {
168
+ "epoch": 1.22,
169
+ "learning_rate": 1.3918589974756582e-05,
170
+ "loss": 0.7446,
171
+ "step": 13500
172
+ },
173
+ {
174
+ "epoch": 1.26,
175
+ "learning_rate": 1.369320230796971e-05,
176
+ "loss": 0.7539,
177
+ "step": 14000
178
+ },
179
+ {
180
+ "epoch": 1.31,
181
+ "learning_rate": 1.3468265416516409e-05,
182
+ "loss": 0.7389,
183
+ "step": 14500
184
+ },
185
+ {
186
+ "epoch": 1.35,
187
+ "learning_rate": 1.3242877749729536e-05,
188
+ "loss": 0.7502,
189
+ "step": 15000
190
+ },
191
+ {
192
+ "epoch": 1.4,
193
+ "learning_rate": 1.3017940858276236e-05,
194
+ "loss": 0.7319,
195
+ "step": 15500
196
+ },
197
+ {
198
+ "epoch": 1.44,
199
+ "learning_rate": 1.2792553191489363e-05,
200
+ "loss": 0.7732,
201
+ "step": 16000
202
+ },
203
+ {
204
+ "epoch": 1.49,
205
+ "learning_rate": 1.2567165524702489e-05,
206
+ "loss": 0.7458,
207
+ "step": 16500
208
+ },
209
+ {
210
+ "epoch": 1.53,
211
+ "learning_rate": 1.2341777857915616e-05,
212
+ "loss": 0.7277,
213
+ "step": 17000
214
+ },
215
+ {
216
+ "epoch": 1.58,
217
+ "learning_rate": 1.2116390191128742e-05,
218
+ "loss": 0.7559,
219
+ "step": 17500
220
+ },
221
+ {
222
+ "epoch": 1.62,
223
+ "learning_rate": 1.1891002524341869e-05,
224
+ "loss": 0.7572,
225
+ "step": 18000
226
+ },
227
+ {
228
+ "epoch": 1.67,
229
+ "learning_rate": 1.1665614857554996e-05,
230
+ "loss": 0.7387,
231
+ "step": 18500
232
+ },
233
+ {
234
+ "epoch": 1.71,
235
+ "learning_rate": 1.1440227190768123e-05,
236
+ "loss": 0.7502,
237
+ "step": 19000
238
+ },
239
+ {
240
+ "epoch": 1.76,
241
+ "learning_rate": 1.1214839523981249e-05,
242
+ "loss": 0.7007,
243
+ "step": 19500
244
+ },
245
+ {
246
+ "epoch": 1.8,
247
+ "learning_rate": 1.0989451857194376e-05,
248
+ "loss": 0.7435,
249
+ "step": 20000
250
+ },
251
+ {
252
+ "epoch": 1.85,
253
+ "learning_rate": 1.0764514965741076e-05,
254
+ "loss": 0.7114,
255
+ "step": 20500
256
+ },
257
+ {
258
+ "epoch": 1.89,
259
+ "learning_rate": 1.0539127298954201e-05,
260
+ "loss": 0.7574,
261
+ "step": 21000
262
+ },
263
+ {
264
+ "epoch": 1.94,
265
+ "learning_rate": 1.0313739632167327e-05,
266
+ "loss": 0.7184,
267
+ "step": 21500
268
+ },
269
+ {
270
+ "epoch": 1.98,
271
+ "learning_rate": 1.0088351965380456e-05,
272
+ "loss": 0.6812,
273
+ "step": 22000
274
+ },
275
+ {
276
+ "epoch": 2.03,
277
+ "learning_rate": 9.862964298593581e-06,
278
+ "loss": 0.5916,
279
+ "step": 22500
280
+ },
281
+ {
282
+ "epoch": 2.07,
283
+ "learning_rate": 9.637576631806709e-06,
284
+ "loss": 0.5152,
285
+ "step": 23000
286
+ },
287
+ {
288
+ "epoch": 2.12,
289
+ "learning_rate": 9.412188965019836e-06,
290
+ "loss": 0.4892,
291
+ "step": 23500
292
+ },
293
+ {
294
+ "epoch": 2.16,
295
+ "learning_rate": 9.187252073566536e-06,
296
+ "loss": 0.5003,
297
+ "step": 24000
298
+ },
299
+ {
300
+ "epoch": 2.21,
301
+ "learning_rate": 8.961864406779663e-06,
302
+ "loss": 0.4962,
303
+ "step": 24500
304
+ },
305
+ {
306
+ "epoch": 2.25,
307
+ "learning_rate": 8.736476739992788e-06,
308
+ "loss": 0.4955,
309
+ "step": 25000
310
+ },
311
+ {
312
+ "epoch": 2.3,
313
+ "learning_rate": 8.511089073205916e-06,
314
+ "loss": 0.5183,
315
+ "step": 25500
316
+ },
317
+ {
318
+ "epoch": 2.34,
319
+ "learning_rate": 8.285701406419041e-06,
320
+ "loss": 0.501,
321
+ "step": 26000
322
+ },
323
+ {
324
+ "epoch": 2.39,
325
+ "learning_rate": 8.060313739632168e-06,
326
+ "loss": 0.4932,
327
+ "step": 26500
328
+ },
329
+ {
330
+ "epoch": 2.43,
331
+ "learning_rate": 7.835376848178868e-06,
332
+ "loss": 0.5092,
333
+ "step": 27000
334
+ },
335
+ {
336
+ "epoch": 2.48,
337
+ "learning_rate": 7.609989181391995e-06,
338
+ "loss": 0.5327,
339
+ "step": 27500
340
+ },
341
+ {
342
+ "epoch": 2.52,
343
+ "learning_rate": 7.384601514605121e-06,
344
+ "loss": 0.4913,
345
+ "step": 28000
346
+ },
347
+ {
348
+ "epoch": 2.57,
349
+ "learning_rate": 7.159213847818248e-06,
350
+ "loss": 0.4862,
351
+ "step": 28500
352
+ },
353
+ {
354
+ "epoch": 2.61,
355
+ "learning_rate": 6.9338261810313745e-06,
356
+ "loss": 0.5005,
357
+ "step": 29000
358
+ },
359
+ {
360
+ "epoch": 2.66,
361
+ "learning_rate": 6.708889289578074e-06,
362
+ "loss": 0.498,
363
+ "step": 29500
364
+ },
365
+ {
366
+ "epoch": 2.7,
367
+ "learning_rate": 6.483952398124775e-06,
368
+ "loss": 0.5037,
369
+ "step": 30000
370
+ },
371
+ {
372
+ "epoch": 2.75,
373
+ "learning_rate": 6.2585647313379015e-06,
374
+ "loss": 0.4922,
375
+ "step": 30500
376
+ },
377
+ {
378
+ "epoch": 2.79,
379
+ "learning_rate": 6.033177064551028e-06,
380
+ "loss": 0.4944,
381
+ "step": 31000
382
+ },
383
+ {
384
+ "epoch": 2.84,
385
+ "learning_rate": 5.807789397764155e-06,
386
+ "loss": 0.4795,
387
+ "step": 31500
388
+ },
389
+ {
390
+ "epoch": 2.88,
391
+ "learning_rate": 5.5824017309772815e-06,
392
+ "loss": 0.498,
393
+ "step": 32000
394
+ },
395
+ {
396
+ "epoch": 2.93,
397
+ "learning_rate": 5.357014064190408e-06,
398
+ "loss": 0.4985,
399
+ "step": 32500
400
+ },
401
+ {
402
+ "epoch": 2.98,
403
+ "learning_rate": 5.131626397403534e-06,
404
+ "loss": 0.4915,
405
+ "step": 33000
406
+ },
407
+ {
408
+ "epoch": 3.02,
409
+ "learning_rate": 4.9062387306166605e-06,
410
+ "loss": 0.4185,
411
+ "step": 33500
412
+ },
413
+ {
414
+ "epoch": 3.07,
415
+ "learning_rate": 4.680851063829788e-06,
416
+ "loss": 0.3421,
417
+ "step": 34000
418
+ },
419
+ {
420
+ "epoch": 3.11,
421
+ "learning_rate": 4.455463397042914e-06,
422
+ "loss": 0.353,
423
+ "step": 34500
424
+ },
425
+ {
426
+ "epoch": 3.16,
427
+ "learning_rate": 4.2300757302560405e-06,
428
+ "loss": 0.33,
429
+ "step": 35000
430
+ },
431
+ {
432
+ "epoch": 3.2,
433
+ "learning_rate": 4.004688063469167e-06,
434
+ "loss": 0.3277,
435
+ "step": 35500
436
+ },
437
+ {
438
+ "epoch": 3.25,
439
+ "learning_rate": 3.779300396682294e-06,
440
+ "loss": 0.3456,
441
+ "step": 36000
442
+ },
443
+ {
444
+ "epoch": 3.29,
445
+ "learning_rate": 3.553912729895421e-06,
446
+ "loss": 0.3295,
447
+ "step": 36500
448
+ },
449
+ {
450
+ "epoch": 3.34,
451
+ "learning_rate": 3.3289758384421207e-06,
452
+ "loss": 0.3293,
453
+ "step": 37000
454
+ },
455
+ {
456
+ "epoch": 3.38,
457
+ "learning_rate": 3.1035881716552475e-06,
458
+ "loss": 0.3478,
459
+ "step": 37500
460
+ },
461
+ {
462
+ "epoch": 3.43,
463
+ "learning_rate": 2.8782005048683735e-06,
464
+ "loss": 0.3639,
465
+ "step": 38000
466
+ },
467
+ {
468
+ "epoch": 3.47,
469
+ "learning_rate": 2.6528128380815007e-06,
470
+ "loss": 0.3333,
471
+ "step": 38500
472
+ },
473
+ {
474
+ "epoch": 3.52,
475
+ "learning_rate": 2.427425171294627e-06,
476
+ "loss": 0.3324,
477
+ "step": 39000
478
+ },
479
+ {
480
+ "epoch": 3.56,
481
+ "learning_rate": 2.2024882798413274e-06,
482
+ "loss": 0.3435,
483
+ "step": 39500
484
+ },
485
+ {
486
+ "epoch": 3.61,
487
+ "learning_rate": 1.9771006130544537e-06,
488
+ "loss": 0.3512,
489
+ "step": 40000
490
+ },
491
+ {
492
+ "epoch": 3.65,
493
+ "learning_rate": 1.7517129462675803e-06,
494
+ "loss": 0.3388,
495
+ "step": 40500
496
+ },
497
+ {
498
+ "epoch": 3.7,
499
+ "learning_rate": 1.526325279480707e-06,
500
+ "loss": 0.3421,
501
+ "step": 41000
502
+ },
503
+ {
504
+ "epoch": 3.74,
505
+ "learning_rate": 1.3009376126938337e-06,
506
+ "loss": 0.3648,
507
+ "step": 41500
508
+ },
509
+ {
510
+ "epoch": 3.79,
511
+ "learning_rate": 1.0760007212405338e-06,
512
+ "loss": 0.3271,
513
+ "step": 42000
514
+ },
515
+ {
516
+ "epoch": 3.83,
517
+ "learning_rate": 8.506130544536603e-07,
518
+ "loss": 0.321,
519
+ "step": 42500
520
+ },
521
+ {
522
+ "epoch": 3.88,
523
+ "learning_rate": 6.256761630003607e-07,
524
+ "loss": 0.3432,
525
+ "step": 43000
526
+ },
527
+ {
528
+ "epoch": 3.92,
529
+ "learning_rate": 4.0028849621348723e-07,
530
+ "loss": 0.322,
531
+ "step": 43500
532
+ },
533
+ {
534
+ "epoch": 3.97,
535
+ "learning_rate": 1.7490082942661378e-07,
536
+ "loss": 0.3316,
537
+ "step": 44000
538
+ }
539
+ ],
540
+ "logging_steps": 500,
541
+ "max_steps": 44368,
542
+ "num_train_epochs": 4,
543
+ "save_steps": 500,
544
+ "total_flos": 6.955379978528563e+16,
545
+ "trial_name": null,
546
+ "trial_params": null
547
+ }