Bastien2 commited on
Commit
d076c88
·
verified ·
1 Parent(s): 730f792

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ library_name: transformers
4
+ tags:
5
+ - autotrain
6
+ - text-classification
7
+ base_model: albert/albert-base-v2
8
+ widget:
9
+ - text: "I love AutoTrain"
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Text Classification
15
+
16
+ ## Validation Metrics
17
+ loss: 0.03531607240438461
18
+
19
+ f1_macro: 1.0
20
+
21
+ f1_micro: 1.0
22
+
23
+ f1_weighted: 1.0
24
+
25
+ precision_macro: 1.0
26
+
27
+ precision_micro: 1.0
28
+
29
+ precision_weighted: 1.0
30
+
31
+ recall_macro: 1.0
32
+
33
+ recall_micro: 1.0
34
+
35
+ recall_weighted: 1.0
36
+
37
+ accuracy: 1.0
checkpoint-114/config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "albert/albert-base-v2",
3
+ "_num_labels": 4,
4
+ "architectures": [
5
+ "AlbertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0,
8
+ "bos_token_id": 2,
9
+ "classifier_dropout_prob": 0.1,
10
+ "down_scale_factor": 1,
11
+ "embedding_size": 128,
12
+ "eos_token_id": 3,
13
+ "gap_size": 0,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0,
16
+ "hidden_size": 768,
17
+ "id2label": {
18
+ "0": "CLE_DSI_SIEP",
19
+ "1": "CLE_DSI_SIL",
20
+ "2": "CLE_DSI_SPIA",
21
+ "3": "CLE_DSI_SSUR"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "inner_group_num": 1,
25
+ "intermediate_size": 3072,
26
+ "label2id": {
27
+ "CLE_DSI_SIEP": 0,
28
+ "CLE_DSI_SIL": 1,
29
+ "CLE_DSI_SPIA": 2,
30
+ "CLE_DSI_SSUR": 3
31
+ },
32
+ "layer_norm_eps": 1e-12,
33
+ "max_position_embeddings": 512,
34
+ "model_type": "albert",
35
+ "net_structure_type": 0,
36
+ "num_attention_heads": 12,
37
+ "num_hidden_groups": 1,
38
+ "num_hidden_layers": 12,
39
+ "num_memory_blocks": 0,
40
+ "pad_token_id": 0,
41
+ "position_embedding_type": "absolute",
42
+ "problem_type": "single_label_classification",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.48.0",
45
+ "type_vocab_size": 2,
46
+ "vocab_size": 30000
47
+ }
checkpoint-114/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a53bb5c68096335596662cfb704ac41d74c4d5b05e3dd20c859157829902a56
3
+ size 46750064
checkpoint-114/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb0b068389a78e17161299ff344a7723182238d85586afbaa69254b89baf8163
3
+ size 93515533
checkpoint-114/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edbd43132fa4e0fbcd229f68eb6c2a177d44af3d159781934300170bdd357f79
3
+ size 13990
checkpoint-114/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ad0b2dbf10a88fa69bee8140a80b022f98b4ec7f870cd106fce4ffc02b02f3
3
+ size 1064
checkpoint-114/trainer_state.json ADDED
@@ -0,0 +1,894 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.03531607240438461,
3
+ "best_model_checkpoint": "MerlAIn-Base-Albert-007/checkpoint-114",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 114,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.02631578947368421,
13
+ "grad_norm": 28.699817657470703,
14
+ "learning_rate": 4.166666666666667e-06,
15
+ "loss": 1.6582,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.05263157894736842,
20
+ "grad_norm": 27.002161026000977,
21
+ "learning_rate": 8.333333333333334e-06,
22
+ "loss": 1.6469,
23
+ "step": 2
24
+ },
25
+ {
26
+ "epoch": 0.07894736842105263,
27
+ "grad_norm": 48.87849044799805,
28
+ "learning_rate": 1.25e-05,
29
+ "loss": 1.5648,
30
+ "step": 3
31
+ },
32
+ {
33
+ "epoch": 0.10526315789473684,
34
+ "grad_norm": 23.19579315185547,
35
+ "learning_rate": 1.6666666666666667e-05,
36
+ "loss": 1.3417,
37
+ "step": 4
38
+ },
39
+ {
40
+ "epoch": 0.13157894736842105,
41
+ "grad_norm": 18.25263786315918,
42
+ "learning_rate": 2.0833333333333336e-05,
43
+ "loss": 1.4482,
44
+ "step": 5
45
+ },
46
+ {
47
+ "epoch": 0.15789473684210525,
48
+ "grad_norm": 25.438217163085938,
49
+ "learning_rate": 2.5e-05,
50
+ "loss": 1.4125,
51
+ "step": 6
52
+ },
53
+ {
54
+ "epoch": 0.18421052631578946,
55
+ "grad_norm": 29.029157638549805,
56
+ "learning_rate": 2.916666666666667e-05,
57
+ "loss": 1.4635,
58
+ "step": 7
59
+ },
60
+ {
61
+ "epoch": 0.21052631578947367,
62
+ "grad_norm": 36.47263717651367,
63
+ "learning_rate": 3.3333333333333335e-05,
64
+ "loss": 1.4031,
65
+ "step": 8
66
+ },
67
+ {
68
+ "epoch": 0.23684210526315788,
69
+ "grad_norm": 28.951688766479492,
70
+ "learning_rate": 3.7500000000000003e-05,
71
+ "loss": 1.4862,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 0.2631578947368421,
76
+ "grad_norm": 29.594417572021484,
77
+ "learning_rate": 4.166666666666667e-05,
78
+ "loss": 1.1583,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.2894736842105263,
83
+ "grad_norm": 34.775856018066406,
84
+ "learning_rate": 4.5833333333333334e-05,
85
+ "loss": 1.5073,
86
+ "step": 11
87
+ },
88
+ {
89
+ "epoch": 0.3157894736842105,
90
+ "grad_norm": 47.184288024902344,
91
+ "learning_rate": 5e-05,
92
+ "loss": 1.43,
93
+ "step": 12
94
+ },
95
+ {
96
+ "epoch": 0.34210526315789475,
97
+ "grad_norm": 32.09071350097656,
98
+ "learning_rate": 4.9509803921568634e-05,
99
+ "loss": 1.299,
100
+ "step": 13
101
+ },
102
+ {
103
+ "epoch": 0.3684210526315789,
104
+ "grad_norm": 38.88185119628906,
105
+ "learning_rate": 4.901960784313725e-05,
106
+ "loss": 1.5853,
107
+ "step": 14
108
+ },
109
+ {
110
+ "epoch": 0.39473684210526316,
111
+ "grad_norm": 38.60731506347656,
112
+ "learning_rate": 4.8529411764705885e-05,
113
+ "loss": 1.546,
114
+ "step": 15
115
+ },
116
+ {
117
+ "epoch": 0.42105263157894735,
118
+ "grad_norm": 31.08054542541504,
119
+ "learning_rate": 4.803921568627452e-05,
120
+ "loss": 1.4833,
121
+ "step": 16
122
+ },
123
+ {
124
+ "epoch": 0.4473684210526316,
125
+ "grad_norm": 35.068878173828125,
126
+ "learning_rate": 4.7549019607843135e-05,
127
+ "loss": 1.379,
128
+ "step": 17
129
+ },
130
+ {
131
+ "epoch": 0.47368421052631576,
132
+ "grad_norm": 36.29180145263672,
133
+ "learning_rate": 4.705882352941177e-05,
134
+ "loss": 1.368,
135
+ "step": 18
136
+ },
137
+ {
138
+ "epoch": 0.5,
139
+ "grad_norm": 43.891021728515625,
140
+ "learning_rate": 4.656862745098039e-05,
141
+ "loss": 1.3128,
142
+ "step": 19
143
+ },
144
+ {
145
+ "epoch": 0.5263157894736842,
146
+ "grad_norm": 33.44991683959961,
147
+ "learning_rate": 4.607843137254902e-05,
148
+ "loss": 1.3283,
149
+ "step": 20
150
+ },
151
+ {
152
+ "epoch": 0.5526315789473685,
153
+ "grad_norm": 25.939502716064453,
154
+ "learning_rate": 4.558823529411765e-05,
155
+ "loss": 1.3224,
156
+ "step": 21
157
+ },
158
+ {
159
+ "epoch": 0.5789473684210527,
160
+ "grad_norm": 36.61793899536133,
161
+ "learning_rate": 4.5098039215686275e-05,
162
+ "loss": 1.2939,
163
+ "step": 22
164
+ },
165
+ {
166
+ "epoch": 0.6052631578947368,
167
+ "grad_norm": 29.500669479370117,
168
+ "learning_rate": 4.460784313725491e-05,
169
+ "loss": 1.3616,
170
+ "step": 23
171
+ },
172
+ {
173
+ "epoch": 0.631578947368421,
174
+ "grad_norm": 36.494136810302734,
175
+ "learning_rate": 4.411764705882353e-05,
176
+ "loss": 1.3781,
177
+ "step": 24
178
+ },
179
+ {
180
+ "epoch": 0.6578947368421053,
181
+ "grad_norm": 40.84477615356445,
182
+ "learning_rate": 4.362745098039216e-05,
183
+ "loss": 1.3689,
184
+ "step": 25
185
+ },
186
+ {
187
+ "epoch": 0.6842105263157895,
188
+ "grad_norm": 39.84971237182617,
189
+ "learning_rate": 4.313725490196079e-05,
190
+ "loss": 1.328,
191
+ "step": 26
192
+ },
193
+ {
194
+ "epoch": 0.7105263157894737,
195
+ "grad_norm": 43.569313049316406,
196
+ "learning_rate": 4.2647058823529415e-05,
197
+ "loss": 1.3209,
198
+ "step": 27
199
+ },
200
+ {
201
+ "epoch": 0.7368421052631579,
202
+ "grad_norm": 47.90934753417969,
203
+ "learning_rate": 4.215686274509804e-05,
204
+ "loss": 1.305,
205
+ "step": 28
206
+ },
207
+ {
208
+ "epoch": 0.7631578947368421,
209
+ "grad_norm": 25.691852569580078,
210
+ "learning_rate": 4.166666666666667e-05,
211
+ "loss": 1.4197,
212
+ "step": 29
213
+ },
214
+ {
215
+ "epoch": 0.7894736842105263,
216
+ "grad_norm": 24.112335205078125,
217
+ "learning_rate": 4.11764705882353e-05,
218
+ "loss": 1.2483,
219
+ "step": 30
220
+ },
221
+ {
222
+ "epoch": 0.8157894736842105,
223
+ "grad_norm": 151.701904296875,
224
+ "learning_rate": 4.068627450980392e-05,
225
+ "loss": 1.3182,
226
+ "step": 31
227
+ },
228
+ {
229
+ "epoch": 0.8421052631578947,
230
+ "grad_norm": 65.58763885498047,
231
+ "learning_rate": 4.0196078431372555e-05,
232
+ "loss": 1.3291,
233
+ "step": 32
234
+ },
235
+ {
236
+ "epoch": 0.868421052631579,
237
+ "grad_norm": 89.37367248535156,
238
+ "learning_rate": 3.970588235294117e-05,
239
+ "loss": 1.4494,
240
+ "step": 33
241
+ },
242
+ {
243
+ "epoch": 0.8947368421052632,
244
+ "grad_norm": 73.70407104492188,
245
+ "learning_rate": 3.9215686274509805e-05,
246
+ "loss": 1.3755,
247
+ "step": 34
248
+ },
249
+ {
250
+ "epoch": 0.9210526315789473,
251
+ "grad_norm": 49.350311279296875,
252
+ "learning_rate": 3.872549019607844e-05,
253
+ "loss": 1.2785,
254
+ "step": 35
255
+ },
256
+ {
257
+ "epoch": 0.9473684210526315,
258
+ "grad_norm": 54.894290924072266,
259
+ "learning_rate": 3.8235294117647055e-05,
260
+ "loss": 1.2477,
261
+ "step": 36
262
+ },
263
+ {
264
+ "epoch": 0.9736842105263158,
265
+ "grad_norm": 67.32546997070312,
266
+ "learning_rate": 3.774509803921569e-05,
267
+ "loss": 1.2391,
268
+ "step": 37
269
+ },
270
+ {
271
+ "epoch": 1.0,
272
+ "grad_norm": 52.955101013183594,
273
+ "learning_rate": 3.725490196078432e-05,
274
+ "loss": 1.3952,
275
+ "step": 38
276
+ },
277
+ {
278
+ "epoch": 1.0,
279
+ "eval_accuracy": 0.43333333333333335,
280
+ "eval_f1_macro": 0.39544493983232065,
281
+ "eval_f1_micro": 0.43333333333333335,
282
+ "eval_f1_weighted": 0.3954449398323206,
283
+ "eval_loss": 1.2776685953140259,
284
+ "eval_precision_macro": 0.4938785173160173,
285
+ "eval_precision_micro": 0.43333333333333335,
286
+ "eval_precision_weighted": 0.49387851731601734,
287
+ "eval_recall_macro": 0.43333333333333335,
288
+ "eval_recall_micro": 0.43333333333333335,
289
+ "eval_recall_weighted": 0.43333333333333335,
290
+ "eval_runtime": 62.8708,
291
+ "eval_samples_per_second": 0.954,
292
+ "eval_steps_per_second": 0.032,
293
+ "step": 38
294
+ },
295
+ {
296
+ "epoch": 1.0263157894736843,
297
+ "grad_norm": 59.585609436035156,
298
+ "learning_rate": 3.6764705882352945e-05,
299
+ "loss": 1.2837,
300
+ "step": 39
301
+ },
302
+ {
303
+ "epoch": 1.0526315789473684,
304
+ "grad_norm": 52.22113800048828,
305
+ "learning_rate": 3.627450980392157e-05,
306
+ "loss": 1.2439,
307
+ "step": 40
308
+ },
309
+ {
310
+ "epoch": 1.0789473684210527,
311
+ "grad_norm": 54.0033073425293,
312
+ "learning_rate": 3.5784313725490195e-05,
313
+ "loss": 1.1426,
314
+ "step": 41
315
+ },
316
+ {
317
+ "epoch": 1.1052631578947367,
318
+ "grad_norm": 36.3646354675293,
319
+ "learning_rate": 3.529411764705883e-05,
320
+ "loss": 1.1503,
321
+ "step": 42
322
+ },
323
+ {
324
+ "epoch": 1.131578947368421,
325
+ "grad_norm": 83.59153747558594,
326
+ "learning_rate": 3.480392156862745e-05,
327
+ "loss": 1.309,
328
+ "step": 43
329
+ },
330
+ {
331
+ "epoch": 1.1578947368421053,
332
+ "grad_norm": 89.75745391845703,
333
+ "learning_rate": 3.431372549019608e-05,
334
+ "loss": 1.2147,
335
+ "step": 44
336
+ },
337
+ {
338
+ "epoch": 1.1842105263157894,
339
+ "grad_norm": 47.33842849731445,
340
+ "learning_rate": 3.382352941176471e-05,
341
+ "loss": 1.1673,
342
+ "step": 45
343
+ },
344
+ {
345
+ "epoch": 1.2105263157894737,
346
+ "grad_norm": 42.632164001464844,
347
+ "learning_rate": 3.3333333333333335e-05,
348
+ "loss": 1.1007,
349
+ "step": 46
350
+ },
351
+ {
352
+ "epoch": 1.236842105263158,
353
+ "grad_norm": 46.119991302490234,
354
+ "learning_rate": 3.284313725490196e-05,
355
+ "loss": 1.04,
356
+ "step": 47
357
+ },
358
+ {
359
+ "epoch": 1.263157894736842,
360
+ "grad_norm": 39.25000762939453,
361
+ "learning_rate": 3.235294117647059e-05,
362
+ "loss": 1.023,
363
+ "step": 48
364
+ },
365
+ {
366
+ "epoch": 1.2894736842105263,
367
+ "grad_norm": 34.97711944580078,
368
+ "learning_rate": 3.186274509803922e-05,
369
+ "loss": 1.0791,
370
+ "step": 49
371
+ },
372
+ {
373
+ "epoch": 1.3157894736842106,
374
+ "grad_norm": 45.260520935058594,
375
+ "learning_rate": 3.137254901960784e-05,
376
+ "loss": 0.9704,
377
+ "step": 50
378
+ },
379
+ {
380
+ "epoch": 1.3421052631578947,
381
+ "grad_norm": 36.56230163574219,
382
+ "learning_rate": 3.0882352941176475e-05,
383
+ "loss": 1.0815,
384
+ "step": 51
385
+ },
386
+ {
387
+ "epoch": 1.368421052631579,
388
+ "grad_norm": 28.379478454589844,
389
+ "learning_rate": 3.0392156862745097e-05,
390
+ "loss": 0.8439,
391
+ "step": 52
392
+ },
393
+ {
394
+ "epoch": 1.3947368421052633,
395
+ "grad_norm": 31.5749568939209,
396
+ "learning_rate": 2.9901960784313725e-05,
397
+ "loss": 0.9349,
398
+ "step": 53
399
+ },
400
+ {
401
+ "epoch": 1.4210526315789473,
402
+ "grad_norm": 33.99452590942383,
403
+ "learning_rate": 2.9411764705882354e-05,
404
+ "loss": 0.81,
405
+ "step": 54
406
+ },
407
+ {
408
+ "epoch": 1.4473684210526316,
409
+ "grad_norm": 19.833820343017578,
410
+ "learning_rate": 2.8921568627450986e-05,
411
+ "loss": 0.8103,
412
+ "step": 55
413
+ },
414
+ {
415
+ "epoch": 1.4736842105263157,
416
+ "grad_norm": 17.318157196044922,
417
+ "learning_rate": 2.8431372549019608e-05,
418
+ "loss": 0.6071,
419
+ "step": 56
420
+ },
421
+ {
422
+ "epoch": 1.5,
423
+ "grad_norm": 28.025053024291992,
424
+ "learning_rate": 2.7941176470588236e-05,
425
+ "loss": 0.8289,
426
+ "step": 57
427
+ },
428
+ {
429
+ "epoch": 1.526315789473684,
430
+ "grad_norm": 29.336917877197266,
431
+ "learning_rate": 2.7450980392156865e-05,
432
+ "loss": 0.7057,
433
+ "step": 58
434
+ },
435
+ {
436
+ "epoch": 1.5526315789473686,
437
+ "grad_norm": 15.270713806152344,
438
+ "learning_rate": 2.696078431372549e-05,
439
+ "loss": 0.6576,
440
+ "step": 59
441
+ },
442
+ {
443
+ "epoch": 1.5789473684210527,
444
+ "grad_norm": 26.081605911254883,
445
+ "learning_rate": 2.647058823529412e-05,
446
+ "loss": 0.7704,
447
+ "step": 60
448
+ },
449
+ {
450
+ "epoch": 1.6052631578947367,
451
+ "grad_norm": 20.44722557067871,
452
+ "learning_rate": 2.5980392156862747e-05,
453
+ "loss": 0.6667,
454
+ "step": 61
455
+ },
456
+ {
457
+ "epoch": 1.631578947368421,
458
+ "grad_norm": 20.113554000854492,
459
+ "learning_rate": 2.5490196078431373e-05,
460
+ "loss": 0.704,
461
+ "step": 62
462
+ },
463
+ {
464
+ "epoch": 1.6578947368421053,
465
+ "grad_norm": 20.734594345092773,
466
+ "learning_rate": 2.5e-05,
467
+ "loss": 0.6383,
468
+ "step": 63
469
+ },
470
+ {
471
+ "epoch": 1.6842105263157894,
472
+ "grad_norm": 15.862013816833496,
473
+ "learning_rate": 2.4509803921568626e-05,
474
+ "loss": 0.644,
475
+ "step": 64
476
+ },
477
+ {
478
+ "epoch": 1.7105263157894737,
479
+ "grad_norm": 20.326189041137695,
480
+ "learning_rate": 2.401960784313726e-05,
481
+ "loss": 0.7015,
482
+ "step": 65
483
+ },
484
+ {
485
+ "epoch": 1.736842105263158,
486
+ "grad_norm": 8.938244819641113,
487
+ "learning_rate": 2.3529411764705884e-05,
488
+ "loss": 0.4248,
489
+ "step": 66
490
+ },
491
+ {
492
+ "epoch": 1.763157894736842,
493
+ "grad_norm": 9.500786781311035,
494
+ "learning_rate": 2.303921568627451e-05,
495
+ "loss": 0.3742,
496
+ "step": 67
497
+ },
498
+ {
499
+ "epoch": 1.7894736842105263,
500
+ "grad_norm": 12.43203353881836,
501
+ "learning_rate": 2.2549019607843138e-05,
502
+ "loss": 0.467,
503
+ "step": 68
504
+ },
505
+ {
506
+ "epoch": 1.8157894736842106,
507
+ "grad_norm": 10.965188980102539,
508
+ "learning_rate": 2.2058823529411766e-05,
509
+ "loss": 0.4069,
510
+ "step": 69
511
+ },
512
+ {
513
+ "epoch": 1.8421052631578947,
514
+ "grad_norm": 38.74911117553711,
515
+ "learning_rate": 2.1568627450980395e-05,
516
+ "loss": 0.5155,
517
+ "step": 70
518
+ },
519
+ {
520
+ "epoch": 1.868421052631579,
521
+ "grad_norm": 19.16448974609375,
522
+ "learning_rate": 2.107843137254902e-05,
523
+ "loss": 0.2913,
524
+ "step": 71
525
+ },
526
+ {
527
+ "epoch": 1.8947368421052633,
528
+ "grad_norm": 20.665557861328125,
529
+ "learning_rate": 2.058823529411765e-05,
530
+ "loss": 0.3385,
531
+ "step": 72
532
+ },
533
+ {
534
+ "epoch": 1.9210526315789473,
535
+ "grad_norm": 14.908136367797852,
536
+ "learning_rate": 2.0098039215686277e-05,
537
+ "loss": 0.2891,
538
+ "step": 73
539
+ },
540
+ {
541
+ "epoch": 1.9473684210526314,
542
+ "grad_norm": 11.937300682067871,
543
+ "learning_rate": 1.9607843137254903e-05,
544
+ "loss": 0.2586,
545
+ "step": 74
546
+ },
547
+ {
548
+ "epoch": 1.973684210526316,
549
+ "grad_norm": 14.315430641174316,
550
+ "learning_rate": 1.9117647058823528e-05,
551
+ "loss": 0.2344,
552
+ "step": 75
553
+ },
554
+ {
555
+ "epoch": 2.0,
556
+ "grad_norm": 23.205062866210938,
557
+ "learning_rate": 1.862745098039216e-05,
558
+ "loss": 0.2475,
559
+ "step": 76
560
+ },
561
+ {
562
+ "epoch": 2.0,
563
+ "eval_accuracy": 0.9833333333333333,
564
+ "eval_f1_macro": 0.9833147942157954,
565
+ "eval_f1_micro": 0.9833333333333333,
566
+ "eval_f1_weighted": 0.9833147942157954,
567
+ "eval_loss": 0.27108410000801086,
568
+ "eval_precision_macro": 0.984375,
569
+ "eval_precision_micro": 0.9833333333333333,
570
+ "eval_precision_weighted": 0.984375,
571
+ "eval_recall_macro": 0.9833333333333334,
572
+ "eval_recall_micro": 0.9833333333333333,
573
+ "eval_recall_weighted": 0.9833333333333333,
574
+ "eval_runtime": 54.6682,
575
+ "eval_samples_per_second": 1.098,
576
+ "eval_steps_per_second": 0.037,
577
+ "step": 76
578
+ },
579
+ {
580
+ "epoch": 2.026315789473684,
581
+ "grad_norm": 22.89622688293457,
582
+ "learning_rate": 1.8137254901960785e-05,
583
+ "loss": 0.2629,
584
+ "step": 77
585
+ },
586
+ {
587
+ "epoch": 2.0526315789473686,
588
+ "grad_norm": 24.035755157470703,
589
+ "learning_rate": 1.7647058823529414e-05,
590
+ "loss": 0.3556,
591
+ "step": 78
592
+ },
593
+ {
594
+ "epoch": 2.0789473684210527,
595
+ "grad_norm": 23.42787742614746,
596
+ "learning_rate": 1.715686274509804e-05,
597
+ "loss": 0.3628,
598
+ "step": 79
599
+ },
600
+ {
601
+ "epoch": 2.1052631578947367,
602
+ "grad_norm": 8.628052711486816,
603
+ "learning_rate": 1.6666666666666667e-05,
604
+ "loss": 0.1729,
605
+ "step": 80
606
+ },
607
+ {
608
+ "epoch": 2.1315789473684212,
609
+ "grad_norm": 6.808111190795898,
610
+ "learning_rate": 1.6176470588235296e-05,
611
+ "loss": 0.1552,
612
+ "step": 81
613
+ },
614
+ {
615
+ "epoch": 2.1578947368421053,
616
+ "grad_norm": 14.299073219299316,
617
+ "learning_rate": 1.568627450980392e-05,
618
+ "loss": 0.3408,
619
+ "step": 82
620
+ },
621
+ {
622
+ "epoch": 2.1842105263157894,
623
+ "grad_norm": 27.81475067138672,
624
+ "learning_rate": 1.5196078431372548e-05,
625
+ "loss": 0.441,
626
+ "step": 83
627
+ },
628
+ {
629
+ "epoch": 2.2105263157894735,
630
+ "grad_norm": 8.623075485229492,
631
+ "learning_rate": 1.4705882352941177e-05,
632
+ "loss": 0.1583,
633
+ "step": 84
634
+ },
635
+ {
636
+ "epoch": 2.236842105263158,
637
+ "grad_norm": 14.49398136138916,
638
+ "learning_rate": 1.4215686274509804e-05,
639
+ "loss": 0.1173,
640
+ "step": 85
641
+ },
642
+ {
643
+ "epoch": 2.263157894736842,
644
+ "grad_norm": 5.406654357910156,
645
+ "learning_rate": 1.3725490196078432e-05,
646
+ "loss": 0.1074,
647
+ "step": 86
648
+ },
649
+ {
650
+ "epoch": 2.2894736842105265,
651
+ "grad_norm": 7.908209800720215,
652
+ "learning_rate": 1.323529411764706e-05,
653
+ "loss": 0.1228,
654
+ "step": 87
655
+ },
656
+ {
657
+ "epoch": 2.3157894736842106,
658
+ "grad_norm": 13.254148483276367,
659
+ "learning_rate": 1.2745098039215686e-05,
660
+ "loss": 0.3625,
661
+ "step": 88
662
+ },
663
+ {
664
+ "epoch": 2.3421052631578947,
665
+ "grad_norm": 8.118396759033203,
666
+ "learning_rate": 1.2254901960784313e-05,
667
+ "loss": 0.1114,
668
+ "step": 89
669
+ },
670
+ {
671
+ "epoch": 2.3684210526315788,
672
+ "grad_norm": 5.151948928833008,
673
+ "learning_rate": 1.1764705882352942e-05,
674
+ "loss": 0.1032,
675
+ "step": 90
676
+ },
677
+ {
678
+ "epoch": 2.3947368421052633,
679
+ "grad_norm": 7.1846418380737305,
680
+ "learning_rate": 1.1274509803921569e-05,
681
+ "loss": 0.0956,
682
+ "step": 91
683
+ },
684
+ {
685
+ "epoch": 2.4210526315789473,
686
+ "grad_norm": 7.737022399902344,
687
+ "learning_rate": 1.0784313725490197e-05,
688
+ "loss": 0.0812,
689
+ "step": 92
690
+ },
691
+ {
692
+ "epoch": 2.4473684210526314,
693
+ "grad_norm": 7.41852331161499,
694
+ "learning_rate": 1.0294117647058824e-05,
695
+ "loss": 0.0817,
696
+ "step": 93
697
+ },
698
+ {
699
+ "epoch": 2.473684210526316,
700
+ "grad_norm": 13.96718692779541,
701
+ "learning_rate": 9.803921568627451e-06,
702
+ "loss": 0.0757,
703
+ "step": 94
704
+ },
705
+ {
706
+ "epoch": 2.5,
707
+ "grad_norm": 8.628068923950195,
708
+ "learning_rate": 9.31372549019608e-06,
709
+ "loss": 0.0818,
710
+ "step": 95
711
+ },
712
+ {
713
+ "epoch": 2.526315789473684,
714
+ "grad_norm": 3.407517433166504,
715
+ "learning_rate": 8.823529411764707e-06,
716
+ "loss": 0.0671,
717
+ "step": 96
718
+ },
719
+ {
720
+ "epoch": 2.5526315789473686,
721
+ "grad_norm": 3.274346113204956,
722
+ "learning_rate": 8.333333333333334e-06,
723
+ "loss": 0.0717,
724
+ "step": 97
725
+ },
726
+ {
727
+ "epoch": 2.5789473684210527,
728
+ "grad_norm": 2.4493625164031982,
729
+ "learning_rate": 7.84313725490196e-06,
730
+ "loss": 0.0559,
731
+ "step": 98
732
+ },
733
+ {
734
+ "epoch": 2.6052631578947367,
735
+ "grad_norm": 1.9850170612335205,
736
+ "learning_rate": 7.3529411764705884e-06,
737
+ "loss": 0.0583,
738
+ "step": 99
739
+ },
740
+ {
741
+ "epoch": 2.6315789473684212,
742
+ "grad_norm": 1.7563467025756836,
743
+ "learning_rate": 6.862745098039216e-06,
744
+ "loss": 0.0567,
745
+ "step": 100
746
+ },
747
+ {
748
+ "epoch": 2.6578947368421053,
749
+ "grad_norm": 4.084621429443359,
750
+ "learning_rate": 6.372549019607843e-06,
751
+ "loss": 0.0549,
752
+ "step": 101
753
+ },
754
+ {
755
+ "epoch": 2.6842105263157894,
756
+ "grad_norm": 3.339154005050659,
757
+ "learning_rate": 5.882352941176471e-06,
758
+ "loss": 0.048,
759
+ "step": 102
760
+ },
761
+ {
762
+ "epoch": 2.7105263157894735,
763
+ "grad_norm": 1.8965953588485718,
764
+ "learning_rate": 5.392156862745099e-06,
765
+ "loss": 0.0447,
766
+ "step": 103
767
+ },
768
+ {
769
+ "epoch": 2.736842105263158,
770
+ "grad_norm": 2.7228641510009766,
771
+ "learning_rate": 4.901960784313726e-06,
772
+ "loss": 0.0389,
773
+ "step": 104
774
+ },
775
+ {
776
+ "epoch": 2.763157894736842,
777
+ "grad_norm": 2.2832393646240234,
778
+ "learning_rate": 4.411764705882353e-06,
779
+ "loss": 0.0436,
780
+ "step": 105
781
+ },
782
+ {
783
+ "epoch": 2.7894736842105265,
784
+ "grad_norm": 1.3695060014724731,
785
+ "learning_rate": 3.92156862745098e-06,
786
+ "loss": 0.0425,
787
+ "step": 106
788
+ },
789
+ {
790
+ "epoch": 2.8157894736842106,
791
+ "grad_norm": 1.2863951921463013,
792
+ "learning_rate": 3.431372549019608e-06,
793
+ "loss": 0.0425,
794
+ "step": 107
795
+ },
796
+ {
797
+ "epoch": 2.8421052631578947,
798
+ "grad_norm": 1.267376184463501,
799
+ "learning_rate": 2.9411764705882355e-06,
800
+ "loss": 0.0401,
801
+ "step": 108
802
+ },
803
+ {
804
+ "epoch": 2.8684210526315788,
805
+ "grad_norm": 1.3001837730407715,
806
+ "learning_rate": 2.450980392156863e-06,
807
+ "loss": 0.0407,
808
+ "step": 109
809
+ },
810
+ {
811
+ "epoch": 2.8947368421052633,
812
+ "grad_norm": 2.756816864013672,
813
+ "learning_rate": 1.96078431372549e-06,
814
+ "loss": 0.0427,
815
+ "step": 110
816
+ },
817
+ {
818
+ "epoch": 2.9210526315789473,
819
+ "grad_norm": 1.4445228576660156,
820
+ "learning_rate": 1.4705882352941177e-06,
821
+ "loss": 0.0469,
822
+ "step": 111
823
+ },
824
+ {
825
+ "epoch": 2.9473684210526314,
826
+ "grad_norm": 1.353753924369812,
827
+ "learning_rate": 9.80392156862745e-07,
828
+ "loss": 0.0395,
829
+ "step": 112
830
+ },
831
+ {
832
+ "epoch": 2.973684210526316,
833
+ "grad_norm": 1.6775199174880981,
834
+ "learning_rate": 4.901960784313725e-07,
835
+ "loss": 0.0403,
836
+ "step": 113
837
+ },
838
+ {
839
+ "epoch": 3.0,
840
+ "grad_norm": 1.318389892578125,
841
+ "learning_rate": 0.0,
842
+ "loss": 0.035,
843
+ "step": 114
844
+ },
845
+ {
846
+ "epoch": 3.0,
847
+ "eval_accuracy": 1.0,
848
+ "eval_f1_macro": 1.0,
849
+ "eval_f1_micro": 1.0,
850
+ "eval_f1_weighted": 1.0,
851
+ "eval_loss": 0.03531607240438461,
852
+ "eval_precision_macro": 1.0,
853
+ "eval_precision_micro": 1.0,
854
+ "eval_precision_weighted": 1.0,
855
+ "eval_recall_macro": 1.0,
856
+ "eval_recall_micro": 1.0,
857
+ "eval_recall_weighted": 1.0,
858
+ "eval_runtime": 58.0125,
859
+ "eval_samples_per_second": 1.034,
860
+ "eval_steps_per_second": 0.034,
861
+ "step": 114
862
+ }
863
+ ],
864
+ "logging_steps": 1,
865
+ "max_steps": 114,
866
+ "num_input_tokens_seen": 0,
867
+ "num_train_epochs": 3,
868
+ "save_steps": 500,
869
+ "stateful_callbacks": {
870
+ "EarlyStoppingCallback": {
871
+ "args": {
872
+ "early_stopping_patience": 5,
873
+ "early_stopping_threshold": 0.01
874
+ },
875
+ "attributes": {
876
+ "early_stopping_patience_counter": 0
877
+ }
878
+ },
879
+ "TrainerControl": {
880
+ "args": {
881
+ "should_epoch_stop": false,
882
+ "should_evaluate": false,
883
+ "should_log": false,
884
+ "should_save": true,
885
+ "should_training_stop": true
886
+ },
887
+ "attributes": {}
888
+ }
889
+ },
890
+ "total_flos": 42881670733824.0,
891
+ "train_batch_size": 16,
892
+ "trial_name": null,
893
+ "trial_params": null
894
+ }
checkpoint-114/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56c092df3ecaffb3a9fcbd09e9402f7d80f3613d3c33212a201618664fc8df2a
3
+ size 5368
config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "albert/albert-base-v2",
3
+ "_num_labels": 4,
4
+ "architectures": [
5
+ "AlbertForSequenceClassification"
6
+ ],
7
+ "attention_probs_dropout_prob": 0,
8
+ "bos_token_id": 2,
9
+ "classifier_dropout_prob": 0.1,
10
+ "down_scale_factor": 1,
11
+ "embedding_size": 128,
12
+ "eos_token_id": 3,
13
+ "gap_size": 0,
14
+ "hidden_act": "gelu_new",
15
+ "hidden_dropout_prob": 0,
16
+ "hidden_size": 768,
17
+ "id2label": {
18
+ "0": "CLE_DSI_SIEP",
19
+ "1": "CLE_DSI_SIL",
20
+ "2": "CLE_DSI_SPIA",
21
+ "3": "CLE_DSI_SSUR"
22
+ },
23
+ "initializer_range": 0.02,
24
+ "inner_group_num": 1,
25
+ "intermediate_size": 3072,
26
+ "label2id": {
27
+ "CLE_DSI_SIEP": 0,
28
+ "CLE_DSI_SIL": 1,
29
+ "CLE_DSI_SPIA": 2,
30
+ "CLE_DSI_SSUR": 3
31
+ },
32
+ "layer_norm_eps": 1e-12,
33
+ "max_position_embeddings": 512,
34
+ "model_type": "albert",
35
+ "net_structure_type": 0,
36
+ "num_attention_heads": 12,
37
+ "num_hidden_groups": 1,
38
+ "num_hidden_layers": 12,
39
+ "num_memory_blocks": 0,
40
+ "pad_token_id": 0,
41
+ "position_embedding_type": "absolute",
42
+ "problem_type": "single_label_classification",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.48.0",
45
+ "type_vocab_size": 2,
46
+ "vocab_size": 30000
47
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a53bb5c68096335596662cfb704ac41d74c4d5b05e3dd20c859157829902a56
3
+ size 46750064
runs/Jul04_10-24-21_r-bastien2-gt-inno-8adyb25k-35dd9-pdk18/events.out.tfevents.1751624663.r-bastien2-gt-inno-8adyb25k-35dd9-pdk18.102.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62b42315b308a9ac94142de0c122b0bab3343fd68ec6fce26c0fa266a755ef1b
3
- size 29696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b526109108336e6d1f20000ad7801d5a8fad3cac0ad432f5d24c5b8c68fb5ef
3
+ size 31897
runs/Jul04_10-24-21_r-bastien2-gt-inno-8adyb25k-35dd9-pdk18/events.out.tfevents.1751630575.r-bastien2-gt-inno-8adyb25k-35dd9-pdk18.102.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1439d679b576bd2d6e69ffa6751647a86bbd839677e5997e72be6d8c3450694
3
+ size 906
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": {
6
+ "content": "[MASK]",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "[SEP]",
14
+ "unk_token": "<unk>"
15
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fefb02b667a6c5c2fe27602d28e5fb3428f66ab89c7d6f388e7c8d44a02d0336
3
+ size 760289
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": true,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "keep_accents": false,
51
+ "mask_token": "[MASK]",
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "remove_space": true,
55
+ "sep_token": "[SEP]",
56
+ "tokenizer_class": "AlbertTokenizer",
57
+ "unk_token": "<unk>"
58
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56c092df3ecaffb3a9fcbd09e9402f7d80f3613d3c33212a201618664fc8df2a
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "MerlAIn-Base-Albert-007/autotrain-data",
3
+ "model": "albert/albert-base-v2",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 512,
7
+ "batch_size": 16,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "MerlAIn-Base-Albert-007",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "Bastien2",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }