Bastien2 committed on Jun 30

Commit

40c6f71

verified ·

1 Parent(s): 42032c4

Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

README.md +37 -0
checkpoint-456/config.json +47 -0
checkpoint-456/model.safetensors +3 -0
checkpoint-456/optimizer.pt +3 -0
checkpoint-456/rng_state.pth +3 -0
checkpoint-456/scheduler.pt +3 -0
checkpoint-456/trainer_state.json +3450 -0
checkpoint-456/training_args.bin +3 -0
config.json +47 -0
model.safetensors +3 -0
runs/Jun30_09-14-46_r-bastien2-gt-inno-2d039o9i-771b3-8acwj/events.out.tfevents.1751274888.r-bastien2-gt-inno-2d039o9i-771b3-8acwj.108.0 +2 -2
runs/Jun30_09-14-46_r-bastien2-gt-inno-2d039o9i-771b3-8acwj/events.out.tfevents.1751302564.r-bastien2-gt-inno-2d039o9i-771b3-8acwj.108.1 +3 -0
special_tokens_map.json +15 -0
spiece.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
training_args.bin +3 -0
training_params.json +30 -0

README.md ADDED Viewed

	@@ -0,0 +1,37 @@

+---
+library_name: transformers
+tags:
+- autotrain
+- text-classification
+base_model: albert/albert-base-v2
+widget:
+- text: "I love AutoTrain"
+---
+# Model Trained Using AutoTrain
+- Problem type: Text Classification
+## Validation Metrics
+loss: 0.7394976019859314
+f1_macro: 0.7635683760683761
+f1_micro: 0.7666666666666667
+f1_weighted: 0.763568376068376
+precision_macro: 0.7832887700534759
+precision_micro: 0.7666666666666667
+precision_weighted: 0.783288770053476
+recall_macro: 0.7666666666666666
+recall_micro: 0.7666666666666667
+recall_weighted: 0.7666666666666667
+accuracy: 0.7666666666666667

checkpoint-456/config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "albert/albert-base-v2",
+  "_num_labels": 4,
+  "architectures": [
+    "AlbertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "CLE_DSI_SIEP",
+    "1": "CLE_DSI_SIL",
+    "2": "CLE_DSI_SPIA",
+    "3": "CLE_DSI_SSUR"
+  },
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 3072,
+  "label2id": {
+    "CLE_DSI_SIEP": 0,
+    "CLE_DSI_SIL": 1,
+    "CLE_DSI_SPIA": 2,
+    "CLE_DSI_SSUR": 3
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 12,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 2,
+  "vocab_size": 30000
+}

checkpoint-456/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7de090cbcc2fc88c209787820c086224988f028790b65f74b65f19186c33681f
+size 46750064

checkpoint-456/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2fb139b88e9ef22a50055fde14377fc7d2466f823165f07f65f4235334a9c763
+size 93515533

checkpoint-456/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e62cf6403d0b3a75e60a594e89c6ea29b639c4d39069da9b2cb7f1b2caeb2f7
+size 13990

checkpoint-456/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:704cfd08203f995474c39e6e1d8e50b9e297b05822047b677b9d92081ae459e5
+size 1064

checkpoint-456/trainer_state.json ADDED Viewed

	@@ -0,0 +1,3450 @@

+{
+  "best_metric": 0.7394976019859314,
+  "best_model_checkpoint": "MerlAIn-Base-Albert-005/checkpoint-456",
+  "epoch": 12.0,
+  "eval_steps": 500,
+  "global_step": 456,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02631578947368421,
+      "grad_norm": 39.9515380859375,
+      "learning_rate": 1.0869565217391306e-06,
+      "loss": 1.3383,
+      "step": 1
+    },
+    {
+      "epoch": 0.05263157894736842,
+      "grad_norm": 40.45222091674805,
+      "learning_rate": 2.173913043478261e-06,
+      "loss": 1.557,
+      "step": 2
+    },
+    {
+      "epoch": 0.07894736842105263,
+      "grad_norm": 34.01548385620117,
+      "learning_rate": 3.2608695652173914e-06,
+      "loss": 1.401,
+      "step": 3
+    },
+    {
+      "epoch": 0.10526315789473684,
+      "grad_norm": 43.126163482666016,
+      "learning_rate": 4.347826086956522e-06,
+      "loss": 1.4821,
+      "step": 4
+    },
+    {
+      "epoch": 0.13157894736842105,
+      "grad_norm": 53.70107650756836,
+      "learning_rate": 5.4347826086956525e-06,
+      "loss": 1.3236,
+      "step": 5
+    },
+    {
+      "epoch": 0.15789473684210525,
+      "grad_norm": 35.62350845336914,
+      "learning_rate": 6.521739130434783e-06,
+      "loss": 1.312,
+      "step": 6
+    },
+    {
+      "epoch": 0.18421052631578946,
+      "grad_norm": 38.48075485229492,
+      "learning_rate": 7.608695652173914e-06,
+      "loss": 1.3479,
+      "step": 7
+    },
+    {
+      "epoch": 0.21052631578947367,
+      "grad_norm": 38.26470184326172,
+      "learning_rate": 8.695652173913044e-06,
+      "loss": 1.3377,
+      "step": 8
+    },
+    {
+      "epoch": 0.23684210526315788,
+      "grad_norm": 56.54499816894531,
+      "learning_rate": 9.782608695652175e-06,
+      "loss": 1.3648,
+      "step": 9
+    },
+    {
+      "epoch": 0.2631578947368421,
+      "grad_norm": 28.90220832824707,
+      "learning_rate": 1.0869565217391305e-05,
+      "loss": 1.4225,
+      "step": 10
+    },
+    {
+      "epoch": 0.2894736842105263,
+      "grad_norm": 21.76893424987793,
+      "learning_rate": 1.1956521739130435e-05,
+      "loss": 1.2528,
+      "step": 11
+    },
+    {
+      "epoch": 0.3157894736842105,
+      "grad_norm": 26.75815773010254,
+      "learning_rate": 1.3043478260869566e-05,
+      "loss": 1.4086,
+      "step": 12
+    },
+    {
+      "epoch": 0.34210526315789475,
+      "grad_norm": 41.68316650390625,
+      "learning_rate": 1.4130434782608694e-05,
+      "loss": 1.29,
+      "step": 13
+    },
+    {
+      "epoch": 0.3684210526315789,
+      "grad_norm": 21.34151268005371,
+      "learning_rate": 1.5217391304347828e-05,
+      "loss": 1.3297,
+      "step": 14
+    },
+    {
+      "epoch": 0.39473684210526316,
+      "grad_norm": 27.759029388427734,
+      "learning_rate": 1.630434782608696e-05,
+      "loss": 1.3983,
+      "step": 15
+    },
+    {
+      "epoch": 0.42105263157894735,
+      "grad_norm": 33.36210250854492,
+      "learning_rate": 1.739130434782609e-05,
+      "loss": 1.344,
+      "step": 16
+    },
+    {
+      "epoch": 0.4473684210526316,
+      "grad_norm": 24.369775772094727,
+      "learning_rate": 1.8478260869565216e-05,
+      "loss": 1.3336,
+      "step": 17
+    },
+    {
+      "epoch": 0.47368421052631576,
+      "grad_norm": 18.205625534057617,
+      "learning_rate": 1.956521739130435e-05,
+      "loss": 1.273,
+      "step": 18
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 49.468936920166016,
+      "learning_rate": 2.065217391304348e-05,
+      "loss": 1.2196,
+      "step": 19
+    },
+    {
+      "epoch": 0.5263157894736842,
+      "grad_norm": 21.179851531982422,
+      "learning_rate": 2.173913043478261e-05,
+      "loss": 1.2888,
+      "step": 20
+    },
+    {
+      "epoch": 0.5526315789473685,
+      "grad_norm": 19.813323974609375,
+      "learning_rate": 2.282608695652174e-05,
+      "loss": 1.2947,
+      "step": 21
+    },
+    {
+      "epoch": 0.5789473684210527,
+      "grad_norm": 23.896602630615234,
+      "learning_rate": 2.391304347826087e-05,
+      "loss": 1.363,
+      "step": 22
+    },
+    {
+      "epoch": 0.6052631578947368,
+      "grad_norm": 31.29140281677246,
+      "learning_rate": 2.5e-05,
+      "loss": 1.1668,
+      "step": 23
+    },
+    {
+      "epoch": 0.631578947368421,
+      "grad_norm": 24.209211349487305,
+      "learning_rate": 2.608695652173913e-05,
+      "loss": 1.3319,
+      "step": 24
+    },
+    {
+      "epoch": 0.6578947368421053,
+      "grad_norm": 38.2145881652832,
+      "learning_rate": 2.7173913043478262e-05,
+      "loss": 1.5831,
+      "step": 25
+    },
+    {
+      "epoch": 0.6842105263157895,
+      "grad_norm": 35.99103927612305,
+      "learning_rate": 2.826086956521739e-05,
+      "loss": 1.4907,
+      "step": 26
+    },
+    {
+      "epoch": 0.7105263157894737,
+      "grad_norm": 50.263328552246094,
+      "learning_rate": 2.9347826086956526e-05,
+      "loss": 1.2867,
+      "step": 27
+    },
+    {
+      "epoch": 0.7368421052631579,
+      "grad_norm": 29.839618682861328,
+      "learning_rate": 3.0434782608695656e-05,
+      "loss": 1.1837,
+      "step": 28
+    },
+    {
+      "epoch": 0.7631578947368421,
+      "grad_norm": 12.5934476852417,
+      "learning_rate": 3.152173913043479e-05,
+      "loss": 1.2788,
+      "step": 29
+    },
+    {
+      "epoch": 0.7894736842105263,
+      "grad_norm": 34.49674987792969,
+      "learning_rate": 3.260869565217392e-05,
+      "loss": 1.1644,
+      "step": 30
+    },
+    {
+      "epoch": 0.8157894736842105,
+      "grad_norm": 18.00296974182129,
+      "learning_rate": 3.369565217391305e-05,
+      "loss": 1.2322,
+      "step": 31
+    },
+    {
+      "epoch": 0.8421052631578947,
+      "grad_norm": 42.88111114501953,
+      "learning_rate": 3.478260869565218e-05,
+      "loss": 1.392,
+      "step": 32
+    },
+    {
+      "epoch": 0.868421052631579,
+      "grad_norm": 24.13482093811035,
+      "learning_rate": 3.58695652173913e-05,
+      "loss": 1.2026,
+      "step": 33
+    },
+    {
+      "epoch": 0.8947368421052632,
+      "grad_norm": 17.98135757446289,
+      "learning_rate": 3.695652173913043e-05,
+      "loss": 1.3158,
+      "step": 34
+    },
+    {
+      "epoch": 0.9210526315789473,
+      "grad_norm": 32.5180778503418,
+      "learning_rate": 3.804347826086957e-05,
+      "loss": 1.4483,
+      "step": 35
+    },
+    {
+      "epoch": 0.9473684210526315,
+      "grad_norm": 22.26108169555664,
+      "learning_rate": 3.91304347826087e-05,
+      "loss": 1.4622,
+      "step": 36
+    },
+    {
+      "epoch": 0.9736842105263158,
+      "grad_norm": 23.253055572509766,
+      "learning_rate": 4.021739130434783e-05,
+      "loss": 1.3209,
+      "step": 37
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 37.42800521850586,
+      "learning_rate": 4.130434782608696e-05,
+      "loss": 1.3371,
+      "step": 38
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.31666666666666665,
+      "eval_f1_macro": 0.23189176986584106,
+      "eval_f1_micro": 0.31666666666666665,
+      "eval_f1_weighted": 0.23189176986584106,
+      "eval_loss": 1.3728160858154297,
+      "eval_precision_macro": 0.22630718954248366,
+      "eval_precision_micro": 0.31666666666666665,
+      "eval_precision_weighted": 0.22630718954248366,
+      "eval_recall_macro": 0.31666666666666665,
+      "eval_recall_micro": 0.31666666666666665,
+      "eval_recall_weighted": 0.31666666666666665,
+      "eval_runtime": 61.179,
+      "eval_samples_per_second": 0.981,
+      "eval_steps_per_second": 0.033,
+      "step": 38
+    },
+    {
+      "epoch": 1.0263157894736843,
+      "grad_norm": 37.619869232177734,
+      "learning_rate": 4.239130434782609e-05,
+      "loss": 1.2901,
+      "step": 39
+    },
+    {
+      "epoch": 1.0526315789473684,
+      "grad_norm": 19.8093204498291,
+      "learning_rate": 4.347826086956522e-05,
+      "loss": 1.2963,
+      "step": 40
+    },
+    {
+      "epoch": 1.0789473684210527,
+      "grad_norm": 22.44889259338379,
+      "learning_rate": 4.456521739130435e-05,
+      "loss": 1.0632,
+      "step": 41
+    },
+    {
+      "epoch": 1.1052631578947367,
+      "grad_norm": 42.281307220458984,
+      "learning_rate": 4.565217391304348e-05,
+      "loss": 1.3932,
+      "step": 42
+    },
+    {
+      "epoch": 1.131578947368421,
+      "grad_norm": 33.52845764160156,
+      "learning_rate": 4.673913043478261e-05,
+      "loss": 1.2337,
+      "step": 43
+    },
+    {
+      "epoch": 1.1578947368421053,
+      "grad_norm": 27.728843688964844,
+      "learning_rate": 4.782608695652174e-05,
+      "loss": 1.2061,
+      "step": 44
+    },
+    {
+      "epoch": 1.1842105263157894,
+      "grad_norm": 41.600250244140625,
+      "learning_rate": 4.891304347826087e-05,
+      "loss": 1.5733,
+      "step": 45
+    },
+    {
+      "epoch": 1.2105263157894737,
+      "grad_norm": 17.71347427368164,
+      "learning_rate": 5e-05,
+      "loss": 1.3113,
+      "step": 46
+    },
+    {
+      "epoch": 1.236842105263158,
+      "grad_norm": 68.43779754638672,
+      "learning_rate": 4.987804878048781e-05,
+      "loss": 1.3112,
+      "step": 47
+    },
+    {
+      "epoch": 1.263157894736842,
+      "grad_norm": 40.969791412353516,
+      "learning_rate": 4.975609756097561e-05,
+      "loss": 1.3548,
+      "step": 48
+    },
+    {
+      "epoch": 1.2894736842105263,
+      "grad_norm": 30.01605224609375,
+      "learning_rate": 4.9634146341463415e-05,
+      "loss": 1.2392,
+      "step": 49
+    },
+    {
+      "epoch": 1.3157894736842106,
+      "grad_norm": 19.757888793945312,
+      "learning_rate": 4.951219512195122e-05,
+      "loss": 1.2283,
+      "step": 50
+    },
+    {
+      "epoch": 1.3421052631578947,
+      "grad_norm": 24.616483688354492,
+      "learning_rate": 4.9390243902439024e-05,
+      "loss": 1.3452,
+      "step": 51
+    },
+    {
+      "epoch": 1.368421052631579,
+      "grad_norm": 41.386173248291016,
+      "learning_rate": 4.926829268292683e-05,
+      "loss": 1.1968,
+      "step": 52
+    },
+    {
+      "epoch": 1.3947368421052633,
+      "grad_norm": 47.946109771728516,
+      "learning_rate": 4.914634146341464e-05,
+      "loss": 1.0527,
+      "step": 53
+    },
+    {
+      "epoch": 1.4210526315789473,
+      "grad_norm": 15.215785026550293,
+      "learning_rate": 4.902439024390244e-05,
+      "loss": 1.0679,
+      "step": 54
+    },
+    {
+      "epoch": 1.4473684210526316,
+      "grad_norm": 112.38186645507812,
+      "learning_rate": 4.890243902439025e-05,
+      "loss": 1.4802,
+      "step": 55
+    },
+    {
+      "epoch": 1.4736842105263157,
+      "grad_norm": 44.076480865478516,
+      "learning_rate": 4.878048780487805e-05,
+      "loss": 1.5318,
+      "step": 56
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 28.927885055541992,
+      "learning_rate": 4.8658536585365856e-05,
+      "loss": 1.3589,
+      "step": 57
+    },
+    {
+      "epoch": 1.526315789473684,
+      "grad_norm": 23.650590896606445,
+      "learning_rate": 4.853658536585366e-05,
+      "loss": 1.2349,
+      "step": 58
+    },
+    {
+      "epoch": 1.5526315789473686,
+      "grad_norm": 26.140121459960938,
+      "learning_rate": 4.8414634146341464e-05,
+      "loss": 1.3551,
+      "step": 59
+    },
+    {
+      "epoch": 1.5789473684210527,
+      "grad_norm": 19.280208587646484,
+      "learning_rate": 4.829268292682927e-05,
+      "loss": 1.3492,
+      "step": 60
+    },
+    {
+      "epoch": 1.6052631578947367,
+      "grad_norm": 13.773977279663086,
+      "learning_rate": 4.817073170731707e-05,
+      "loss": 1.4615,
+      "step": 61
+    },
+    {
+      "epoch": 1.631578947368421,
+      "grad_norm": 23.0042724609375,
+      "learning_rate": 4.804878048780488e-05,
+      "loss": 1.3364,
+      "step": 62
+    },
+    {
+      "epoch": 1.6578947368421053,
+      "grad_norm": 10.230228424072266,
+      "learning_rate": 4.792682926829268e-05,
+      "loss": 1.3181,
+      "step": 63
+    },
+    {
+      "epoch": 1.6842105263157894,
+      "grad_norm": 11.699337005615234,
+      "learning_rate": 4.7804878048780485e-05,
+      "loss": 1.4606,
+      "step": 64
+    },
+    {
+      "epoch": 1.7105263157894737,
+      "grad_norm": 9.87794303894043,
+      "learning_rate": 4.7682926829268296e-05,
+      "loss": 1.2379,
+      "step": 65
+    },
+    {
+      "epoch": 1.736842105263158,
+      "grad_norm": 15.70111083984375,
+      "learning_rate": 4.75609756097561e-05,
+      "loss": 1.3018,
+      "step": 66
+    },
+    {
+      "epoch": 1.763157894736842,
+      "grad_norm": 13.613493919372559,
+      "learning_rate": 4.7439024390243905e-05,
+      "loss": 1.1586,
+      "step": 67
+    },
+    {
+      "epoch": 1.7894736842105263,
+      "grad_norm": 10.26297378540039,
+      "learning_rate": 4.731707317073171e-05,
+      "loss": 1.3906,
+      "step": 68
+    },
+    {
+      "epoch": 1.8157894736842106,
+      "grad_norm": 19.95726203918457,
+      "learning_rate": 4.719512195121951e-05,
+      "loss": 1.2608,
+      "step": 69
+    },
+    {
+      "epoch": 1.8421052631578947,
+      "grad_norm": 13.270914077758789,
+      "learning_rate": 4.707317073170732e-05,
+      "loss": 1.4012,
+      "step": 70
+    },
+    {
+      "epoch": 1.868421052631579,
+      "grad_norm": 11.952712059020996,
+      "learning_rate": 4.695121951219512e-05,
+      "loss": 1.051,
+      "step": 71
+    },
+    {
+      "epoch": 1.8947368421052633,
+      "grad_norm": 20.612009048461914,
+      "learning_rate": 4.682926829268293e-05,
+      "loss": 1.2791,
+      "step": 72
+    },
+    {
+      "epoch": 1.9210526315789473,
+      "grad_norm": 16.857328414916992,
+      "learning_rate": 4.670731707317074e-05,
+      "loss": 1.3527,
+      "step": 73
+    },
+    {
+      "epoch": 1.9473684210526314,
+      "grad_norm": 10.960662841796875,
+      "learning_rate": 4.658536585365854e-05,
+      "loss": 1.2018,
+      "step": 74
+    },
+    {
+      "epoch": 1.973684210526316,
+      "grad_norm": 33.91846466064453,
+      "learning_rate": 4.6463414634146345e-05,
+      "loss": 1.3579,
+      "step": 75
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 17.503137588500977,
+      "learning_rate": 4.634146341463415e-05,
+      "loss": 1.219,
+      "step": 76
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.25,
+      "eval_f1_macro": 0.18048340548340547,
+      "eval_f1_micro": 0.25,
+      "eval_f1_weighted": 0.1804834054834055,
+      "eval_loss": 1.4174046516418457,
+      "eval_precision_macro": 0.2144345238095238,
+      "eval_precision_micro": 0.25,
+      "eval_precision_weighted": 0.2144345238095238,
+      "eval_recall_macro": 0.25,
+      "eval_recall_micro": 0.25,
+      "eval_recall_weighted": 0.25,
+      "eval_runtime": 60.4131,
+      "eval_samples_per_second": 0.993,
+      "eval_steps_per_second": 0.033,
+      "step": 76
+    },
+    {
+      "epoch": 2.026315789473684,
+      "grad_norm": 25.20892333984375,
+      "learning_rate": 4.6219512195121954e-05,
+      "loss": 1.2875,
+      "step": 77
+    },
+    {
+      "epoch": 2.0526315789473686,
+      "grad_norm": 24.98311424255371,
+      "learning_rate": 4.609756097560976e-05,
+      "loss": 1.1022,
+      "step": 78
+    },
+    {
+      "epoch": 2.0789473684210527,
+      "grad_norm": 25.237098693847656,
+      "learning_rate": 4.597560975609756e-05,
+      "loss": 1.0668,
+      "step": 79
+    },
+    {
+      "epoch": 2.1052631578947367,
+      "grad_norm": 36.29819107055664,
+      "learning_rate": 4.585365853658537e-05,
+      "loss": 1.3539,
+      "step": 80
+    },
+    {
+      "epoch": 2.1315789473684212,
+      "grad_norm": 35.843807220458984,
+      "learning_rate": 4.573170731707318e-05,
+      "loss": 1.4779,
+      "step": 81
+    },
+    {
+      "epoch": 2.1578947368421053,
+      "grad_norm": 112.81155395507812,
+      "learning_rate": 4.560975609756098e-05,
+      "loss": 1.2617,
+      "step": 82
+    },
+    {
+      "epoch": 2.1842105263157894,
+      "grad_norm": 28.52109718322754,
+      "learning_rate": 4.5487804878048786e-05,
+      "loss": 1.1843,
+      "step": 83
+    },
+    {
+      "epoch": 2.2105263157894735,
+      "grad_norm": 24.48183822631836,
+      "learning_rate": 4.536585365853659e-05,
+      "loss": 1.2935,
+      "step": 84
+    },
+    {
+      "epoch": 2.236842105263158,
+      "grad_norm": 40.971046447753906,
+      "learning_rate": 4.5243902439024394e-05,
+      "loss": 1.3401,
+      "step": 85
+    },
+    {
+      "epoch": 2.263157894736842,
+      "grad_norm": 21.38822364807129,
+      "learning_rate": 4.51219512195122e-05,
+      "loss": 1.0817,
+      "step": 86
+    },
+    {
+      "epoch": 2.2894736842105265,
+      "grad_norm": 20.794170379638672,
+      "learning_rate": 4.5e-05,
+      "loss": 1.1627,
+      "step": 87
+    },
+    {
+      "epoch": 2.3157894736842106,
+      "grad_norm": 21.098356246948242,
+      "learning_rate": 4.487804878048781e-05,
+      "loss": 1.2119,
+      "step": 88
+    },
+    {
+      "epoch": 2.3421052631578947,
+      "grad_norm": 22.802223205566406,
+      "learning_rate": 4.475609756097561e-05,
+      "loss": 1.2992,
+      "step": 89
+    },
+    {
+      "epoch": 2.3684210526315788,
+      "grad_norm": 18.339962005615234,
+      "learning_rate": 4.4634146341463416e-05,
+      "loss": 1.034,
+      "step": 90
+    },
+    {
+      "epoch": 2.3947368421052633,
+      "grad_norm": 53.66396713256836,
+      "learning_rate": 4.451219512195122e-05,
+      "loss": 1.207,
+      "step": 91
+    },
+    {
+      "epoch": 2.4210526315789473,
+      "grad_norm": 19.618972778320312,
+      "learning_rate": 4.4390243902439024e-05,
+      "loss": 1.1393,
+      "step": 92
+    },
+    {
+      "epoch": 2.4473684210526314,
+      "grad_norm": 21.265857696533203,
+      "learning_rate": 4.4268292682926835e-05,
+      "loss": 1.1223,
+      "step": 93
+    },
+    {
+      "epoch": 2.473684210526316,
+      "grad_norm": 23.079221725463867,
+      "learning_rate": 4.414634146341464e-05,
+      "loss": 1.0722,
+      "step": 94
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 21.901897430419922,
+      "learning_rate": 4.4024390243902443e-05,
+      "loss": 1.0003,
+      "step": 95
+    },
+    {
+      "epoch": 2.526315789473684,
+      "grad_norm": 25.181835174560547,
+      "learning_rate": 4.390243902439025e-05,
+      "loss": 1.0741,
+      "step": 96
+    },
+    {
+      "epoch": 2.5526315789473686,
+      "grad_norm": 31.18767547607422,
+      "learning_rate": 4.378048780487805e-05,
+      "loss": 1.0066,
+      "step": 97
+    },
+    {
+      "epoch": 2.5789473684210527,
+      "grad_norm": 60.933631896972656,
+      "learning_rate": 4.3658536585365856e-05,
+      "loss": 1.2893,
+      "step": 98
+    },
+    {
+      "epoch": 2.6052631578947367,
+      "grad_norm": 23.306636810302734,
+      "learning_rate": 4.353658536585366e-05,
+      "loss": 1.2187,
+      "step": 99
+    },
+    {
+      "epoch": 2.6315789473684212,
+      "grad_norm": 30.128135681152344,
+      "learning_rate": 4.3414634146341465e-05,
+      "loss": 0.9419,
+      "step": 100
+    },
+    {
+      "epoch": 2.6578947368421053,
+      "grad_norm": 28.193851470947266,
+      "learning_rate": 4.329268292682927e-05,
+      "loss": 1.0172,
+      "step": 101
+    },
+    {
+      "epoch": 2.6842105263157894,
+      "grad_norm": 54.759422302246094,
+      "learning_rate": 4.317073170731707e-05,
+      "loss": 1.2784,
+      "step": 102
+    },
+    {
+      "epoch": 2.7105263157894735,
+      "grad_norm": 32.32737731933594,
+      "learning_rate": 4.304878048780488e-05,
+      "loss": 1.7073,
+      "step": 103
+    },
+    {
+      "epoch": 2.736842105263158,
+      "grad_norm": 20.45136260986328,
+      "learning_rate": 4.292682926829268e-05,
+      "loss": 0.8336,
+      "step": 104
+    },
+    {
+      "epoch": 2.763157894736842,
+      "grad_norm": 25.78130531311035,
+      "learning_rate": 4.2804878048780486e-05,
+      "loss": 0.9388,
+      "step": 105
+    },
+    {
+      "epoch": 2.7894736842105265,
+      "grad_norm": 44.98958206176758,
+      "learning_rate": 4.26829268292683e-05,
+      "loss": 1.0452,
+      "step": 106
+    },
+    {
+      "epoch": 2.8157894736842106,
+      "grad_norm": 25.93155288696289,
+      "learning_rate": 4.25609756097561e-05,
+      "loss": 1.7353,
+      "step": 107
+    },
+    {
+      "epoch": 2.8421052631578947,
+      "grad_norm": 39.97303771972656,
+      "learning_rate": 4.2439024390243905e-05,
+      "loss": 1.0937,
+      "step": 108
+    },
+    {
+      "epoch": 2.8684210526315788,
+      "grad_norm": 28.07468032836914,
+      "learning_rate": 4.231707317073171e-05,
+      "loss": 1.0547,
+      "step": 109
+    },
+    {
+      "epoch": 2.8947368421052633,
+      "grad_norm": 33.94812774658203,
+      "learning_rate": 4.2195121951219514e-05,
+      "loss": 1.4567,
+      "step": 110
+    },
+    {
+      "epoch": 2.9210526315789473,
+      "grad_norm": 19.667274475097656,
+      "learning_rate": 4.207317073170732e-05,
+      "loss": 0.9645,
+      "step": 111
+    },
+    {
+      "epoch": 2.9473684210526314,
+      "grad_norm": 22.64081382751465,
+      "learning_rate": 4.195121951219512e-05,
+      "loss": 1.0789,
+      "step": 112
+    },
+    {
+      "epoch": 2.973684210526316,
+      "grad_norm": 31.188119888305664,
+      "learning_rate": 4.1829268292682926e-05,
+      "loss": 1.1605,
+      "step": 113
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 22.26033592224121,
+      "learning_rate": 4.170731707317073e-05,
+      "loss": 1.0198,
+      "step": 114
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.31666666666666665,
+      "eval_f1_macro": 0.30134680134680136,
+      "eval_f1_micro": 0.31666666666666665,
+      "eval_f1_weighted": 0.3013468013468013,
+      "eval_loss": 1.318847417831421,
+      "eval_precision_macro": 0.306547619047619,
+      "eval_precision_micro": 0.31666666666666665,
+      "eval_precision_weighted": 0.30654761904761896,
+      "eval_recall_macro": 0.31666666666666665,
+      "eval_recall_micro": 0.31666666666666665,
+      "eval_recall_weighted": 0.31666666666666665,
+      "eval_runtime": 66.5323,
+      "eval_samples_per_second": 0.902,
+      "eval_steps_per_second": 0.03,
+      "step": 114
+    },
+    {
+      "epoch": 3.026315789473684,
+      "grad_norm": 22.01658821105957,
+      "learning_rate": 4.1585365853658535e-05,
+      "loss": 0.9237,
+      "step": 115
+    },
+    {
+      "epoch": 3.0526315789473686,
+      "grad_norm": 34.970157623291016,
+      "learning_rate": 4.146341463414634e-05,
+      "loss": 1.1732,
+      "step": 116
+    },
+    {
+      "epoch": 3.0789473684210527,
+      "grad_norm": 38.19720458984375,
+      "learning_rate": 4.134146341463414e-05,
+      "loss": 0.9921,
+      "step": 117
+    },
+    {
+      "epoch": 3.1052631578947367,
+      "grad_norm": 12.365692138671875,
+      "learning_rate": 4.1219512195121954e-05,
+      "loss": 0.7438,
+      "step": 118
+    },
+    {
+      "epoch": 3.1315789473684212,
+      "grad_norm": 11.447243690490723,
+      "learning_rate": 4.109756097560976e-05,
+      "loss": 0.8613,
+      "step": 119
+    },
+    {
+      "epoch": 3.1578947368421053,
+      "grad_norm": 23.869182586669922,
+      "learning_rate": 4.097560975609756e-05,
+      "loss": 0.9255,
+      "step": 120
+    },
+    {
+      "epoch": 3.1842105263157894,
+      "grad_norm": 23.761123657226562,
+      "learning_rate": 4.085365853658537e-05,
+      "loss": 1.0873,
+      "step": 121
+    },
+    {
+      "epoch": 3.2105263157894735,
+      "grad_norm": 49.23319625854492,
+      "learning_rate": 4.073170731707317e-05,
+      "loss": 1.2417,
+      "step": 122
+    },
+    {
+      "epoch": 3.236842105263158,
+      "grad_norm": 20.207958221435547,
+      "learning_rate": 4.060975609756098e-05,
+      "loss": 0.9844,
+      "step": 123
+    },
+    {
+      "epoch": 3.263157894736842,
+      "grad_norm": 27.667728424072266,
+      "learning_rate": 4.0487804878048786e-05,
+      "loss": 0.9163,
+      "step": 124
+    },
+    {
+      "epoch": 3.2894736842105265,
+      "grad_norm": 26.463153839111328,
+      "learning_rate": 4.036585365853659e-05,
+      "loss": 0.992,
+      "step": 125
+    },
+    {
+      "epoch": 3.3157894736842106,
+      "grad_norm": 30.33213996887207,
+      "learning_rate": 4.0243902439024395e-05,
+      "loss": 0.9267,
+      "step": 126
+    },
+    {
+      "epoch": 3.3421052631578947,
+      "grad_norm": 25.357423782348633,
+      "learning_rate": 4.01219512195122e-05,
+      "loss": 1.0649,
+      "step": 127
+    },
+    {
+      "epoch": 3.3684210526315788,
+      "grad_norm": 32.58332443237305,
+      "learning_rate": 4e-05,
+      "loss": 1.3273,
+      "step": 128
+    },
+    {
+      "epoch": 3.3947368421052633,
+      "grad_norm": 27.008098602294922,
+      "learning_rate": 3.987804878048781e-05,
+      "loss": 0.7744,
+      "step": 129
+    },
+    {
+      "epoch": 3.4210526315789473,
+      "grad_norm": 28.078779220581055,
+      "learning_rate": 3.975609756097561e-05,
+      "loss": 0.7122,
+      "step": 130
+    },
+    {
+      "epoch": 3.4473684210526314,
+      "grad_norm": 42.70326232910156,
+      "learning_rate": 3.9634146341463416e-05,
+      "loss": 1.0784,
+      "step": 131
+    },
+    {
+      "epoch": 3.473684210526316,
+      "grad_norm": 49.816429138183594,
+      "learning_rate": 3.951219512195122e-05,
+      "loss": 0.8521,
+      "step": 132
+    },
+    {
+      "epoch": 3.5,
+      "grad_norm": 49.62358474731445,
+      "learning_rate": 3.939024390243903e-05,
+      "loss": 1.0135,
+      "step": 133
+    },
+    {
+      "epoch": 3.526315789473684,
+      "grad_norm": 29.133995056152344,
+      "learning_rate": 3.9268292682926835e-05,
+      "loss": 0.9807,
+      "step": 134
+    },
+    {
+      "epoch": 3.5526315789473686,
+      "grad_norm": 38.17136001586914,
+      "learning_rate": 3.914634146341464e-05,
+      "loss": 1.0422,
+      "step": 135
+    },
+    {
+      "epoch": 3.5789473684210527,
+      "grad_norm": 37.97235870361328,
+      "learning_rate": 3.9024390243902444e-05,
+      "loss": 0.7642,
+      "step": 136
+    },
+    {
+      "epoch": 3.6052631578947367,
+      "grad_norm": 48.906150817871094,
+      "learning_rate": 3.890243902439025e-05,
+      "loss": 1.164,
+      "step": 137
+    },
+    {
+      "epoch": 3.6315789473684212,
+      "grad_norm": 29.808584213256836,
+      "learning_rate": 3.878048780487805e-05,
+      "loss": 0.9559,
+      "step": 138
+    },
+    {
+      "epoch": 3.6578947368421053,
+      "grad_norm": 34.88420867919922,
+      "learning_rate": 3.8658536585365857e-05,
+      "loss": 1.3472,
+      "step": 139
+    },
+    {
+      "epoch": 3.6842105263157894,
+      "grad_norm": 20.547361373901367,
+      "learning_rate": 3.853658536585366e-05,
+      "loss": 0.7452,
+      "step": 140
+    },
+    {
+      "epoch": 3.7105263157894735,
+      "grad_norm": 53.17014694213867,
+      "learning_rate": 3.8414634146341465e-05,
+      "loss": 1.1893,
+      "step": 141
+    },
+    {
+      "epoch": 3.736842105263158,
+      "grad_norm": 36.8974723815918,
+      "learning_rate": 3.829268292682927e-05,
+      "loss": 1.1465,
+      "step": 142
+    },
+    {
+      "epoch": 3.763157894736842,
+      "grad_norm": 79.12947082519531,
+      "learning_rate": 3.8170731707317073e-05,
+      "loss": 1.1274,
+      "step": 143
+    },
+    {
+      "epoch": 3.7894736842105265,
+      "grad_norm": 46.056846618652344,
+      "learning_rate": 3.804878048780488e-05,
+      "loss": 0.8157,
+      "step": 144
+    },
+    {
+      "epoch": 3.8157894736842106,
+      "grad_norm": 52.49138641357422,
+      "learning_rate": 3.792682926829268e-05,
+      "loss": 0.923,
+      "step": 145
+    },
+    {
+      "epoch": 3.8421052631578947,
+      "grad_norm": 142.27134704589844,
+      "learning_rate": 3.780487804878049e-05,
+      "loss": 1.2777,
+      "step": 146
+    },
+    {
+      "epoch": 3.8684210526315788,
+      "grad_norm": 172.19480895996094,
+      "learning_rate": 3.76829268292683e-05,
+      "loss": 1.0432,
+      "step": 147
+    },
+    {
+      "epoch": 3.8947368421052633,
+      "grad_norm": 123.80152130126953,
+      "learning_rate": 3.75609756097561e-05,
+      "loss": 0.7937,
+      "step": 148
+    },
+    {
+      "epoch": 3.9210526315789473,
+      "grad_norm": 77.49285125732422,
+      "learning_rate": 3.7439024390243906e-05,
+      "loss": 0.6857,
+      "step": 149
+    },
+    {
+      "epoch": 3.9473684210526314,
+      "grad_norm": 82.35449981689453,
+      "learning_rate": 3.731707317073171e-05,
+      "loss": 0.9595,
+      "step": 150
+    },
+    {
+      "epoch": 3.973684210526316,
+      "grad_norm": 38.3079833984375,
+      "learning_rate": 3.7195121951219514e-05,
+      "loss": 0.7892,
+      "step": 151
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 119.38838195800781,
+      "learning_rate": 3.707317073170732e-05,
+      "loss": 1.4254,
+      "step": 152
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.45,
+      "eval_f1_macro": 0.4606509735541993,
+      "eval_f1_micro": 0.45,
+      "eval_f1_weighted": 0.4606509735541993,
+      "eval_loss": 1.090307354927063,
+      "eval_precision_macro": 0.4900568181818182,
+      "eval_precision_micro": 0.45,
+      "eval_precision_weighted": 0.4900568181818182,
+      "eval_recall_macro": 0.44999999999999996,
+      "eval_recall_micro": 0.45,
+      "eval_recall_weighted": 0.45,
+      "eval_runtime": 69.1646,
+      "eval_samples_per_second": 0.867,
+      "eval_steps_per_second": 0.029,
+      "step": 152
+    },
+    {
+      "epoch": 4.026315789473684,
+      "grad_norm": 91.3956527709961,
+      "learning_rate": 3.695121951219512e-05,
+      "loss": 1.0411,
+      "step": 153
+    },
+    {
+      "epoch": 4.052631578947368,
+      "grad_norm": 57.57364273071289,
+      "learning_rate": 3.682926829268293e-05,
+      "loss": 0.7083,
+      "step": 154
+    },
+    {
+      "epoch": 4.078947368421052,
+      "grad_norm": 82.79764556884766,
+      "learning_rate": 3.670731707317073e-05,
+      "loss": 0.9112,
+      "step": 155
+    },
+    {
+      "epoch": 4.105263157894737,
+      "grad_norm": 47.4633674621582,
+      "learning_rate": 3.6585365853658535e-05,
+      "loss": 0.9273,
+      "step": 156
+    },
+    {
+      "epoch": 4.131578947368421,
+      "grad_norm": 59.94166564941406,
+      "learning_rate": 3.646341463414634e-05,
+      "loss": 0.8346,
+      "step": 157
+    },
+    {
+      "epoch": 4.157894736842105,
+      "grad_norm": 59.959259033203125,
+      "learning_rate": 3.634146341463415e-05,
+      "loss": 0.7768,
+      "step": 158
+    },
+    {
+      "epoch": 4.184210526315789,
+      "grad_norm": 38.368404388427734,
+      "learning_rate": 3.6219512195121955e-05,
+      "loss": 0.6442,
+      "step": 159
+    },
+    {
+      "epoch": 4.2105263157894735,
+      "grad_norm": 152.19378662109375,
+      "learning_rate": 3.609756097560976e-05,
+      "loss": 1.1892,
+      "step": 160
+    },
+    {
+      "epoch": 4.2368421052631575,
+      "grad_norm": 70.09424591064453,
+      "learning_rate": 3.597560975609756e-05,
+      "loss": 0.9863,
+      "step": 161
+    },
+    {
+      "epoch": 4.2631578947368425,
+      "grad_norm": 35.991065979003906,
+      "learning_rate": 3.585365853658537e-05,
+      "loss": 0.8468,
+      "step": 162
+    },
+    {
+      "epoch": 4.2894736842105265,
+      "grad_norm": 39.37136459350586,
+      "learning_rate": 3.573170731707317e-05,
+      "loss": 0.8338,
+      "step": 163
+    },
+    {
+      "epoch": 4.315789473684211,
+      "grad_norm": 103.584716796875,
+      "learning_rate": 3.5609756097560976e-05,
+      "loss": 1.0,
+      "step": 164
+    },
+    {
+      "epoch": 4.342105263157895,
+      "grad_norm": 38.11565399169922,
+      "learning_rate": 3.548780487804878e-05,
+      "loss": 0.671,
+      "step": 165
+    },
+    {
+      "epoch": 4.368421052631579,
+      "grad_norm": 45.16068649291992,
+      "learning_rate": 3.5365853658536584e-05,
+      "loss": 0.9203,
+      "step": 166
+    },
+    {
+      "epoch": 4.394736842105263,
+      "grad_norm": 123.4576416015625,
+      "learning_rate": 3.524390243902439e-05,
+      "loss": 0.9496,
+      "step": 167
+    },
+    {
+      "epoch": 4.421052631578947,
+      "grad_norm": 33.224815368652344,
+      "learning_rate": 3.512195121951219e-05,
+      "loss": 0.6605,
+      "step": 168
+    },
+    {
+      "epoch": 4.447368421052632,
+      "grad_norm": 42.245147705078125,
+      "learning_rate": 3.5e-05,
+      "loss": 0.733,
+      "step": 169
+    },
+    {
+      "epoch": 4.473684210526316,
+      "grad_norm": 35.62926483154297,
+      "learning_rate": 3.48780487804878e-05,
+      "loss": 0.9771,
+      "step": 170
+    },
+    {
+      "epoch": 4.5,
+      "grad_norm": 36.670928955078125,
+      "learning_rate": 3.475609756097561e-05,
+      "loss": 0.5183,
+      "step": 171
+    },
+    {
+      "epoch": 4.526315789473684,
+      "grad_norm": 25.697141647338867,
+      "learning_rate": 3.4634146341463416e-05,
+      "loss": 0.8279,
+      "step": 172
+    },
+    {
+      "epoch": 4.552631578947368,
+      "grad_norm": 50.771846771240234,
+      "learning_rate": 3.451219512195122e-05,
+      "loss": 0.6015,
+      "step": 173
+    },
+    {
+      "epoch": 4.578947368421053,
+      "grad_norm": 60.814430236816406,
+      "learning_rate": 3.4390243902439025e-05,
+      "loss": 0.7535,
+      "step": 174
+    },
+    {
+      "epoch": 4.605263157894737,
+      "grad_norm": 29.89155387878418,
+      "learning_rate": 3.4268292682926836e-05,
+      "loss": 0.628,
+      "step": 175
+    },
+    {
+      "epoch": 4.631578947368421,
+      "grad_norm": 20.725378036499023,
+      "learning_rate": 3.414634146341464e-05,
+      "loss": 0.4225,
+      "step": 176
+    },
+    {
+      "epoch": 4.657894736842105,
+      "grad_norm": 26.378557205200195,
+      "learning_rate": 3.4024390243902444e-05,
+      "loss": 0.8201,
+      "step": 177
+    },
+    {
+      "epoch": 4.684210526315789,
+      "grad_norm": 51.66449737548828,
+      "learning_rate": 3.390243902439025e-05,
+      "loss": 0.7664,
+      "step": 178
+    },
+    {
+      "epoch": 4.7105263157894735,
+      "grad_norm": 47.04916000366211,
+      "learning_rate": 3.378048780487805e-05,
+      "loss": 0.9299,
+      "step": 179
+    },
+    {
+      "epoch": 4.7368421052631575,
+      "grad_norm": 81.7121353149414,
+      "learning_rate": 3.365853658536586e-05,
+      "loss": 0.8565,
+      "step": 180
+    },
+    {
+      "epoch": 4.7631578947368425,
+      "grad_norm": 56.56669998168945,
+      "learning_rate": 3.353658536585366e-05,
+      "loss": 0.7974,
+      "step": 181
+    },
+    {
+      "epoch": 4.7894736842105265,
+      "grad_norm": 44.86998748779297,
+      "learning_rate": 3.3414634146341465e-05,
+      "loss": 0.5811,
+      "step": 182
+    },
+    {
+      "epoch": 4.815789473684211,
+      "grad_norm": 27.705507278442383,
+      "learning_rate": 3.329268292682927e-05,
+      "loss": 0.6238,
+      "step": 183
+    },
+    {
+      "epoch": 4.842105263157895,
+      "grad_norm": 27.237136840820312,
+      "learning_rate": 3.3170731707317074e-05,
+      "loss": 0.6288,
+      "step": 184
+    },
+    {
+      "epoch": 4.868421052631579,
+      "grad_norm": 56.495155334472656,
+      "learning_rate": 3.304878048780488e-05,
+      "loss": 0.6755,
+      "step": 185
+    },
+    {
+      "epoch": 4.894736842105263,
+      "grad_norm": 51.76018524169922,
+      "learning_rate": 3.292682926829269e-05,
+      "loss": 0.6209,
+      "step": 186
+    },
+    {
+      "epoch": 4.921052631578947,
+      "grad_norm": 42.66984558105469,
+      "learning_rate": 3.280487804878049e-05,
+      "loss": 0.8452,
+      "step": 187
+    },
+    {
+      "epoch": 4.947368421052632,
+      "grad_norm": 98.05541229248047,
+      "learning_rate": 3.26829268292683e-05,
+      "loss": 0.8141,
+      "step": 188
+    },
+    {
+      "epoch": 4.973684210526316,
+      "grad_norm": 43.486305236816406,
+      "learning_rate": 3.25609756097561e-05,
+      "loss": 0.8128,
+      "step": 189
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 48.97419357299805,
+      "learning_rate": 3.2439024390243906e-05,
+      "loss": 0.6552,
+      "step": 190
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.55,
+      "eval_f1_macro": 0.539948914431673,
+      "eval_f1_micro": 0.55,
+      "eval_f1_weighted": 0.539948914431673,
+      "eval_loss": 0.9649682641029358,
+      "eval_precision_macro": 0.5645833333333333,
+      "eval_precision_micro": 0.55,
+      "eval_precision_weighted": 0.5645833333333333,
+      "eval_recall_macro": 0.55,
+      "eval_recall_micro": 0.55,
+      "eval_recall_weighted": 0.55,
+      "eval_runtime": 71.5473,
+      "eval_samples_per_second": 0.839,
+      "eval_steps_per_second": 0.028,
+      "step": 190
+    },
+    {
+      "epoch": 5.026315789473684,
+      "grad_norm": 47.699520111083984,
+      "learning_rate": 3.231707317073171e-05,
+      "loss": 0.5292,
+      "step": 191
+    },
+    {
+      "epoch": 5.052631578947368,
+      "grad_norm": 27.44911003112793,
+      "learning_rate": 3.2195121951219514e-05,
+      "loss": 0.405,
+      "step": 192
+    },
+    {
+      "epoch": 5.078947368421052,
+      "grad_norm": 35.62791442871094,
+      "learning_rate": 3.207317073170732e-05,
+      "loss": 0.3673,
+      "step": 193
+    },
+    {
+      "epoch": 5.105263157894737,
+      "grad_norm": 28.489892959594727,
+      "learning_rate": 3.195121951219512e-05,
+      "loss": 0.6755,
+      "step": 194
+    },
+    {
+      "epoch": 5.131578947368421,
+      "grad_norm": 136.23341369628906,
+      "learning_rate": 3.182926829268293e-05,
+      "loss": 0.774,
+      "step": 195
+    },
+    {
+      "epoch": 5.157894736842105,
+      "grad_norm": 32.27811813354492,
+      "learning_rate": 3.170731707317073e-05,
+      "loss": 0.4562,
+      "step": 196
+    },
+    {
+      "epoch": 5.184210526315789,
+      "grad_norm": 43.724117279052734,
+      "learning_rate": 3.1585365853658536e-05,
+      "loss": 0.582,
+      "step": 197
+    },
+    {
+      "epoch": 5.2105263157894735,
+      "grad_norm": 122.9839096069336,
+      "learning_rate": 3.146341463414634e-05,
+      "loss": 0.8316,
+      "step": 198
+    },
+    {
+      "epoch": 5.2368421052631575,
+      "grad_norm": 43.039146423339844,
+      "learning_rate": 3.134146341463415e-05,
+      "loss": 0.7792,
+      "step": 199
+    },
+    {
+      "epoch": 5.2631578947368425,
+      "grad_norm": 48.31818389892578,
+      "learning_rate": 3.1219512195121955e-05,
+      "loss": 0.3813,
+      "step": 200
+    },
+    {
+      "epoch": 5.2894736842105265,
+      "grad_norm": 27.5224666595459,
+      "learning_rate": 3.109756097560976e-05,
+      "loss": 0.4812,
+      "step": 201
+    },
+    {
+      "epoch": 5.315789473684211,
+      "grad_norm": 36.04642868041992,
+      "learning_rate": 3.0975609756097564e-05,
+      "loss": 0.6757,
+      "step": 202
+    },
+    {
+      "epoch": 5.342105263157895,
+      "grad_norm": 33.89840316772461,
+      "learning_rate": 3.085365853658537e-05,
+      "loss": 0.3825,
+      "step": 203
+    },
+    {
+      "epoch": 5.368421052631579,
+      "grad_norm": 85.91902923583984,
+      "learning_rate": 3.073170731707317e-05,
+      "loss": 0.7955,
+      "step": 204
+    },
+    {
+      "epoch": 5.394736842105263,
+      "grad_norm": 66.69794464111328,
+      "learning_rate": 3.0609756097560976e-05,
+      "loss": 0.5711,
+      "step": 205
+    },
+    {
+      "epoch": 5.421052631578947,
+      "grad_norm": 55.40018844604492,
+      "learning_rate": 3.048780487804878e-05,
+      "loss": 0.5853,
+      "step": 206
+    },
+    {
+      "epoch": 5.447368421052632,
+      "grad_norm": 30.135583877563477,
+      "learning_rate": 3.0365853658536585e-05,
+      "loss": 0.582,
+      "step": 207
+    },
+    {
+      "epoch": 5.473684210526316,
+      "grad_norm": 31.25365447998047,
+      "learning_rate": 3.0243902439024392e-05,
+      "loss": 0.9509,
+      "step": 208
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 22.51866340637207,
+      "learning_rate": 3.0121951219512197e-05,
+      "loss": 0.273,
+      "step": 209
+    },
+    {
+      "epoch": 5.526315789473684,
+      "grad_norm": 103.33230590820312,
+      "learning_rate": 3e-05,
+      "loss": 0.5085,
+      "step": 210
+    },
+    {
+      "epoch": 5.552631578947368,
+      "grad_norm": 49.145145416259766,
+      "learning_rate": 2.9878048780487805e-05,
+      "loss": 0.3109,
+      "step": 211
+    },
+    {
+      "epoch": 5.578947368421053,
+      "grad_norm": 42.34318923950195,
+      "learning_rate": 2.975609756097561e-05,
+      "loss": 0.5871,
+      "step": 212
+    },
+    {
+      "epoch": 5.605263157894737,
+      "grad_norm": 42.719451904296875,
+      "learning_rate": 2.9634146341463413e-05,
+      "loss": 0.5125,
+      "step": 213
+    },
+    {
+      "epoch": 5.631578947368421,
+      "grad_norm": 52.85165786743164,
+      "learning_rate": 2.951219512195122e-05,
+      "loss": 0.8334,
+      "step": 214
+    },
+    {
+      "epoch": 5.657894736842105,
+      "grad_norm": 103.75800323486328,
+      "learning_rate": 2.9390243902439025e-05,
+      "loss": 0.5251,
+      "step": 215
+    },
+    {
+      "epoch": 5.684210526315789,
+      "grad_norm": 378.4760437011719,
+      "learning_rate": 2.926829268292683e-05,
+      "loss": 1.1182,
+      "step": 216
+    },
+    {
+      "epoch": 5.7105263157894735,
+      "grad_norm": 25.041152954101562,
+      "learning_rate": 2.9146341463414634e-05,
+      "loss": 0.5151,
+      "step": 217
+    },
+    {
+      "epoch": 5.7368421052631575,
+      "grad_norm": 104.20264434814453,
+      "learning_rate": 2.9024390243902438e-05,
+      "loss": 0.8081,
+      "step": 218
+    },
+    {
+      "epoch": 5.7631578947368425,
+      "grad_norm": 71.08457946777344,
+      "learning_rate": 2.8902439024390242e-05,
+      "loss": 0.993,
+      "step": 219
+    },
+    {
+      "epoch": 5.7894736842105265,
+      "grad_norm": 64.38380432128906,
+      "learning_rate": 2.8780487804878046e-05,
+      "loss": 0.8127,
+      "step": 220
+    },
+    {
+      "epoch": 5.815789473684211,
+      "grad_norm": 28.343429565429688,
+      "learning_rate": 2.8658536585365854e-05,
+      "loss": 0.4542,
+      "step": 221
+    },
+    {
+      "epoch": 5.842105263157895,
+      "grad_norm": 46.55160140991211,
+      "learning_rate": 2.8536585365853658e-05,
+      "loss": 0.5087,
+      "step": 222
+    },
+    {
+      "epoch": 5.868421052631579,
+      "grad_norm": 111.2691879272461,
+      "learning_rate": 2.8414634146341462e-05,
+      "loss": 0.4867,
+      "step": 223
+    },
+    {
+      "epoch": 5.894736842105263,
+      "grad_norm": 76.03248596191406,
+      "learning_rate": 2.8292682926829267e-05,
+      "loss": 0.9315,
+      "step": 224
+    },
+    {
+      "epoch": 5.921052631578947,
+      "grad_norm": 58.543758392333984,
+      "learning_rate": 2.817073170731707e-05,
+      "loss": 0.398,
+      "step": 225
+    },
+    {
+      "epoch": 5.947368421052632,
+      "grad_norm": 69.87763214111328,
+      "learning_rate": 2.8048780487804882e-05,
+      "loss": 1.0981,
+      "step": 226
+    },
+    {
+      "epoch": 5.973684210526316,
+      "grad_norm": 107.25030517578125,
+      "learning_rate": 2.7926829268292686e-05,
+      "loss": 0.7306,
+      "step": 227
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 104.7223129272461,
+      "learning_rate": 2.780487804878049e-05,
+      "loss": 0.4531,
+      "step": 228
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.6333333333333333,
+      "eval_f1_macro": 0.5696302606408418,
+      "eval_f1_micro": 0.6333333333333333,
+      "eval_f1_weighted": 0.5696302606408418,
+      "eval_loss": 1.0229661464691162,
+      "eval_precision_macro": 0.6182503770739065,
+      "eval_precision_micro": 0.6333333333333333,
+      "eval_precision_weighted": 0.6182503770739065,
+      "eval_recall_macro": 0.6333333333333334,
+      "eval_recall_micro": 0.6333333333333333,
+      "eval_recall_weighted": 0.6333333333333333,
+      "eval_runtime": 69.4357,
+      "eval_samples_per_second": 0.864,
+      "eval_steps_per_second": 0.029,
+      "step": 228
+    },
+    {
+      "epoch": 6.026315789473684,
+      "grad_norm": 49.86199951171875,
+      "learning_rate": 2.7682926829268298e-05,
+      "loss": 0.4233,
+      "step": 229
+    },
+    {
+      "epoch": 6.052631578947368,
+      "grad_norm": 47.08704376220703,
+      "learning_rate": 2.7560975609756102e-05,
+      "loss": 0.514,
+      "step": 230
+    },
+    {
+      "epoch": 6.078947368421052,
+      "grad_norm": 80.2113037109375,
+      "learning_rate": 2.7439024390243906e-05,
+      "loss": 0.4019,
+      "step": 231
+    },
+    {
+      "epoch": 6.105263157894737,
+      "grad_norm": 23.823795318603516,
+      "learning_rate": 2.731707317073171e-05,
+      "loss": 0.2943,
+      "step": 232
+    },
+    {
+      "epoch": 6.131578947368421,
+      "grad_norm": 299.1929016113281,
+      "learning_rate": 2.7195121951219515e-05,
+      "loss": 0.3543,
+      "step": 233
+    },
+    {
+      "epoch": 6.157894736842105,
+      "grad_norm": 73.6047592163086,
+      "learning_rate": 2.707317073170732e-05,
+      "loss": 0.4757,
+      "step": 234
+    },
+    {
+      "epoch": 6.184210526315789,
+      "grad_norm": 30.344865798950195,
+      "learning_rate": 2.6951219512195123e-05,
+      "loss": 0.3526,
+      "step": 235
+    },
+    {
+      "epoch": 6.2105263157894735,
+      "grad_norm": 98.406494140625,
+      "learning_rate": 2.682926829268293e-05,
+      "loss": 0.8068,
+      "step": 236
+    },
+    {
+      "epoch": 6.2368421052631575,
+      "grad_norm": 95.38936614990234,
+      "learning_rate": 2.6707317073170735e-05,
+      "loss": 0.2938,
+      "step": 237
+    },
+    {
+      "epoch": 6.2631578947368425,
+      "grad_norm": 90.7430648803711,
+      "learning_rate": 2.658536585365854e-05,
+      "loss": 0.4397,
+      "step": 238
+    },
+    {
+      "epoch": 6.2894736842105265,
+      "grad_norm": 64.78533172607422,
+      "learning_rate": 2.6463414634146344e-05,
+      "loss": 0.4477,
+      "step": 239
+    },
+    {
+      "epoch": 6.315789473684211,
+      "grad_norm": 20.26727294921875,
+      "learning_rate": 2.6341463414634148e-05,
+      "loss": 0.2176,
+      "step": 240
+    },
+    {
+      "epoch": 6.342105263157895,
+      "grad_norm": 39.570987701416016,
+      "learning_rate": 2.6219512195121952e-05,
+      "loss": 0.4976,
+      "step": 241
+    },
+    {
+      "epoch": 6.368421052631579,
+      "grad_norm": 56.996559143066406,
+      "learning_rate": 2.609756097560976e-05,
+      "loss": 0.6441,
+      "step": 242
+    },
+    {
+      "epoch": 6.394736842105263,
+      "grad_norm": 172.96588134765625,
+      "learning_rate": 2.5975609756097564e-05,
+      "loss": 0.3586,
+      "step": 243
+    },
+    {
+      "epoch": 6.421052631578947,
+      "grad_norm": 39.748512268066406,
+      "learning_rate": 2.5853658536585368e-05,
+      "loss": 0.4,
+      "step": 244
+    },
+    {
+      "epoch": 6.447368421052632,
+      "grad_norm": 47.228515625,
+      "learning_rate": 2.5731707317073172e-05,
+      "loss": 0.5478,
+      "step": 245
+    },
+    {
+      "epoch": 6.473684210526316,
+      "grad_norm": 83.61058807373047,
+      "learning_rate": 2.5609756097560977e-05,
+      "loss": 0.6447,
+      "step": 246
+    },
+    {
+      "epoch": 6.5,
+      "grad_norm": 96.23981475830078,
+      "learning_rate": 2.548780487804878e-05,
+      "loss": 0.6428,
+      "step": 247
+    },
+    {
+      "epoch": 6.526315789473684,
+      "grad_norm": 91.69573974609375,
+      "learning_rate": 2.536585365853659e-05,
+      "loss": 0.5696,
+      "step": 248
+    },
+    {
+      "epoch": 6.552631578947368,
+      "grad_norm": 80.09562683105469,
+      "learning_rate": 2.5243902439024393e-05,
+      "loss": 0.5676,
+      "step": 249
+    },
+    {
+      "epoch": 6.578947368421053,
+      "grad_norm": 71.28556060791016,
+      "learning_rate": 2.5121951219512197e-05,
+      "loss": 0.5665,
+      "step": 250
+    },
+    {
+      "epoch": 6.605263157894737,
+      "grad_norm": 159.1014862060547,
+      "learning_rate": 2.5e-05,
+      "loss": 0.6344,
+      "step": 251
+    },
+    {
+      "epoch": 6.631578947368421,
+      "grad_norm": 101.53102111816406,
+      "learning_rate": 2.4878048780487805e-05,
+      "loss": 0.52,
+      "step": 252
+    },
+    {
+      "epoch": 6.657894736842105,
+      "grad_norm": 80.52534484863281,
+      "learning_rate": 2.475609756097561e-05,
+      "loss": 0.4053,
+      "step": 253
+    },
+    {
+      "epoch": 6.684210526315789,
+      "grad_norm": 66.0854263305664,
+      "learning_rate": 2.4634146341463414e-05,
+      "loss": 0.4303,
+      "step": 254
+    },
+    {
+      "epoch": 6.7105263157894735,
+      "grad_norm": 97.85746002197266,
+      "learning_rate": 2.451219512195122e-05,
+      "loss": 0.3143,
+      "step": 255
+    },
+    {
+      "epoch": 6.7368421052631575,
+      "grad_norm": 57.940155029296875,
+      "learning_rate": 2.4390243902439026e-05,
+      "loss": 0.5949,
+      "step": 256
+    },
+    {
+      "epoch": 6.7631578947368425,
+      "grad_norm": 40.67607879638672,
+      "learning_rate": 2.426829268292683e-05,
+      "loss": 0.4334,
+      "step": 257
+    },
+    {
+      "epoch": 6.7894736842105265,
+      "grad_norm": 133.9537811279297,
+      "learning_rate": 2.4146341463414634e-05,
+      "loss": 0.3846,
+      "step": 258
+    },
+    {
+      "epoch": 6.815789473684211,
+      "grad_norm": 83.99858856201172,
+      "learning_rate": 2.402439024390244e-05,
+      "loss": 0.3965,
+      "step": 259
+    },
+    {
+      "epoch": 6.842105263157895,
+      "grad_norm": 98.94632720947266,
+      "learning_rate": 2.3902439024390243e-05,
+      "loss": 0.6727,
+      "step": 260
+    },
+    {
+      "epoch": 6.868421052631579,
+      "grad_norm": 121.6048812866211,
+      "learning_rate": 2.378048780487805e-05,
+      "loss": 0.6255,
+      "step": 261
+    },
+    {
+      "epoch": 6.894736842105263,
+      "grad_norm": 41.528053283691406,
+      "learning_rate": 2.3658536585365854e-05,
+      "loss": 0.5627,
+      "step": 262
+    },
+    {
+      "epoch": 6.921052631578947,
+      "grad_norm": 95.7408447265625,
+      "learning_rate": 2.353658536585366e-05,
+      "loss": 0.487,
+      "step": 263
+    },
+    {
+      "epoch": 6.947368421052632,
+      "grad_norm": 48.754642486572266,
+      "learning_rate": 2.3414634146341466e-05,
+      "loss": 0.2175,
+      "step": 264
+    },
+    {
+      "epoch": 6.973684210526316,
+      "grad_norm": 39.36869430541992,
+      "learning_rate": 2.329268292682927e-05,
+      "loss": 0.3606,
+      "step": 265
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 116.08528900146484,
+      "learning_rate": 2.3170731707317075e-05,
+      "loss": 0.4935,
+      "step": 266
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.6333333333333333,
+      "eval_f1_macro": 0.6192316540142626,
+      "eval_f1_micro": 0.6333333333333333,
+      "eval_f1_weighted": 0.6192316540142627,
+      "eval_loss": 0.8341596126556396,
+      "eval_precision_macro": 0.6416666666666667,
+      "eval_precision_micro": 0.6333333333333333,
+      "eval_precision_weighted": 0.6416666666666667,
+      "eval_recall_macro": 0.6333333333333333,
+      "eval_recall_micro": 0.6333333333333333,
+      "eval_recall_weighted": 0.6333333333333333,
+      "eval_runtime": 67.0822,
+      "eval_samples_per_second": 0.894,
+      "eval_steps_per_second": 0.03,
+      "step": 266
+    },
+    {
+      "epoch": 7.026315789473684,
+      "grad_norm": 33.8989143371582,
+      "learning_rate": 2.304878048780488e-05,
+      "loss": 0.3444,
+      "step": 267
+    },
+    {
+      "epoch": 7.052631578947368,
+      "grad_norm": 37.1051139831543,
+      "learning_rate": 2.2926829268292687e-05,
+      "loss": 0.4544,
+      "step": 268
+    },
+    {
+      "epoch": 7.078947368421052,
+      "grad_norm": 65.14508056640625,
+      "learning_rate": 2.280487804878049e-05,
+      "loss": 0.6778,
+      "step": 269
+    },
+    {
+      "epoch": 7.105263157894737,
+      "grad_norm": 64.3533706665039,
+      "learning_rate": 2.2682926829268295e-05,
+      "loss": 0.4157,
+      "step": 270
+    },
+    {
+      "epoch": 7.131578947368421,
+      "grad_norm": 26.499357223510742,
+      "learning_rate": 2.25609756097561e-05,
+      "loss": 0.243,
+      "step": 271
+    },
+    {
+      "epoch": 7.157894736842105,
+      "grad_norm": 48.940486907958984,
+      "learning_rate": 2.2439024390243904e-05,
+      "loss": 0.3446,
+      "step": 272
+    },
+    {
+      "epoch": 7.184210526315789,
+      "grad_norm": 31.625215530395508,
+      "learning_rate": 2.2317073170731708e-05,
+      "loss": 0.2034,
+      "step": 273
+    },
+    {
+      "epoch": 7.2105263157894735,
+      "grad_norm": 25.036733627319336,
+      "learning_rate": 2.2195121951219512e-05,
+      "loss": 0.1459,
+      "step": 274
+    },
+    {
+      "epoch": 7.2368421052631575,
+      "grad_norm": 81.56002807617188,
+      "learning_rate": 2.207317073170732e-05,
+      "loss": 0.4705,
+      "step": 275
+    },
+    {
+      "epoch": 7.2631578947368425,
+      "grad_norm": 97.85039520263672,
+      "learning_rate": 2.1951219512195124e-05,
+      "loss": 0.3849,
+      "step": 276
+    },
+    {
+      "epoch": 7.2894736842105265,
+      "grad_norm": 92.32601165771484,
+      "learning_rate": 2.1829268292682928e-05,
+      "loss": 0.4016,
+      "step": 277
+    },
+    {
+      "epoch": 7.315789473684211,
+      "grad_norm": 52.31543731689453,
+      "learning_rate": 2.1707317073170732e-05,
+      "loss": 0.2492,
+      "step": 278
+    },
+    {
+      "epoch": 7.342105263157895,
+      "grad_norm": 103.44355010986328,
+      "learning_rate": 2.1585365853658537e-05,
+      "loss": 0.4627,
+      "step": 279
+    },
+    {
+      "epoch": 7.368421052631579,
+      "grad_norm": 124.98883056640625,
+      "learning_rate": 2.146341463414634e-05,
+      "loss": 0.4096,
+      "step": 280
+    },
+    {
+      "epoch": 7.394736842105263,
+      "grad_norm": 58.3911018371582,
+      "learning_rate": 2.134146341463415e-05,
+      "loss": 0.1543,
+      "step": 281
+    },
+    {
+      "epoch": 7.421052631578947,
+      "grad_norm": 65.31929016113281,
+      "learning_rate": 2.1219512195121953e-05,
+      "loss": 0.5474,
+      "step": 282
+    },
+    {
+      "epoch": 7.447368421052632,
+      "grad_norm": 22.333057403564453,
+      "learning_rate": 2.1097560975609757e-05,
+      "loss": 0.2357,
+      "step": 283
+    },
+    {
+      "epoch": 7.473684210526316,
+      "grad_norm": 43.33408737182617,
+      "learning_rate": 2.097560975609756e-05,
+      "loss": 0.1541,
+      "step": 284
+    },
+    {
+      "epoch": 7.5,
+      "grad_norm": 107.57859802246094,
+      "learning_rate": 2.0853658536585365e-05,
+      "loss": 0.239,
+      "step": 285
+    },
+    {
+      "epoch": 7.526315789473684,
+      "grad_norm": 69.31283569335938,
+      "learning_rate": 2.073170731707317e-05,
+      "loss": 0.5448,
+      "step": 286
+    },
+    {
+      "epoch": 7.552631578947368,
+      "grad_norm": 102.60783386230469,
+      "learning_rate": 2.0609756097560977e-05,
+      "loss": 0.3203,
+      "step": 287
+    },
+    {
+      "epoch": 7.578947368421053,
+      "grad_norm": 112.20484924316406,
+      "learning_rate": 2.048780487804878e-05,
+      "loss": 0.4882,
+      "step": 288
+    },
+    {
+      "epoch": 7.605263157894737,
+      "grad_norm": 69.58258819580078,
+      "learning_rate": 2.0365853658536586e-05,
+      "loss": 0.8047,
+      "step": 289
+    },
+    {
+      "epoch": 7.631578947368421,
+      "grad_norm": 37.90599822998047,
+      "learning_rate": 2.0243902439024393e-05,
+      "loss": 0.1967,
+      "step": 290
+    },
+    {
+      "epoch": 7.657894736842105,
+      "grad_norm": 40.24911880493164,
+      "learning_rate": 2.0121951219512197e-05,
+      "loss": 0.4404,
+      "step": 291
+    },
+    {
+      "epoch": 7.684210526315789,
+      "grad_norm": 70.96076202392578,
+      "learning_rate": 2e-05,
+      "loss": 0.2328,
+      "step": 292
+    },
+    {
+      "epoch": 7.7105263157894735,
+      "grad_norm": 60.926185607910156,
+      "learning_rate": 1.9878048780487806e-05,
+      "loss": 0.4773,
+      "step": 293
+    },
+    {
+      "epoch": 7.7368421052631575,
+      "grad_norm": 25.279253005981445,
+      "learning_rate": 1.975609756097561e-05,
+      "loss": 0.3064,
+      "step": 294
+    },
+    {
+      "epoch": 7.7631578947368425,
+      "grad_norm": 188.24697875976562,
+      "learning_rate": 1.9634146341463418e-05,
+      "loss": 0.3841,
+      "step": 295
+    },
+    {
+      "epoch": 7.7894736842105265,
+      "grad_norm": 78.81002807617188,
+      "learning_rate": 1.9512195121951222e-05,
+      "loss": 0.243,
+      "step": 296
+    },
+    {
+      "epoch": 7.815789473684211,
+      "grad_norm": 50.173526763916016,
+      "learning_rate": 1.9390243902439026e-05,
+      "loss": 0.4069,
+      "step": 297
+    },
+    {
+      "epoch": 7.842105263157895,
+      "grad_norm": 16.644210815429688,
+      "learning_rate": 1.926829268292683e-05,
+      "loss": 0.1207,
+      "step": 298
+    },
+    {
+      "epoch": 7.868421052631579,
+      "grad_norm": 318.4234924316406,
+      "learning_rate": 1.9146341463414635e-05,
+      "loss": 0.7463,
+      "step": 299
+    },
+    {
+      "epoch": 7.894736842105263,
+      "grad_norm": 61.193946838378906,
+      "learning_rate": 1.902439024390244e-05,
+      "loss": 0.4838,
+      "step": 300
+    },
+    {
+      "epoch": 7.921052631578947,
+      "grad_norm": 42.694698333740234,
+      "learning_rate": 1.8902439024390246e-05,
+      "loss": 0.2745,
+      "step": 301
+    },
+    {
+      "epoch": 7.947368421052632,
+      "grad_norm": 97.97174835205078,
+      "learning_rate": 1.878048780487805e-05,
+      "loss": 0.6297,
+      "step": 302
+    },
+    {
+      "epoch": 7.973684210526316,
+      "grad_norm": 98.98281860351562,
+      "learning_rate": 1.8658536585365855e-05,
+      "loss": 0.5306,
+      "step": 303
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 82.52599334716797,
+      "learning_rate": 1.853658536585366e-05,
+      "loss": 0.0833,
+      "step": 304
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.6333333333333333,
+      "eval_f1_macro": 0.6299972429004687,
+      "eval_f1_micro": 0.6333333333333333,
+      "eval_f1_weighted": 0.6299972429004687,
+      "eval_loss": 0.8269728422164917,
+      "eval_precision_macro": 0.6510416666666666,
+      "eval_precision_micro": 0.6333333333333333,
+      "eval_precision_weighted": 0.6510416666666666,
+      "eval_recall_macro": 0.6333333333333333,
+      "eval_recall_micro": 0.6333333333333333,
+      "eval_recall_weighted": 0.6333333333333333,
+      "eval_runtime": 71.9256,
+      "eval_samples_per_second": 0.834,
+      "eval_steps_per_second": 0.028,
+      "step": 304
+    },
+    {
+      "epoch": 8.026315789473685,
+      "grad_norm": 21.338314056396484,
+      "learning_rate": 1.8414634146341463e-05,
+      "loss": 0.1345,
+      "step": 305
+    },
+    {
+      "epoch": 8.052631578947368,
+      "grad_norm": 46.4133186340332,
+      "learning_rate": 1.8292682926829268e-05,
+      "loss": 0.225,
+      "step": 306
+    },
+    {
+      "epoch": 8.078947368421053,
+      "grad_norm": 65.10118865966797,
+      "learning_rate": 1.8170731707317075e-05,
+      "loss": 0.2522,
+      "step": 307
+    },
+    {
+      "epoch": 8.105263157894736,
+      "grad_norm": 55.527183532714844,
+      "learning_rate": 1.804878048780488e-05,
+      "loss": 0.3192,
+      "step": 308
+    },
+    {
+      "epoch": 8.131578947368421,
+      "grad_norm": 185.77838134765625,
+      "learning_rate": 1.7926829268292684e-05,
+      "loss": 0.3924,
+      "step": 309
+    },
+    {
+      "epoch": 8.157894736842104,
+      "grad_norm": 37.39326095581055,
+      "learning_rate": 1.7804878048780488e-05,
+      "loss": 0.3674,
+      "step": 310
+    },
+    {
+      "epoch": 8.18421052631579,
+      "grad_norm": 120.3980941772461,
+      "learning_rate": 1.7682926829268292e-05,
+      "loss": 0.3816,
+      "step": 311
+    },
+    {
+      "epoch": 8.210526315789474,
+      "grad_norm": 114.17828369140625,
+      "learning_rate": 1.7560975609756096e-05,
+      "loss": 0.5185,
+      "step": 312
+    },
+    {
+      "epoch": 8.236842105263158,
+      "grad_norm": 216.08956909179688,
+      "learning_rate": 1.74390243902439e-05,
+      "loss": 0.3604,
+      "step": 313
+    },
+    {
+      "epoch": 8.263157894736842,
+      "grad_norm": 83.65510559082031,
+      "learning_rate": 1.7317073170731708e-05,
+      "loss": 0.2593,
+      "step": 314
+    },
+    {
+      "epoch": 8.289473684210526,
+      "grad_norm": 49.834617614746094,
+      "learning_rate": 1.7195121951219512e-05,
+      "loss": 0.2843,
+      "step": 315
+    },
+    {
+      "epoch": 8.31578947368421,
+      "grad_norm": 130.27406311035156,
+      "learning_rate": 1.707317073170732e-05,
+      "loss": 0.4344,
+      "step": 316
+    },
+    {
+      "epoch": 8.342105263157896,
+      "grad_norm": 218.7544403076172,
+      "learning_rate": 1.6951219512195124e-05,
+      "loss": 0.7271,
+      "step": 317
+    },
+    {
+      "epoch": 8.368421052631579,
+      "grad_norm": 54.966861724853516,
+      "learning_rate": 1.682926829268293e-05,
+      "loss": 0.2361,
+      "step": 318
+    },
+    {
+      "epoch": 8.394736842105264,
+      "grad_norm": 33.67280197143555,
+      "learning_rate": 1.6707317073170733e-05,
+      "loss": 0.2209,
+      "step": 319
+    },
+    {
+      "epoch": 8.421052631578947,
+      "grad_norm": 50.271949768066406,
+      "learning_rate": 1.6585365853658537e-05,
+      "loss": 0.3898,
+      "step": 320
+    },
+    {
+      "epoch": 8.447368421052632,
+      "grad_norm": 43.207115173339844,
+      "learning_rate": 1.6463414634146345e-05,
+      "loss": 0.3961,
+      "step": 321
+    },
+    {
+      "epoch": 8.473684210526315,
+      "grad_norm": 33.84910583496094,
+      "learning_rate": 1.634146341463415e-05,
+      "loss": 0.146,
+      "step": 322
+    },
+    {
+      "epoch": 8.5,
+      "grad_norm": 22.738853454589844,
+      "learning_rate": 1.6219512195121953e-05,
+      "loss": 0.1157,
+      "step": 323
+    },
+    {
+      "epoch": 8.526315789473685,
+      "grad_norm": 25.66840362548828,
+      "learning_rate": 1.6097560975609757e-05,
+      "loss": 0.218,
+      "step": 324
+    },
+    {
+      "epoch": 8.552631578947368,
+      "grad_norm": 7.81503438949585,
+      "learning_rate": 1.597560975609756e-05,
+      "loss": 0.0832,
+      "step": 325
+    },
+    {
+      "epoch": 8.578947368421053,
+      "grad_norm": 40.258338928222656,
+      "learning_rate": 1.5853658536585366e-05,
+      "loss": 0.1769,
+      "step": 326
+    },
+    {
+      "epoch": 8.605263157894736,
+      "grad_norm": 70.59412384033203,
+      "learning_rate": 1.573170731707317e-05,
+      "loss": 0.4184,
+      "step": 327
+    },
+    {
+      "epoch": 8.631578947368421,
+      "grad_norm": 52.296180725097656,
+      "learning_rate": 1.5609756097560978e-05,
+      "loss": 0.215,
+      "step": 328
+    },
+    {
+      "epoch": 8.657894736842106,
+      "grad_norm": 100.57954406738281,
+      "learning_rate": 1.5487804878048782e-05,
+      "loss": 0.1613,
+      "step": 329
+    },
+    {
+      "epoch": 8.68421052631579,
+      "grad_norm": 41.98234558105469,
+      "learning_rate": 1.5365853658536586e-05,
+      "loss": 0.3339,
+      "step": 330
+    },
+    {
+      "epoch": 8.710526315789474,
+      "grad_norm": 43.48909378051758,
+      "learning_rate": 1.524390243902439e-05,
+      "loss": 0.2052,
+      "step": 331
+    },
+    {
+      "epoch": 8.736842105263158,
+      "grad_norm": 108.465087890625,
+      "learning_rate": 1.5121951219512196e-05,
+      "loss": 0.2712,
+      "step": 332
+    },
+    {
+      "epoch": 8.763157894736842,
+      "grad_norm": 35.05289840698242,
+      "learning_rate": 1.5e-05,
+      "loss": 0.0903,
+      "step": 333
+    },
+    {
+      "epoch": 8.789473684210526,
+      "grad_norm": 21.532867431640625,
+      "learning_rate": 1.4878048780487805e-05,
+      "loss": 0.1428,
+      "step": 334
+    },
+    {
+      "epoch": 8.81578947368421,
+      "grad_norm": 62.48358154296875,
+      "learning_rate": 1.475609756097561e-05,
+      "loss": 0.242,
+      "step": 335
+    },
+    {
+      "epoch": 8.842105263157894,
+      "grad_norm": 33.35801696777344,
+      "learning_rate": 1.4634146341463415e-05,
+      "loss": 0.3042,
+      "step": 336
+    },
+    {
+      "epoch": 8.868421052631579,
+      "grad_norm": 38.024749755859375,
+      "learning_rate": 1.4512195121951219e-05,
+      "loss": 0.2156,
+      "step": 337
+    },
+    {
+      "epoch": 8.894736842105264,
+      "grad_norm": 34.33394241333008,
+      "learning_rate": 1.4390243902439023e-05,
+      "loss": 0.1868,
+      "step": 338
+    },
+    {
+      "epoch": 8.921052631578947,
+      "grad_norm": 68.10643768310547,
+      "learning_rate": 1.4268292682926829e-05,
+      "loss": 0.3916,
+      "step": 339
+    },
+    {
+      "epoch": 8.947368421052632,
+      "grad_norm": 108.21729278564453,
+      "learning_rate": 1.4146341463414633e-05,
+      "loss": 0.3538,
+      "step": 340
+    },
+    {
+      "epoch": 8.973684210526315,
+      "grad_norm": 39.021934509277344,
+      "learning_rate": 1.4024390243902441e-05,
+      "loss": 0.2256,
+      "step": 341
+    },
+    {
+      "epoch": 9.0,
+      "grad_norm": 62.53577423095703,
+      "learning_rate": 1.3902439024390245e-05,
+      "loss": 0.1077,
+      "step": 342
+    },
+    {
+      "epoch": 9.0,
+      "eval_accuracy": 0.65,
+      "eval_f1_macro": 0.6464173059000645,
+      "eval_f1_micro": 0.65,
+      "eval_f1_weighted": 0.6464173059000645,
+      "eval_loss": 0.7723664045333862,
+      "eval_precision_macro": 0.6687062937062936,
+      "eval_precision_micro": 0.65,
+      "eval_precision_weighted": 0.6687062937062938,
+      "eval_recall_macro": 0.65,
+      "eval_recall_micro": 0.65,
+      "eval_recall_weighted": 0.65,
+      "eval_runtime": 81.5249,
+      "eval_samples_per_second": 0.736,
+      "eval_steps_per_second": 0.025,
+      "step": 342
+    },
+    {
+      "epoch": 9.026315789473685,
+      "grad_norm": 44.54621124267578,
+      "learning_rate": 1.3780487804878051e-05,
+      "loss": 0.1634,
+      "step": 343
+    },
+    {
+      "epoch": 9.052631578947368,
+      "grad_norm": 51.46065139770508,
+      "learning_rate": 1.3658536585365855e-05,
+      "loss": 0.118,
+      "step": 344
+    },
+    {
+      "epoch": 9.078947368421053,
+      "grad_norm": 46.26784133911133,
+      "learning_rate": 1.353658536585366e-05,
+      "loss": 0.2486,
+      "step": 345
+    },
+    {
+      "epoch": 9.105263157894736,
+      "grad_norm": 43.58497619628906,
+      "learning_rate": 1.3414634146341466e-05,
+      "loss": 0.2445,
+      "step": 346
+    },
+    {
+      "epoch": 9.131578947368421,
+      "grad_norm": 40.656192779541016,
+      "learning_rate": 1.329268292682927e-05,
+      "loss": 0.1004,
+      "step": 347
+    },
+    {
+      "epoch": 9.157894736842104,
+      "grad_norm": 55.23797607421875,
+      "learning_rate": 1.3170731707317074e-05,
+      "loss": 0.1722,
+      "step": 348
+    },
+    {
+      "epoch": 9.18421052631579,
+      "grad_norm": 52.0846061706543,
+      "learning_rate": 1.304878048780488e-05,
+      "loss": 0.1975,
+      "step": 349
+    },
+    {
+      "epoch": 9.210526315789474,
+      "grad_norm": 38.67387390136719,
+      "learning_rate": 1.2926829268292684e-05,
+      "loss": 0.1192,
+      "step": 350
+    },
+    {
+      "epoch": 9.236842105263158,
+      "grad_norm": 29.285099029541016,
+      "learning_rate": 1.2804878048780488e-05,
+      "loss": 0.1389,
+      "step": 351
+    },
+    {
+      "epoch": 9.263157894736842,
+      "grad_norm": 22.35875129699707,
+      "learning_rate": 1.2682926829268294e-05,
+      "loss": 0.0819,
+      "step": 352
+    },
+    {
+      "epoch": 9.289473684210526,
+      "grad_norm": 12.087577819824219,
+      "learning_rate": 1.2560975609756098e-05,
+      "loss": 0.0583,
+      "step": 353
+    },
+    {
+      "epoch": 9.31578947368421,
+      "grad_norm": 17.734233856201172,
+      "learning_rate": 1.2439024390243903e-05,
+      "loss": 0.1236,
+      "step": 354
+    },
+    {
+      "epoch": 9.342105263157896,
+      "grad_norm": 22.403324127197266,
+      "learning_rate": 1.2317073170731707e-05,
+      "loss": 0.1625,
+      "step": 355
+    },
+    {
+      "epoch": 9.368421052631579,
+      "grad_norm": 7.183572292327881,
+      "learning_rate": 1.2195121951219513e-05,
+      "loss": 0.053,
+      "step": 356
+    },
+    {
+      "epoch": 9.394736842105264,
+      "grad_norm": 79.79871368408203,
+      "learning_rate": 1.2073170731707317e-05,
+      "loss": 0.3546,
+      "step": 357
+    },
+    {
+      "epoch": 9.421052631578947,
+      "grad_norm": 35.34999084472656,
+      "learning_rate": 1.1951219512195121e-05,
+      "loss": 0.2396,
+      "step": 358
+    },
+    {
+      "epoch": 9.447368421052632,
+      "grad_norm": 43.688331604003906,
+      "learning_rate": 1.1829268292682927e-05,
+      "loss": 0.1568,
+      "step": 359
+    },
+    {
+      "epoch": 9.473684210526315,
+      "grad_norm": 34.7044792175293,
+      "learning_rate": 1.1707317073170733e-05,
+      "loss": 0.2334,
+      "step": 360
+    },
+    {
+      "epoch": 9.5,
+      "grad_norm": 22.405776977539062,
+      "learning_rate": 1.1585365853658537e-05,
+      "loss": 0.1796,
+      "step": 361
+    },
+    {
+      "epoch": 9.526315789473685,
+      "grad_norm": 51.72984313964844,
+      "learning_rate": 1.1463414634146343e-05,
+      "loss": 0.2123,
+      "step": 362
+    },
+    {
+      "epoch": 9.552631578947368,
+      "grad_norm": 17.692136764526367,
+      "learning_rate": 1.1341463414634148e-05,
+      "loss": 0.0802,
+      "step": 363
+    },
+    {
+      "epoch": 9.578947368421053,
+      "grad_norm": 48.475830078125,
+      "learning_rate": 1.1219512195121952e-05,
+      "loss": 0.169,
+      "step": 364
+    },
+    {
+      "epoch": 9.605263157894736,
+      "grad_norm": 29.798110961914062,
+      "learning_rate": 1.1097560975609756e-05,
+      "loss": 0.2492,
+      "step": 365
+    },
+    {
+      "epoch": 9.631578947368421,
+      "grad_norm": 29.195980072021484,
+      "learning_rate": 1.0975609756097562e-05,
+      "loss": 0.1442,
+      "step": 366
+    },
+    {
+      "epoch": 9.657894736842106,
+      "grad_norm": 50.09147262573242,
+      "learning_rate": 1.0853658536585366e-05,
+      "loss": 0.1571,
+      "step": 367
+    },
+    {
+      "epoch": 9.68421052631579,
+      "grad_norm": 121.38156127929688,
+      "learning_rate": 1.073170731707317e-05,
+      "loss": 0.3714,
+      "step": 368
+    },
+    {
+      "epoch": 9.710526315789474,
+      "grad_norm": 16.941436767578125,
+      "learning_rate": 1.0609756097560976e-05,
+      "loss": 0.1157,
+      "step": 369
+    },
+    {
+      "epoch": 9.736842105263158,
+      "grad_norm": 59.37800216674805,
+      "learning_rate": 1.048780487804878e-05,
+      "loss": 0.2167,
+      "step": 370
+    },
+    {
+      "epoch": 9.763157894736842,
+      "grad_norm": 27.14200782775879,
+      "learning_rate": 1.0365853658536585e-05,
+      "loss": 0.1044,
+      "step": 371
+    },
+    {
+      "epoch": 9.789473684210526,
+      "grad_norm": 65.77669525146484,
+      "learning_rate": 1.024390243902439e-05,
+      "loss": 0.282,
+      "step": 372
+    },
+    {
+      "epoch": 9.81578947368421,
+      "grad_norm": 38.659385681152344,
+      "learning_rate": 1.0121951219512197e-05,
+      "loss": 0.1081,
+      "step": 373
+    },
+    {
+      "epoch": 9.842105263157894,
+      "grad_norm": 54.30513381958008,
+      "learning_rate": 1e-05,
+      "loss": 0.3475,
+      "step": 374
+    },
+    {
+      "epoch": 9.868421052631579,
+      "grad_norm": 25.771577835083008,
+      "learning_rate": 9.878048780487805e-06,
+      "loss": 0.111,
+      "step": 375
+    },
+    {
+      "epoch": 9.894736842105264,
+      "grad_norm": 9.227066040039062,
+      "learning_rate": 9.756097560975611e-06,
+      "loss": 0.066,
+      "step": 376
+    },
+    {
+      "epoch": 9.921052631578947,
+      "grad_norm": 84.51748657226562,
+      "learning_rate": 9.634146341463415e-06,
+      "loss": 0.4513,
+      "step": 377
+    },
+    {
+      "epoch": 9.947368421052632,
+      "grad_norm": 195.07611083984375,
+      "learning_rate": 9.51219512195122e-06,
+      "loss": 0.3559,
+      "step": 378
+    },
+    {
+      "epoch": 9.973684210526315,
+      "grad_norm": 44.909061431884766,
+      "learning_rate": 9.390243902439025e-06,
+      "loss": 0.3273,
+      "step": 379
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 6.2627434730529785,
+      "learning_rate": 9.26829268292683e-06,
+      "loss": 0.0612,
+      "step": 380
+    },
+    {
+      "epoch": 10.0,
+      "eval_accuracy": 0.7,
+      "eval_f1_macro": 0.6867794795783927,
+      "eval_f1_micro": 0.7,
+      "eval_f1_weighted": 0.6867794795783927,
+      "eval_loss": 0.7868326306343079,
+      "eval_precision_macro": 0.7062908496732027,
+      "eval_precision_micro": 0.7,
+      "eval_precision_weighted": 0.7062908496732025,
+      "eval_recall_macro": 0.7,
+      "eval_recall_micro": 0.7,
+      "eval_recall_weighted": 0.7,
+      "eval_runtime": 70.9939,
+      "eval_samples_per_second": 0.845,
+      "eval_steps_per_second": 0.028,
+      "step": 380
+    },
+    {
+      "epoch": 10.026315789473685,
+      "grad_norm": 49.423892974853516,
+      "learning_rate": 9.146341463414634e-06,
+      "loss": 0.144,
+      "step": 381
+    },
+    {
+      "epoch": 10.052631578947368,
+      "grad_norm": 57.79946517944336,
+      "learning_rate": 9.02439024390244e-06,
+      "loss": 0.1253,
+      "step": 382
+    },
+    {
+      "epoch": 10.078947368421053,
+      "grad_norm": 37.29555892944336,
+      "learning_rate": 8.902439024390244e-06,
+      "loss": 0.1052,
+      "step": 383
+    },
+    {
+      "epoch": 10.105263157894736,
+      "grad_norm": 17.90376091003418,
+      "learning_rate": 8.780487804878048e-06,
+      "loss": 0.0677,
+      "step": 384
+    },
+    {
+      "epoch": 10.131578947368421,
+      "grad_norm": 19.415559768676758,
+      "learning_rate": 8.658536585365854e-06,
+      "loss": 0.1531,
+      "step": 385
+    },
+    {
+      "epoch": 10.157894736842104,
+      "grad_norm": 30.255104064941406,
+      "learning_rate": 8.53658536585366e-06,
+      "loss": 0.106,
+      "step": 386
+    },
+    {
+      "epoch": 10.18421052631579,
+      "grad_norm": 7.881728172302246,
+      "learning_rate": 8.414634146341464e-06,
+      "loss": 0.0707,
+      "step": 387
+    },
+    {
+      "epoch": 10.210526315789474,
+      "grad_norm": 73.7071533203125,
+      "learning_rate": 8.292682926829268e-06,
+      "loss": 0.4019,
+      "step": 388
+    },
+    {
+      "epoch": 10.236842105263158,
+      "grad_norm": 14.182411193847656,
+      "learning_rate": 8.170731707317074e-06,
+      "loss": 0.0674,
+      "step": 389
+    },
+    {
+      "epoch": 10.263157894736842,
+      "grad_norm": 17.048315048217773,
+      "learning_rate": 8.048780487804879e-06,
+      "loss": 0.0707,
+      "step": 390
+    },
+    {
+      "epoch": 10.289473684210526,
+      "grad_norm": 6.421445369720459,
+      "learning_rate": 7.926829268292683e-06,
+      "loss": 0.0425,
+      "step": 391
+    },
+    {
+      "epoch": 10.31578947368421,
+      "grad_norm": 6.346024036407471,
+      "learning_rate": 7.804878048780489e-06,
+      "loss": 0.0476,
+      "step": 392
+    },
+    {
+      "epoch": 10.342105263157896,
+      "grad_norm": 7.961727142333984,
+      "learning_rate": 7.682926829268293e-06,
+      "loss": 0.0716,
+      "step": 393
+    },
+    {
+      "epoch": 10.368421052631579,
+      "grad_norm": 5.910510063171387,
+      "learning_rate": 7.560975609756098e-06,
+      "loss": 0.0519,
+      "step": 394
+    },
+    {
+      "epoch": 10.394736842105264,
+      "grad_norm": 8.626511573791504,
+      "learning_rate": 7.439024390243902e-06,
+      "loss": 0.0905,
+      "step": 395
+    },
+    {
+      "epoch": 10.421052631578947,
+      "grad_norm": 117.65007781982422,
+      "learning_rate": 7.317073170731707e-06,
+      "loss": 0.1747,
+      "step": 396
+    },
+    {
+      "epoch": 10.447368421052632,
+      "grad_norm": 50.54420471191406,
+      "learning_rate": 7.195121951219512e-06,
+      "loss": 0.4497,
+      "step": 397
+    },
+    {
+      "epoch": 10.473684210526315,
+      "grad_norm": 106.56475830078125,
+      "learning_rate": 7.073170731707317e-06,
+      "loss": 0.2775,
+      "step": 398
+    },
+    {
+      "epoch": 10.5,
+      "grad_norm": 68.34547424316406,
+      "learning_rate": 6.951219512195123e-06,
+      "loss": 0.1658,
+      "step": 399
+    },
+    {
+      "epoch": 10.526315789473685,
+      "grad_norm": 37.34225082397461,
+      "learning_rate": 6.829268292682928e-06,
+      "loss": 0.1699,
+      "step": 400
+    },
+    {
+      "epoch": 10.552631578947368,
+      "grad_norm": 35.83088302612305,
+      "learning_rate": 6.707317073170733e-06,
+      "loss": 0.1478,
+      "step": 401
+    },
+    {
+      "epoch": 10.578947368421053,
+      "grad_norm": 5.4022064208984375,
+      "learning_rate": 6.585365853658537e-06,
+      "loss": 0.0508,
+      "step": 402
+    },
+    {
+      "epoch": 10.605263157894736,
+      "grad_norm": 78.61613464355469,
+      "learning_rate": 6.463414634146342e-06,
+      "loss": 0.1148,
+      "step": 403
+    },
+    {
+      "epoch": 10.631578947368421,
+      "grad_norm": 25.109567642211914,
+      "learning_rate": 6.341463414634147e-06,
+      "loss": 0.2027,
+      "step": 404
+    },
+    {
+      "epoch": 10.657894736842106,
+      "grad_norm": 20.838315963745117,
+      "learning_rate": 6.219512195121951e-06,
+      "loss": 0.2343,
+      "step": 405
+    },
+    {
+      "epoch": 10.68421052631579,
+      "grad_norm": 18.96240997314453,
+      "learning_rate": 6.0975609756097564e-06,
+      "loss": 0.0477,
+      "step": 406
+    },
+    {
+      "epoch": 10.710526315789474,
+      "grad_norm": 5.693024158477783,
+      "learning_rate": 5.975609756097561e-06,
+      "loss": 0.0536,
+      "step": 407
+    },
+    {
+      "epoch": 10.736842105263158,
+      "grad_norm": 48.42964553833008,
+      "learning_rate": 5.853658536585367e-06,
+      "loss": 0.2161,
+      "step": 408
+    },
+    {
+      "epoch": 10.763157894736842,
+      "grad_norm": 14.99223518371582,
+      "learning_rate": 5.731707317073172e-06,
+      "loss": 0.0545,
+      "step": 409
+    },
+    {
+      "epoch": 10.789473684210526,
+      "grad_norm": 22.92984390258789,
+      "learning_rate": 5.609756097560976e-06,
+      "loss": 0.0871,
+      "step": 410
+    },
+    {
+      "epoch": 10.81578947368421,
+      "grad_norm": 35.50889205932617,
+      "learning_rate": 5.487804878048781e-06,
+      "loss": 0.2263,
+      "step": 411
+    },
+    {
+      "epoch": 10.842105263157894,
+      "grad_norm": 29.516984939575195,
+      "learning_rate": 5.365853658536585e-06,
+      "loss": 0.0998,
+      "step": 412
+    },
+    {
+      "epoch": 10.868421052631579,
+      "grad_norm": 14.31261920928955,
+      "learning_rate": 5.24390243902439e-06,
+      "loss": 0.066,
+      "step": 413
+    },
+    {
+      "epoch": 10.894736842105264,
+      "grad_norm": 11.149723052978516,
+      "learning_rate": 5.121951219512195e-06,
+      "loss": 0.0432,
+      "step": 414
+    },
+    {
+      "epoch": 10.921052631578947,
+      "grad_norm": 7.057794570922852,
+      "learning_rate": 5e-06,
+      "loss": 0.0423,
+      "step": 415
+    },
+    {
+      "epoch": 10.947368421052632,
+      "grad_norm": 15.57015609741211,
+      "learning_rate": 4.8780487804878055e-06,
+      "loss": 0.0554,
+      "step": 416
+    },
+    {
+      "epoch": 10.973684210526315,
+      "grad_norm": 12.00108814239502,
+      "learning_rate": 4.75609756097561e-06,
+      "loss": 0.0587,
+      "step": 417
+    },
+    {
+      "epoch": 11.0,
+      "grad_norm": 8.689730644226074,
+      "learning_rate": 4.634146341463415e-06,
+      "loss": 0.0436,
+      "step": 418
+    },
+    {
+      "epoch": 11.0,
+      "eval_accuracy": 0.7,
+      "eval_f1_macro": 0.6974548440065682,
+      "eval_f1_micro": 0.7,
+      "eval_f1_weighted": 0.6974548440065681,
+      "eval_loss": 0.7847242951393127,
+      "eval_precision_macro": 0.7133699633699633,
+      "eval_precision_micro": 0.7,
+      "eval_precision_weighted": 0.7133699633699634,
+      "eval_recall_macro": 0.7,
+      "eval_recall_micro": 0.7,
+      "eval_recall_weighted": 0.7,
+      "eval_runtime": 68.6184,
+      "eval_samples_per_second": 0.874,
+      "eval_steps_per_second": 0.029,
+      "step": 418
+    },
+    {
+      "epoch": 11.026315789473685,
+      "grad_norm": 15.764579772949219,
+      "learning_rate": 4.51219512195122e-06,
+      "loss": 0.0385,
+      "step": 419
+    },
+    {
+      "epoch": 11.052631578947368,
+      "grad_norm": 29.863691329956055,
+      "learning_rate": 4.390243902439024e-06,
+      "loss": 0.1211,
+      "step": 420
+    },
+    {
+      "epoch": 11.078947368421053,
+      "grad_norm": 3.3814544677734375,
+      "learning_rate": 4.26829268292683e-06,
+      "loss": 0.0295,
+      "step": 421
+    },
+    {
+      "epoch": 11.105263157894736,
+      "grad_norm": 57.14234924316406,
+      "learning_rate": 4.146341463414634e-06,
+      "loss": 0.0529,
+      "step": 422
+    },
+    {
+      "epoch": 11.131578947368421,
+      "grad_norm": 11.074874877929688,
+      "learning_rate": 4.024390243902439e-06,
+      "loss": 0.0904,
+      "step": 423
+    },
+    {
+      "epoch": 11.157894736842104,
+      "grad_norm": 7.610099792480469,
+      "learning_rate": 3.902439024390244e-06,
+      "loss": 0.0426,
+      "step": 424
+    },
+    {
+      "epoch": 11.18421052631579,
+      "grad_norm": 39.320194244384766,
+      "learning_rate": 3.780487804878049e-06,
+      "loss": 0.0485,
+      "step": 425
+    },
+    {
+      "epoch": 11.210526315789474,
+      "grad_norm": 7.378133773803711,
+      "learning_rate": 3.6585365853658537e-06,
+      "loss": 0.0408,
+      "step": 426
+    },
+    {
+      "epoch": 11.236842105263158,
+      "grad_norm": 27.179210662841797,
+      "learning_rate": 3.5365853658536583e-06,
+      "loss": 0.0803,
+      "step": 427
+    },
+    {
+      "epoch": 11.263157894736842,
+      "grad_norm": 9.848379135131836,
+      "learning_rate": 3.414634146341464e-06,
+      "loss": 0.066,
+      "step": 428
+    },
+    {
+      "epoch": 11.289473684210526,
+      "grad_norm": 48.25758743286133,
+      "learning_rate": 3.2926829268292685e-06,
+      "loss": 0.1043,
+      "step": 429
+    },
+    {
+      "epoch": 11.31578947368421,
+      "grad_norm": 87.25733184814453,
+      "learning_rate": 3.1707317073170736e-06,
+      "loss": 0.089,
+      "step": 430
+    },
+    {
+      "epoch": 11.342105263157896,
+      "grad_norm": 5.009243965148926,
+      "learning_rate": 3.0487804878048782e-06,
+      "loss": 0.0378,
+      "step": 431
+    },
+    {
+      "epoch": 11.368421052631579,
+      "grad_norm": 27.693439483642578,
+      "learning_rate": 2.9268292682926833e-06,
+      "loss": 0.1046,
+      "step": 432
+    },
+    {
+      "epoch": 11.394736842105264,
+      "grad_norm": 20.024911880493164,
+      "learning_rate": 2.804878048780488e-06,
+      "loss": 0.0678,
+      "step": 433
+    },
+    {
+      "epoch": 11.421052631578947,
+      "grad_norm": 15.288347244262695,
+      "learning_rate": 2.6829268292682926e-06,
+      "loss": 0.0377,
+      "step": 434
+    },
+    {
+      "epoch": 11.447368421052632,
+      "grad_norm": 17.69446563720703,
+      "learning_rate": 2.5609756097560977e-06,
+      "loss": 0.0706,
+      "step": 435
+    },
+    {
+      "epoch": 11.473684210526315,
+      "grad_norm": 21.541053771972656,
+      "learning_rate": 2.4390243902439027e-06,
+      "loss": 0.128,
+      "step": 436
+    },
+    {
+      "epoch": 11.5,
+      "grad_norm": 15.488064765930176,
+      "learning_rate": 2.3170731707317074e-06,
+      "loss": 0.0361,
+      "step": 437
+    },
+    {
+      "epoch": 11.526315789473685,
+      "grad_norm": 22.015546798706055,
+      "learning_rate": 2.195121951219512e-06,
+      "loss": 0.1368,
+      "step": 438
+    },
+    {
+      "epoch": 11.552631578947368,
+      "grad_norm": 11.265583038330078,
+      "learning_rate": 2.073170731707317e-06,
+      "loss": 0.0636,
+      "step": 439
+    },
+    {
+      "epoch": 11.578947368421053,
+      "grad_norm": 21.78879165649414,
+      "learning_rate": 1.951219512195122e-06,
+      "loss": 0.3094,
+      "step": 440
+    },
+    {
+      "epoch": 11.605263157894736,
+      "grad_norm": 25.989824295043945,
+      "learning_rate": 1.8292682926829268e-06,
+      "loss": 0.0661,
+      "step": 441
+    },
+    {
+      "epoch": 11.631578947368421,
+      "grad_norm": 18.91653060913086,
+      "learning_rate": 1.707317073170732e-06,
+      "loss": 0.0713,
+      "step": 442
+    },
+    {
+      "epoch": 11.657894736842106,
+      "grad_norm": 5.307289123535156,
+      "learning_rate": 1.5853658536585368e-06,
+      "loss": 0.04,
+      "step": 443
+    },
+    {
+      "epoch": 11.68421052631579,
+      "grad_norm": 19.790300369262695,
+      "learning_rate": 1.4634146341463416e-06,
+      "loss": 0.0798,
+      "step": 444
+    },
+    {
+      "epoch": 11.710526315789474,
+      "grad_norm": 30.500885009765625,
+      "learning_rate": 1.3414634146341463e-06,
+      "loss": 0.1807,
+      "step": 445
+    },
+    {
+      "epoch": 11.736842105263158,
+      "grad_norm": 18.471010208129883,
+      "learning_rate": 1.2195121951219514e-06,
+      "loss": 0.1433,
+      "step": 446
+    },
+    {
+      "epoch": 11.763157894736842,
+      "grad_norm": 4.643721103668213,
+      "learning_rate": 1.097560975609756e-06,
+      "loss": 0.0299,
+      "step": 447
+    },
+    {
+      "epoch": 11.789473684210526,
+      "grad_norm": 15.291370391845703,
+      "learning_rate": 9.75609756097561e-07,
+      "loss": 0.0583,
+      "step": 448
+    },
+    {
+      "epoch": 11.81578947368421,
+      "grad_norm": 21.351959228515625,
+      "learning_rate": 8.53658536585366e-07,
+      "loss": 0.0581,
+      "step": 449
+    },
+    {
+      "epoch": 11.842105263157894,
+      "grad_norm": 39.182071685791016,
+      "learning_rate": 7.317073170731708e-07,
+      "loss": 0.1439,
+      "step": 450
+    },
+    {
+      "epoch": 11.868421052631579,
+      "grad_norm": 3.376581907272339,
+      "learning_rate": 6.097560975609757e-07,
+      "loss": 0.0393,
+      "step": 451
+    },
+    {
+      "epoch": 11.894736842105264,
+      "grad_norm": 20.73614501953125,
+      "learning_rate": 4.878048780487805e-07,
+      "loss": 0.0681,
+      "step": 452
+    },
+    {
+      "epoch": 11.921052631578947,
+      "grad_norm": 11.527457237243652,
+      "learning_rate": 3.658536585365854e-07,
+      "loss": 0.0408,
+      "step": 453
+    },
+    {
+      "epoch": 11.947368421052632,
+      "grad_norm": 12.897309303283691,
+      "learning_rate": 2.439024390243903e-07,
+      "loss": 0.0337,
+      "step": 454
+    },
+    {
+      "epoch": 11.973684210526315,
+      "grad_norm": 39.30110549926758,
+      "learning_rate": 1.2195121951219514e-07,
+      "loss": 0.1508,
+      "step": 455
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 106.00151062011719,
+      "learning_rate": 0.0,
+      "loss": 0.042,
+      "step": 456
+    },
+    {
+      "epoch": 12.0,
+      "eval_accuracy": 0.7666666666666667,
+      "eval_f1_macro": 0.7635683760683761,
+      "eval_f1_micro": 0.7666666666666667,
+      "eval_f1_weighted": 0.763568376068376,
+      "eval_loss": 0.7394976019859314,
+      "eval_precision_macro": 0.7832887700534759,
+      "eval_precision_micro": 0.7666666666666667,
+      "eval_precision_weighted": 0.783288770053476,
+      "eval_recall_macro": 0.7666666666666666,
+      "eval_recall_micro": 0.7666666666666667,
+      "eval_recall_weighted": 0.7666666666666667,
+      "eval_runtime": 65.796,
+      "eval_samples_per_second": 0.912,
+      "eval_steps_per_second": 0.03,
+      "step": 456
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 456,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 12,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 171813516853248.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-456/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:488d952d2b2f4757986d04ba3081d970de0534335a7e9cc2f9e5cba483eb2afb
+size 5368

config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_name_or_path": "albert/albert-base-v2",
+  "_num_labels": 4,
+  "architectures": [
+    "AlbertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu_new",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "CLE_DSI_SIEP",
+    "1": "CLE_DSI_SIL",
+    "2": "CLE_DSI_SPIA",
+    "3": "CLE_DSI_SSUR"
+  },
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 3072,
+  "label2id": {
+    "CLE_DSI_SIEP": 0,
+    "CLE_DSI_SIL": 1,
+    "CLE_DSI_SPIA": 2,
+    "CLE_DSI_SSUR": 3
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 12,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 2,
+  "vocab_size": 30000
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7de090cbcc2fc88c209787820c086224988f028790b65f74b65f19186c33681f
+size 46750064

runs/Jun30_09-14-46_r-bastien2-gt-inno-2d039o9i-771b3-8acwj/events.out.tfevents.1751274888.r-bastien2-gt-inno-2d039o9i-771b3-8acwj.108.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e878ccb7c3159cabf1c2026543023f277497e2fc61b2d0dc73f5dd47a3985b8
-size 109269

 version https://git-lfs.github.com/spec/v1
+oid sha256:f222dc62274514b7c2935f5ef5c155224734529f4bf66f5aa5c6d8c266b474c8
+size 111511

runs/Jun30_09-14-46_r-bastien2-gt-inno-2d039o9i-771b3-8acwj/events.out.tfevents.1751302564.r-bastien2-gt-inno-2d039o9i-771b3-8acwj.108.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a52c7e1ab09c533e8cf0023a3baa11d093066a3a4ffa209e4729e8e35366048
+size 921

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "[CLS]",
+  "cls_token": "[CLS]",
+  "eos_token": "[SEP]",
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "[SEP]",
+  "unk_token": "<unk>"
+}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fefb02b667a6c5c2fe27602d28e5fb3428f66ab89c7d6f388e7c8d44a02d0336
+size 760289

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "keep_accents": false,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "remove_space": true,
+  "sep_token": "[SEP]",
+  "tokenizer_class": "AlbertTokenizer",
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:488d952d2b2f4757986d04ba3081d970de0534335a7e9cc2f9e5cba483eb2afb
+size 5368

training_params.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "data_path": "MerlAIn-Base-Albert-005/autotrain-data",
+    "model": "albert/albert-base-v2",
+    "lr": 5e-05,
+    "epochs": 12,
+    "max_seq_length": 512,
+    "batch_size": 16,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "autotrain_text",
+    "target_column": "autotrain_label",
+    "logging_steps": -1,
+    "project_name": "MerlAIn-Base-Albert-005",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "Bastien2",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}