ameerazam08 commited on
Commit
e30a64c
·
1 Parent(s): ed4205a

Done last 3:42

Browse files
.ipynb_checkpoints/config-checkpoint.json DELETED
@@ -1,131 +0,0 @@
1
- {
2
- "_name_or_path": "jonatasgrosman/wav2vec2-large-xlsr-53-english",
3
- "activation_dropout": 0.05,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForSpeechClassification"
10
- ],
11
- "attention_dropout": 0.1,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 256,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": true,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": true,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": true,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.05,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
- "finetuning_task": "wav2vec2_clf",
56
- "hidden_act": "gelu",
57
- "hidden_dropout": 0.05,
58
- "hidden_size": 1024,
59
- "id2label": {
60
- "0": "angry",
61
- "1": "fearful",
62
- "2": "happy",
63
- "3": "neutral"
64
- },
65
- "initializer_range": 0.02,
66
- "intermediate_size": 4096,
67
- "label2id": {
68
- "angry": 0,
69
- "fearful": 1,
70
- "happy": 2,
71
- "neutral": 3
72
- },
73
- "layer_norm_eps": 1e-05,
74
- "layerdrop": 0.05,
75
- "mask_channel_length": 10,
76
- "mask_channel_min_space": 1,
77
- "mask_channel_other": 0.0,
78
- "mask_channel_prob": 0.0,
79
- "mask_channel_selection": "static",
80
- "mask_feature_length": 10,
81
- "mask_feature_min_masks": 0,
82
- "mask_feature_prob": 0.0,
83
- "mask_time_length": 10,
84
- "mask_time_min_masks": 2,
85
- "mask_time_min_space": 1,
86
- "mask_time_other": 0.0,
87
- "mask_time_prob": 0.05,
88
- "mask_time_selection": "static",
89
- "model_type": "wav2vec2",
90
- "num_adapter_layers": 3,
91
- "num_attention_heads": 16,
92
- "num_codevector_groups": 2,
93
- "num_codevectors_per_group": 320,
94
- "num_conv_pos_embedding_groups": 16,
95
- "num_conv_pos_embeddings": 128,
96
- "num_feat_extract_layers": 7,
97
- "num_hidden_layers": 24,
98
- "num_negatives": 100,
99
- "output_hidden_size": 1024,
100
- "pad_token_id": 0,
101
- "pooling_mode": "mean",
102
- "problem_type": "single_label_classification",
103
- "proj_codevector_dim": 256,
104
- "push_to_hub": true,
105
- "tdnn_dilation": [
106
- 1,
107
- 2,
108
- 3,
109
- 1,
110
- 1
111
- ],
112
- "tdnn_dim": [
113
- 512,
114
- 512,
115
- 512,
116
- 512,
117
- 1500
118
- ],
119
- "tdnn_kernel": [
120
- 5,
121
- 3,
122
- 3,
123
- 1,
124
- 1
125
- ],
126
- "torch_dtype": "float32",
127
- "transformers_version": "4.23.0.dev0",
128
- "use_weighted_layer_sum": false,
129
- "vocab_size": 33,
130
- "xvector_output_dim": 512
131
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/preprocessor_config-checkpoint.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "processor_class": "Wav2Vec2ProcessorWithLM",
8
- "return_attention_mask": true,
9
- "sampling_rate": 16000
10
- }
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/trainer_state-checkpoint.json DELETED
@@ -1,121 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.9166666666666665,
5
- "global_step": 70,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.42,
12
- "learning_rate": 8.611111111111112e-05,
13
- "loss": 1.4322,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.42,
18
- "eval_accuracy": 0.3125,
19
- "eval_loss": 1.3030599355697632,
20
- "eval_runtime": 9.8316,
21
- "eval_samples_per_second": 4.882,
22
- "eval_steps_per_second": 1.221,
23
- "step": 10
24
- },
25
- {
26
- "epoch": 0.83,
27
- "learning_rate": 7.222222222222222e-05,
28
- "loss": 1.2975,
29
- "step": 20
30
- },
31
- {
32
- "epoch": 0.83,
33
- "eval_accuracy": 0.3541666567325592,
34
- "eval_loss": 1.2456461191177368,
35
- "eval_runtime": 12.762,
36
- "eval_samples_per_second": 3.761,
37
- "eval_steps_per_second": 0.94,
38
- "step": 20
39
- },
40
- {
41
- "epoch": 1.25,
42
- "learning_rate": 5.833333333333334e-05,
43
- "loss": 1.3053,
44
- "step": 30
45
- },
46
- {
47
- "epoch": 1.25,
48
- "eval_accuracy": 0.375,
49
- "eval_loss": 1.235076904296875,
50
- "eval_runtime": 9.8907,
51
- "eval_samples_per_second": 4.853,
52
- "eval_steps_per_second": 1.213,
53
- "step": 30
54
- },
55
- {
56
- "epoch": 1.67,
57
- "learning_rate": 4.4444444444444447e-05,
58
- "loss": 1.2042,
59
- "step": 40
60
- },
61
- {
62
- "epoch": 1.67,
63
- "eval_accuracy": 0.375,
64
- "eval_loss": 1.1900380849838257,
65
- "eval_runtime": 12.1455,
66
- "eval_samples_per_second": 3.952,
67
- "eval_steps_per_second": 0.988,
68
- "step": 40
69
- },
70
- {
71
- "epoch": 2.08,
72
- "learning_rate": 3.055555555555556e-05,
73
- "loss": 1.0192,
74
- "step": 50
75
- },
76
- {
77
- "epoch": 2.08,
78
- "eval_accuracy": 0.5416666865348816,
79
- "eval_loss": 1.006090760231018,
80
- "eval_runtime": 9.9246,
81
- "eval_samples_per_second": 4.836,
82
- "eval_steps_per_second": 1.209,
83
- "step": 50
84
- },
85
- {
86
- "epoch": 2.5,
87
- "learning_rate": 1.6666666666666667e-05,
88
- "loss": 0.8874,
89
- "step": 60
90
- },
91
- {
92
- "epoch": 2.5,
93
- "eval_accuracy": 0.625,
94
- "eval_loss": 0.8699900507926941,
95
- "eval_runtime": 11.1183,
96
- "eval_samples_per_second": 4.317,
97
- "eval_steps_per_second": 1.079,
98
- "step": 60
99
- },
100
- {
101
- "epoch": 2.92,
102
- "learning_rate": 2.777777777777778e-06,
103
- "loss": 0.855,
104
- "step": 70
105
- },
106
- {
107
- "epoch": 2.92,
108
- "eval_accuracy": 0.5833333134651184,
109
- "eval_loss": 0.9461669921875,
110
- "eval_runtime": 9.9807,
111
- "eval_samples_per_second": 4.809,
112
- "eval_steps_per_second": 1.202,
113
- "step": 70
114
- }
115
- ],
116
- "max_steps": 72,
117
- "num_train_epochs": 3,
118
- "total_flos": 2.022508856471088e+17,
119
- "trial_name": null,
120
- "trial_params": null
121
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e88a8a0fbd8925ff1486a322b5e34bf9d36c9f23b7ffe0949a7ac2586331c28
3
- size 2498489353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:175aca2d22d4e0ffe707aa0203de63cde30621cf49a4e7a9ded1eabcd757b3ae
3
+ size 2498489737
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f84c53f49e3893f969c261cf49738a8973d6b7e49ad472bf8d2f2a5d0515dcc0
3
  size 1266114157
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5eb5fcea715082561dc34412edd0b99d2474089f52a1ff834f8183f80b52062
3
  size 1266114157
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8c8e0bc9205594b6a626fc8dbdebad170045742f43c959074cd52c934c57a00
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5e9db4f8fb9adcedf9b8f43bc1c1355687d2a0381ee8c01e6f555f0a82cb5dc
3
  size 14503
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b88d219958fe6720d47edc802118f485ddf9d372f4bd949bb048b002fa54b3b6
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fccda41cf05d0c2582a5ae864ddced240b945973cac170056ad38621f56c053
3
  size 559
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:950d43dc50f7fafd884965bc3e374d9bbb298fcfab9bad051d1be443950e72bd
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f178de8fd426d107cb3181bc3de3565897ca73a1fcc762375a87651bd94651ae
3
  size 623
trainer_state.json CHANGED
@@ -1,121 +1,376 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9166666666666665,
5
- "global_step": 70,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.42,
12
- "learning_rate": 8.611111111111112e-05,
13
- "loss": 1.4322,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.42,
18
- "eval_accuracy": 0.3125,
19
- "eval_loss": 1.3030599355697632,
20
- "eval_runtime": 9.8316,
21
- "eval_samples_per_second": 4.882,
22
- "eval_steps_per_second": 1.221,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.83,
27
- "learning_rate": 7.222222222222222e-05,
28
- "loss": 1.2975,
29
  "step": 20
30
  },
31
  {
32
  "epoch": 0.83,
33
- "eval_accuracy": 0.3541666567325592,
34
- "eval_loss": 1.2456461191177368,
35
- "eval_runtime": 12.762,
36
- "eval_samples_per_second": 3.761,
37
- "eval_steps_per_second": 0.94,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 1.25,
42
- "learning_rate": 5.833333333333334e-05,
43
- "loss": 1.3053,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 1.25,
48
- "eval_accuracy": 0.375,
49
- "eval_loss": 1.235076904296875,
50
- "eval_runtime": 9.8907,
51
- "eval_samples_per_second": 4.853,
52
- "eval_steps_per_second": 1.213,
53
  "step": 30
54
  },
55
  {
56
  "epoch": 1.67,
57
- "learning_rate": 4.4444444444444447e-05,
58
- "loss": 1.2042,
59
  "step": 40
60
  },
61
  {
62
  "epoch": 1.67,
63
- "eval_accuracy": 0.375,
64
- "eval_loss": 1.1900380849838257,
65
- "eval_runtime": 12.1455,
66
- "eval_samples_per_second": 3.952,
67
- "eval_steps_per_second": 0.988,
68
  "step": 40
69
  },
70
  {
71
  "epoch": 2.08,
72
- "learning_rate": 3.055555555555556e-05,
73
- "loss": 1.0192,
74
  "step": 50
75
  },
76
  {
77
  "epoch": 2.08,
78
- "eval_accuracy": 0.5416666865348816,
79
- "eval_loss": 1.006090760231018,
80
- "eval_runtime": 9.9246,
81
- "eval_samples_per_second": 4.836,
82
- "eval_steps_per_second": 1.209,
83
  "step": 50
84
  },
85
  {
86
  "epoch": 2.5,
87
- "learning_rate": 1.6666666666666667e-05,
88
- "loss": 0.8874,
89
  "step": 60
90
  },
91
  {
92
  "epoch": 2.5,
93
- "eval_accuracy": 0.625,
94
- "eval_loss": 0.8699900507926941,
95
- "eval_runtime": 11.1183,
96
- "eval_samples_per_second": 4.317,
97
- "eval_steps_per_second": 1.079,
98
  "step": 60
99
  },
100
  {
101
  "epoch": 2.92,
102
- "learning_rate": 2.777777777777778e-06,
103
- "loss": 0.855,
104
  "step": 70
105
  },
106
  {
107
  "epoch": 2.92,
108
- "eval_accuracy": 0.5833333134651184,
109
- "eval_loss": 0.9461669921875,
110
- "eval_runtime": 9.9807,
111
- "eval_samples_per_second": 4.809,
112
- "eval_steps_per_second": 1.202,
113
  "step": 70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  }
115
  ],
116
- "max_steps": 72,
117
- "num_train_epochs": 3,
118
- "total_flos": 2.022508856471088e+17,
119
  "trial_name": null,
120
  "trial_params": null
121
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 240,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.42,
12
+ "learning_rate": 7.500000000000001e-05,
13
+ "loss": 1.2218,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.42,
18
+ "eval_accuracy": 0.6041666865348816,
19
+ "eval_loss": 0.9863560795783997,
20
+ "eval_runtime": 10.1253,
21
+ "eval_samples_per_second": 4.741,
22
+ "eval_steps_per_second": 1.185,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.83,
27
+ "learning_rate": 7.083333333333334e-05,
28
+ "loss": 0.9652,
29
  "step": 20
30
  },
31
  {
32
  "epoch": 0.83,
33
+ "eval_accuracy": 0.5833333134651184,
34
+ "eval_loss": 0.8854789733886719,
35
+ "eval_runtime": 11.1802,
36
+ "eval_samples_per_second": 4.293,
37
+ "eval_steps_per_second": 1.073,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 1.25,
42
+ "learning_rate": 6.666666666666667e-05,
43
+ "loss": 0.9764,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 1.25,
48
+ "eval_accuracy": 0.6666666865348816,
49
+ "eval_loss": 0.8104388117790222,
50
+ "eval_runtime": 10.0746,
51
+ "eval_samples_per_second": 4.764,
52
+ "eval_steps_per_second": 1.191,
53
  "step": 30
54
  },
55
  {
56
  "epoch": 1.67,
57
+ "learning_rate": 6.25e-05,
58
+ "loss": 0.5574,
59
  "step": 40
60
  },
61
  {
62
  "epoch": 1.67,
63
+ "eval_accuracy": 0.625,
64
+ "eval_loss": 1.0489534139633179,
65
+ "eval_runtime": 11.7457,
66
+ "eval_samples_per_second": 4.087,
67
+ "eval_steps_per_second": 1.022,
68
  "step": 40
69
  },
70
  {
71
  "epoch": 2.08,
72
+ "learning_rate": 5.833333333333334e-05,
73
+ "loss": 0.778,
74
  "step": 50
75
  },
76
  {
77
  "epoch": 2.08,
78
+ "eval_accuracy": 0.6458333134651184,
79
+ "eval_loss": 0.8387454152107239,
80
+ "eval_runtime": 10.2158,
81
+ "eval_samples_per_second": 4.699,
82
+ "eval_steps_per_second": 1.175,
83
  "step": 50
84
  },
85
  {
86
  "epoch": 2.5,
87
+ "learning_rate": 5.4166666666666664e-05,
88
+ "loss": 0.4129,
89
  "step": 60
90
  },
91
  {
92
  "epoch": 2.5,
93
+ "eval_accuracy": 0.7083333134651184,
94
+ "eval_loss": 0.7567564845085144,
95
+ "eval_runtime": 11.1124,
96
+ "eval_samples_per_second": 4.319,
97
+ "eval_steps_per_second": 1.08,
98
  "step": 60
99
  },
100
  {
101
  "epoch": 2.92,
102
+ "learning_rate": 5e-05,
103
+ "loss": 0.4054,
104
  "step": 70
105
  },
106
  {
107
  "epoch": 2.92,
108
+ "eval_accuracy": 0.75,
109
+ "eval_loss": 0.7560882568359375,
110
+ "eval_runtime": 10.3666,
111
+ "eval_samples_per_second": 4.63,
112
+ "eval_steps_per_second": 1.158,
113
  "step": 70
114
+ },
115
+ {
116
+ "epoch": 3.33,
117
+ "learning_rate": 4.5833333333333334e-05,
118
+ "loss": 0.3773,
119
+ "step": 80
120
+ },
121
+ {
122
+ "epoch": 3.33,
123
+ "eval_accuracy": 0.8125,
124
+ "eval_loss": 0.6256787180900574,
125
+ "eval_runtime": 11.7432,
126
+ "eval_samples_per_second": 4.087,
127
+ "eval_steps_per_second": 1.022,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 3.75,
132
+ "learning_rate": 4.166666666666667e-05,
133
+ "loss": 0.1139,
134
+ "step": 90
135
+ },
136
+ {
137
+ "epoch": 3.75,
138
+ "eval_accuracy": 0.875,
139
+ "eval_loss": 0.44811201095581055,
140
+ "eval_runtime": 10.2271,
141
+ "eval_samples_per_second": 4.693,
142
+ "eval_steps_per_second": 1.173,
143
+ "step": 90
144
+ },
145
+ {
146
+ "epoch": 4.17,
147
+ "learning_rate": 3.7500000000000003e-05,
148
+ "loss": 0.1395,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 4.17,
153
+ "eval_accuracy": 0.7708333134651184,
154
+ "eval_loss": 0.7507086396217346,
155
+ "eval_runtime": 11.1455,
156
+ "eval_samples_per_second": 4.307,
157
+ "eval_steps_per_second": 1.077,
158
+ "step": 100
159
+ },
160
+ {
161
+ "epoch": 4.58,
162
+ "learning_rate": 3.3333333333333335e-05,
163
+ "loss": 0.0564,
164
+ "step": 110
165
+ },
166
+ {
167
+ "epoch": 4.58,
168
+ "eval_accuracy": 0.7916666865348816,
169
+ "eval_loss": 0.7551252841949463,
170
+ "eval_runtime": 10.0567,
171
+ "eval_samples_per_second": 4.773,
172
+ "eval_steps_per_second": 1.193,
173
+ "step": 110
174
+ },
175
+ {
176
+ "epoch": 5.0,
177
+ "learning_rate": 2.916666666666667e-05,
178
+ "loss": 0.0767,
179
+ "step": 120
180
+ },
181
+ {
182
+ "epoch": 5.0,
183
+ "eval_accuracy": 0.7916666865348816,
184
+ "eval_loss": 0.7378367781639099,
185
+ "eval_runtime": 11.5811,
186
+ "eval_samples_per_second": 4.145,
187
+ "eval_steps_per_second": 1.036,
188
+ "step": 120
189
+ },
190
+ {
191
+ "epoch": 5.42,
192
+ "learning_rate": 2.5e-05,
193
+ "loss": 0.0464,
194
+ "step": 130
195
+ },
196
+ {
197
+ "epoch": 5.42,
198
+ "eval_accuracy": 0.7291666865348816,
199
+ "eval_loss": 1.1143478155136108,
200
+ "eval_runtime": 10.0199,
201
+ "eval_samples_per_second": 4.79,
202
+ "eval_steps_per_second": 1.198,
203
+ "step": 130
204
+ },
205
+ {
206
+ "epoch": 5.83,
207
+ "learning_rate": 2.0833333333333336e-05,
208
+ "loss": 0.0996,
209
+ "step": 140
210
+ },
211
+ {
212
+ "epoch": 5.83,
213
+ "eval_accuracy": 0.8333333134651184,
214
+ "eval_loss": 0.6909031867980957,
215
+ "eval_runtime": 11.7023,
216
+ "eval_samples_per_second": 4.102,
217
+ "eval_steps_per_second": 1.025,
218
+ "step": 140
219
+ },
220
+ {
221
+ "epoch": 6.25,
222
+ "learning_rate": 1.6666666666666667e-05,
223
+ "loss": 0.0166,
224
+ "step": 150
225
+ },
226
+ {
227
+ "epoch": 6.25,
228
+ "eval_accuracy": 0.8333333134651184,
229
+ "eval_loss": 0.6695077419281006,
230
+ "eval_runtime": 9.8609,
231
+ "eval_samples_per_second": 4.868,
232
+ "eval_steps_per_second": 1.217,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 6.67,
237
+ "learning_rate": 1.25e-05,
238
+ "loss": 0.0547,
239
+ "step": 160
240
+ },
241
+ {
242
+ "epoch": 6.67,
243
+ "eval_accuracy": 0.75,
244
+ "eval_loss": 0.9423481822013855,
245
+ "eval_runtime": 11.5884,
246
+ "eval_samples_per_second": 4.142,
247
+ "eval_steps_per_second": 1.036,
248
+ "step": 160
249
+ },
250
+ {
251
+ "epoch": 7.08,
252
+ "learning_rate": 8.333333333333334e-06,
253
+ "loss": 0.1214,
254
+ "step": 170
255
+ },
256
+ {
257
+ "epoch": 7.08,
258
+ "eval_accuracy": 0.7916666865348816,
259
+ "eval_loss": 0.7280401587486267,
260
+ "eval_runtime": 10.0684,
261
+ "eval_samples_per_second": 4.767,
262
+ "eval_steps_per_second": 1.192,
263
+ "step": 170
264
+ },
265
+ {
266
+ "epoch": 7.5,
267
+ "learning_rate": 4.166666666666667e-06,
268
+ "loss": 0.0096,
269
+ "step": 180
270
+ },
271
+ {
272
+ "epoch": 7.5,
273
+ "eval_accuracy": 0.7916666865348816,
274
+ "eval_loss": 0.6912185549736023,
275
+ "eval_runtime": 11.3942,
276
+ "eval_samples_per_second": 4.213,
277
+ "eval_steps_per_second": 1.053,
278
+ "step": 180
279
+ },
280
+ {
281
+ "epoch": 7.92,
282
+ "learning_rate": 0.0,
283
+ "loss": 0.0611,
284
+ "step": 190
285
+ },
286
+ {
287
+ "epoch": 7.92,
288
+ "eval_accuracy": 0.7916666865348816,
289
+ "eval_loss": 0.6880165934562683,
290
+ "eval_runtime": 10.0683,
291
+ "eval_samples_per_second": 4.767,
292
+ "eval_steps_per_second": 1.192,
293
+ "step": 190
294
+ },
295
+ {
296
+ "epoch": 8.33,
297
+ "learning_rate": 0.0,
298
+ "loss": 0.0254,
299
+ "step": 200
300
+ },
301
+ {
302
+ "epoch": 8.33,
303
+ "eval_accuracy": 0.7916666865348816,
304
+ "eval_loss": 0.6880165934562683,
305
+ "eval_runtime": 12.1412,
306
+ "eval_samples_per_second": 3.953,
307
+ "eval_steps_per_second": 0.988,
308
+ "step": 200
309
+ },
310
+ {
311
+ "epoch": 8.75,
312
+ "learning_rate": 0.0,
313
+ "loss": 0.0073,
314
+ "step": 210
315
+ },
316
+ {
317
+ "epoch": 8.75,
318
+ "eval_accuracy": 0.7916666865348816,
319
+ "eval_loss": 0.6880165934562683,
320
+ "eval_runtime": 9.9345,
321
+ "eval_samples_per_second": 4.832,
322
+ "eval_steps_per_second": 1.208,
323
+ "step": 210
324
+ },
325
+ {
326
+ "epoch": 9.17,
327
+ "learning_rate": 0.0,
328
+ "loss": 0.0153,
329
+ "step": 220
330
+ },
331
+ {
332
+ "epoch": 9.17,
333
+ "eval_accuracy": 0.7916666865348816,
334
+ "eval_loss": 0.6880165934562683,
335
+ "eval_runtime": 10.0673,
336
+ "eval_samples_per_second": 4.768,
337
+ "eval_steps_per_second": 1.192,
338
+ "step": 220
339
+ },
340
+ {
341
+ "epoch": 9.58,
342
+ "learning_rate": 0.0,
343
+ "loss": 0.0093,
344
+ "step": 230
345
+ },
346
+ {
347
+ "epoch": 9.58,
348
+ "eval_accuracy": 0.7916666865348816,
349
+ "eval_loss": 0.6880165934562683,
350
+ "eval_runtime": 9.9352,
351
+ "eval_samples_per_second": 4.831,
352
+ "eval_steps_per_second": 1.208,
353
+ "step": 230
354
+ },
355
+ {
356
+ "epoch": 10.0,
357
+ "learning_rate": 0.0,
358
+ "loss": 0.0575,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 10.0,
363
+ "eval_accuracy": 0.7916666865348816,
364
+ "eval_loss": 0.6880165934562683,
365
+ "eval_runtime": 10.1458,
366
+ "eval_samples_per_second": 4.731,
367
+ "eval_steps_per_second": 1.183,
368
+ "step": 240
369
  }
370
  ],
371
+ "max_steps": 240,
372
+ "num_train_epochs": 10,
373
+ "total_flos": 6.925070959567395e+17,
374
  "trial_name": null,
375
  "trial_params": null
376
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90d8fff125c55df8bbccce0f33f984aa2f3df275113e369eee64f76ffbb1d639
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b12271e563c64702e42089024d71119a9e970eda69d0d4cc86be8336fccf95f6
3
  size 3439