csikasote committed
Commit e07b84e · verified · 1 Parent(s): 558205b

End of training

Files changed (5)
  1. README.md +5 -3
  2. all_results.json +15 -0
  3. eval_results.json +9 -0
  4. train_results.json +9 -0
  5. trainer_state.json +259 -0
README.md CHANGED
@@ -3,6 +3,8 @@ library_name: transformers
  license: apache-2.0
  base_model: facebook/wav2vec2-xls-r-1b
  tags:
+ - automatic-speech-recognition
+ - natbed
  - generated_from_trainer
  metrics:
  - wer
@@ -16,10 +18,10 @@ should probably proofread and complete it, then remove this comment. -->
 
  # xls-r-1b-bem-natbed-non-native-model
 
- This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on an unknown dataset.
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on the NATBED - BEM dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.6899
- - Wer: 0.6880
+ - Loss: 0.6818
+ - Wer: 0.7142
 
  ## Model description
 
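For reference, below is a minimal inference sketch for this checkpoint using the `transformers` pipeline API. It is not part of the commit; the repository id `csikasote/xls-r-1b-bem-natbed-non-native-model` and the audio file name are assumptions used for illustration.

```python
# Hypothetical usage sketch: load the fine-tuned XLS-R CTC checkpoint and
# transcribe one audio file. The repo id and file path are placeholders.
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="csikasote/xls-r-1b-bem-natbed-non-native-model",  # assumed repo id
)

# wav2vec2-style models expect 16 kHz mono audio.
result = asr("sample_utterance.wav")
print(result["text"])
```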
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "epoch": 5.8690744920993225,
+   "eval_loss": 0.6817988753318787,
+   "eval_runtime": 44.1522,
+   "eval_samples": 708,
+   "eval_samples_per_second": 16.035,
+   "eval_steps_per_second": 2.016,
+   "eval_wer": 0.7141644021739131,
+   "total_flos": 1.2230713692520196e+19,
+   "train_loss": 1.001419219970703,
+   "train_runtime": 2976.6499,
+   "train_samples": 3542,
+   "train_samples_per_second": 35.698,
+   "train_steps_per_second": 2.227
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 5.8690744920993225,
+   "eval_loss": 0.6817988753318787,
+   "eval_runtime": 44.1522,
+   "eval_samples": 708,
+   "eval_samples_per_second": 16.035,
+   "eval_steps_per_second": 2.016,
+   "eval_wer": 0.7141644021739131
+ }
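The `eval_wer` field above is a word error rate. As a side note, a score of this kind can be recomputed offline with the `evaluate` library; the sketch below uses made-up placeholder transcripts, not data from this evaluation set.

```python
# Illustrative WER computation with the `evaluate` library.
# The reference/prediction strings are placeholders only.
import evaluate

wer_metric = evaluate.load("wer")

references = ["this is a reference transcript"]
predictions = ["this is the predicted transcript"]

wer = wer_metric.compute(predictions=predictions, references=references)
print(f"WER: {wer:.4f}")  # 0.0 is perfect; the eval_wer above is ~0.714
```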
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 5.8690744920993225,
+   "total_flos": 1.2230713692520196e+19,
+   "train_loss": 1.001419219970703,
+   "train_runtime": 2976.6499,
+   "train_samples": 3542,
+   "train_samples_per_second": 35.698,
+   "train_steps_per_second": 2.227
+ }
trainer_state.json ADDED
@@ -0,0 +1,259 @@
+ {
+   "best_metric": 0.6817988753318787,
+   "best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-natbed-non-native-model/checkpoint-1000",
+   "epoch": 5.8690744920993225,
+   "eval_steps": 100,
+   "global_step": 1300,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.45146726862302483,
+       "grad_norm": 2.7790069580078125,
+       "learning_rate": 0.00029099999999999997,
+       "loss": 3.8975,
+       "step": 100
+     },
+     {
+       "epoch": 0.45146726862302483,
+       "eval_loss": 1.1342732906341553,
+       "eval_runtime": 44.5102,
+       "eval_samples_per_second": 15.906,
+       "eval_steps_per_second": 2.0,
+       "eval_wer": 0.9315557065217391,
+       "step": 100
+     },
+     {
+       "epoch": 0.9029345372460497,
+       "grad_norm": 1.7560482025146484,
+       "learning_rate": 0.00029554364471669214,
+       "loss": 1.0247,
+       "step": 200
+     },
+     {
+       "epoch": 0.9029345372460497,
+       "eval_loss": 0.9810584187507629,
+       "eval_runtime": 43.9801,
+       "eval_samples_per_second": 16.098,
+       "eval_steps_per_second": 2.024,
+       "eval_wer": 0.8826426630434783,
+       "step": 200
+     },
+     {
+       "epoch": 1.3544018058690745,
+       "grad_norm": 1.5388858318328857,
+       "learning_rate": 0.00029094946401225113,
+       "loss": 0.9631,
+       "step": 300
+     },
+     {
+       "epoch": 1.3544018058690745,
+       "eval_loss": 0.810827374458313,
+       "eval_runtime": 43.7967,
+       "eval_samples_per_second": 16.166,
+       "eval_steps_per_second": 2.032,
+       "eval_wer": 0.7523777173913043,
+       "step": 300
+     },
+     {
+       "epoch": 1.8058690744920993,
+       "grad_norm": 2.79054856300354,
+       "learning_rate": 0.00028635528330781006,
+       "loss": 0.8711,
+       "step": 400
+     },
+     {
+       "epoch": 1.8058690744920993,
+       "eval_loss": 0.7748640775680542,
+       "eval_runtime": 43.9031,
+       "eval_samples_per_second": 16.126,
+       "eval_steps_per_second": 2.027,
+       "eval_wer": 0.7637567934782609,
+       "step": 400
+     },
+     {
+       "epoch": 2.2573363431151243,
+       "grad_norm": 2.239819288253784,
+       "learning_rate": 0.00028176110260336905,
+       "loss": 0.7935,
+       "step": 500
+     },
+     {
+       "epoch": 2.2573363431151243,
+       "eval_loss": 0.8008168339729309,
+       "eval_runtime": 44.1964,
+       "eval_samples_per_second": 16.019,
+       "eval_steps_per_second": 2.014,
+       "eval_wer": 0.8231997282608695,
+       "step": 500
+     },
+     {
+       "epoch": 2.708803611738149,
+       "grad_norm": 1.130868673324585,
+       "learning_rate": 0.000277166921898928,
+       "loss": 0.7869,
+       "step": 600
+     },
+     {
+       "epoch": 2.708803611738149,
+       "eval_loss": 0.7333548069000244,
+       "eval_runtime": 44.3134,
+       "eval_samples_per_second": 15.977,
+       "eval_steps_per_second": 2.008,
+       "eval_wer": 0.7262228260869565,
+       "step": 600
+     },
+     {
+       "epoch": 3.160270880361174,
+       "grad_norm": 1.1080917119979858,
+       "learning_rate": 0.000272572741194487,
+       "loss": 0.7265,
+       "step": 700
+     },
+     {
+       "epoch": 3.160270880361174,
+       "eval_loss": 0.7238579988479614,
+       "eval_runtime": 44.0849,
+       "eval_samples_per_second": 16.06,
+       "eval_steps_per_second": 2.019,
+       "eval_wer": 0.6949728260869565,
+       "step": 700
+     },
+     {
+       "epoch": 3.6117381489841986,
+       "grad_norm": 1.219346523284912,
+       "learning_rate": 0.0002679785604900459,
+       "loss": 0.7011,
+       "step": 800
+     },
+     {
+       "epoch": 3.6117381489841986,
+       "eval_loss": 0.70769864320755,
+       "eval_runtime": 44.0752,
+       "eval_samples_per_second": 16.063,
+       "eval_steps_per_second": 2.019,
+       "eval_wer": 0.6807065217391305,
+       "step": 800
+     },
+     {
+       "epoch": 4.063205417607223,
+       "grad_norm": 0.6558771729469299,
+       "learning_rate": 0.00026338437978560485,
+       "loss": 0.7343,
+       "step": 900
+     },
+     {
+       "epoch": 4.063205417607223,
+       "eval_loss": 0.6984588503837585,
+       "eval_runtime": 44.1425,
+       "eval_samples_per_second": 16.039,
+       "eval_steps_per_second": 2.016,
+       "eval_wer": 0.69140625,
+       "step": 900
+     },
+     {
+       "epoch": 4.514672686230249,
+       "grad_norm": 1.1139057874679565,
+       "learning_rate": 0.00025879019908116383,
+       "loss": 0.6624,
+       "step": 1000
+     },
+     {
+       "epoch": 4.514672686230249,
+       "eval_loss": 0.6817988753318787,
+       "eval_runtime": 44.1617,
+       "eval_samples_per_second": 16.032,
+       "eval_steps_per_second": 2.015,
+       "eval_wer": 0.7141644021739131,
+       "step": 1000
+     },
+     {
+       "epoch": 4.966139954853273,
+       "grad_norm": 0.7626570463180542,
+       "learning_rate": 0.0002541960183767228,
+       "loss": 0.6642,
+       "step": 1100
+     },
+     {
+       "epoch": 4.966139954853273,
+       "eval_loss": 0.7096632122993469,
+       "eval_runtime": 44.5476,
+       "eval_samples_per_second": 15.893,
+       "eval_steps_per_second": 1.998,
+       "eval_wer": 0.6968410326086957,
+       "step": 1100
+     },
+     {
+       "epoch": 5.417607223476298,
+       "grad_norm": 0.8168011903762817,
+       "learning_rate": 0.00024960183767228176,
+       "loss": 0.6064,
+       "step": 1200
+     },
+     {
+       "epoch": 5.417607223476298,
+       "eval_loss": 0.7194671630859375,
+       "eval_runtime": 44.5485,
+       "eval_samples_per_second": 15.893,
+       "eval_steps_per_second": 1.998,
+       "eval_wer": 0.6856317934782609,
+       "step": 1200
+     },
+     {
+       "epoch": 5.8690744920993225,
+       "grad_norm": 1.018847942352295,
+       "learning_rate": 0.0002450076569678407,
+       "loss": 0.5867,
+       "step": 1300
+     },
+     {
+       "epoch": 5.8690744920993225,
+       "eval_loss": 0.6899004578590393,
+       "eval_runtime": 43.9975,
+       "eval_samples_per_second": 16.092,
+       "eval_steps_per_second": 2.023,
+       "eval_wer": 0.6880095108695652,
+       "step": 1300
+     },
+     {
+       "epoch": 5.8690744920993225,
+       "step": 1300,
+       "total_flos": 1.2230713692520196e+19,
+       "train_loss": 1.001419219970703,
+       "train_runtime": 2976.6499,
+       "train_samples_per_second": 35.698,
+       "train_steps_per_second": 2.227
+     }
+   ],
+   "logging_steps": 100,
+   "max_steps": 6630,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 30,
+   "save_steps": 200,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 3,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 2
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.2230713692520196e+19,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
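The trainer state above records the schedule used for this run: evaluation and logging every 100 steps, checkpoints every 200 steps, batch size 8, up to 30 epochs (6630 steps), early stopping with patience 3, and the best checkpoint (by eval loss) at step 1000. The sketch below reconstructs a `TrainingArguments`/`EarlyStoppingCallback` setup consistent with those values; the learning rate, output directory, and anything else not stored in trainer_state.json are assumptions, and the model and datasets are placeholders.

```python
# Sketch of a Trainer configuration consistent with trainer_state.json.
# Values not present in the state file (learning rate, output_dir) are assumptions.
from transformers import TrainingArguments, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir="xls-r-1b-bem-natbed-non-native-model",  # assumed
    per_device_train_batch_size=8,       # train_batch_size in trainer_state.json
    num_train_epochs=30,
    eval_strategy="steps",                # older transformers: evaluation_strategy
    eval_steps=100,
    logging_steps=100,
    save_steps=200,
    load_best_model_at_end=True,          # best_model_checkpoint is recorded
    metric_for_best_model="loss",         # best_metric matches eval_loss
    learning_rate=3e-4,                   # assumption inferred from the logged schedule
)

# Passed to a Trainer wrapping a Wav2Vec2ForCTC model and the NATBED - BEM splits:
#   Trainer(model=..., args=training_args, train_dataset=..., eval_dataset=...,
#           callbacks=[early_stopping])
early_stopping = EarlyStoppingCallback(early_stopping_patience=3)
```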