Parsavares committed on
Commit
65439b1
·
verified ·
1 Parent(s): 0ab21d9

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -2,34 +2,26 @@
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
5
  *.ftz filter=lfs diff=lfs merge=lfs -text
6
  *.gz filter=lfs diff=lfs merge=lfs -text
7
  *.h5 filter=lfs diff=lfs merge=lfs -text
8
  *.joblib filter=lfs diff=lfs merge=lfs -text
9
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
10
  *.model filter=lfs diff=lfs merge=lfs -text
11
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
12
  *.onnx filter=lfs diff=lfs merge=lfs -text
13
  *.ot filter=lfs diff=lfs merge=lfs -text
14
  *.parquet filter=lfs diff=lfs merge=lfs -text
15
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
16
  *.pt filter=lfs diff=lfs merge=lfs -text
17
  *.pth filter=lfs diff=lfs merge=lfs -text
18
  *.rar filter=lfs diff=lfs merge=lfs -text
 
19
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
21
  *.tflite filter=lfs diff=lfs merge=lfs -text
22
  *.tgz filter=lfs diff=lfs merge=lfs -text
23
  *.wasm filter=lfs diff=lfs merge=lfs -text
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
Lemswasabi_tuudle_rtl-benchmark_test_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.2309204061578775
2
+ CER: 0.07627700596904548
Lemswasabi_tuudle_rtl-benchmark_validation_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.2394678492239468
2
+ CER: 0.07967653704364372
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - automatic-speech-recognition
4
+ - generated_from_trainer
5
+ license: mit
6
+ language:
7
+ - lb
8
+ metrics:
9
+ - wer
10
+ pipeline_tag: automatic-speech-recognition
11
+
12
+ model-index:
13
+ - name: Lemswasabi/wav2vec2-base-luxembourgish-4h
14
+ results:
15
+ - task:
16
+ type: automatic-speech-recognition # Required. Example: automatic-speech-recognition
17
+ name: Speech Recognition # Optional. Example: Speech Recognition
18
+ metrics:
19
+ - type: wer
20
+ value: 23.95
21
+ name: Dev WER
22
+ - type: wer
23
+ value: 23.09
24
+ name: Test WER
25
+ - type: cer
26
+ value: 7.97
27
+ name: Dev CER
28
+ - type: cer
29
+ value: 7.63
30
+ name: Test CER
31
+ ---
32
+
33
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
34
+ should probably proofread and complete it, then remove this comment. -->
35
+
36
+ # wav2vec2-base-luxembourgish-4h
37
+
38
+ ## Model description
39
+
40
+ We pre-trained a wav2vec 2.0 base model on 842h of unlabelled Luxembourgish speech
41
+ collected from [RTL.lu](https://www.rtl.lu/). Then the model was fine-tuned on 4h of labelled
42
+ Luxembourgish speech from the same domain.
43
+
44
+ ## Intended uses & limitations
45
+
46
+ More information needed
47
+
48
+ ## Training and evaluation data
49
+
50
+ More information needed
51
+
52
+ ## Training procedure
53
+
54
+ ### Training hyperparameters
55
+
56
+ The following hyperparameters were used during training:
57
+ - learning_rate: 7.5e-05
58
+ - train_batch_size: 3
59
+ - eval_batch_size: 3
60
+ - seed: 42
61
+ - gradient_accumulation_steps: 4
62
+ - total_train_batch_size: 12
63
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
64
+ - lr_scheduler_type: linear
65
+ - lr_scheduler_warmup_steps: 2000
66
+ - num_epochs: 50.0
67
+ - mixed_precision_training: Native AMP
68
+
69
+ ### Framework versions
70
+
71
+ - Transformers 4.20.0.dev0
72
+ - Pytorch 1.11.0+cu113
73
+ - Datasets 2.2.1
74
+ - Tokenizers 0.12.1
75
+
76
+ ## Citation
77
+
78
+ This model is a result of our paper `IMPROVING LUXEMBOURGISH SPEECH RECOGNITION WITH CROSS-LINGUAL SPEECH REPRESENTATIONS` submitted to the [IEEE SLT 2022 workshop](https://slt2022.org/).
79
+
80
+ ```
81
+ @misc{lb-wav2vec2,
82
+ author = {Nguyen, Le Minh and Nayak, Shekhar and Coler, Matt},
83
+ keywords = {Luxembourgish, multilingual speech recognition, language modelling, wav2vec 2.0 XLSR-53, under-resourced language},
84
+ title = {IMPROVING LUXEMBOURGISH SPEECH RECOGNITION WITH CROSS-LINGUAL SPEECH REPRESENTATIONS},
85
+ year = {2022},
86
+ copyright = {2023 IEEE}
87
+ }
88
+ ```
config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_kernel_size": 3,
4
+ "adapter_stride": 2,
5
+ "add_adapter": false,
6
+ "apply_spec_augment": true,
7
+ "architectures": [
8
+ "Wav2Vec2ForCTC"
9
+ ],
10
+ "attention_dropout": 0.1,
11
+ "bos_token_id": 1,
12
+ "classifier_proj_size": 256,
13
+ "codevector_dim": 256,
14
+ "contrastive_logits_temperature": 0.1,
15
+ "conv_bias": false,
16
+ "conv_dim": [
17
+ 512,
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512
24
+ ],
25
+ "conv_kernel": [
26
+ 10,
27
+ 3,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 2,
32
+ 2
33
+ ],
34
+ "conv_stride": [
35
+ 5,
36
+ 2,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2
42
+ ],
43
+ "ctc_loss_reduction": "sum",
44
+ "ctc_zero_infinity": false,
45
+ "diversity_loss_weight": 0.1,
46
+ "do_stable_layer_norm": false,
47
+ "eos_token_id": 2,
48
+ "feat_extract_activation": "gelu",
49
+ "feat_extract_norm": "group",
50
+ "feat_proj_dropout": 0.1,
51
+ "feat_quantizer_dropout": 0.0,
52
+ "final_dropout": 0.0,
53
+ "freeze_feat_extract_train": true,
54
+ "hidden_act": "gelu",
55
+ "hidden_dropout": 0.1,
56
+ "hidden_size": 768,
57
+ "initializer_range": 0.02,
58
+ "intermediate_size": 3072,
59
+ "layer_norm_eps": 1e-05,
60
+ "layerdrop": 0.0,
61
+ "mask_channel_length": 10,
62
+ "mask_channel_min_space": 1,
63
+ "mask_channel_other": 0.0,
64
+ "mask_channel_prob": 0.0,
65
+ "mask_channel_selection": "static",
66
+ "mask_feature_length": 10,
67
+ "mask_feature_min_masks": 0,
68
+ "mask_feature_prob": 0.0,
69
+ "mask_time_length": 10,
70
+ "mask_time_min_masks": 2,
71
+ "mask_time_min_space": 1,
72
+ "mask_time_other": 0.0,
73
+ "mask_time_prob": 0.05,
74
+ "mask_time_selection": "static",
75
+ "model_type": "wav2vec2",
76
+ "no_mask_channel_overlap": false,
77
+ "no_mask_time_overlap": false,
78
+ "num_adapter_layers": 3,
79
+ "num_attention_heads": 12,
80
+ "num_codevector_groups": 2,
81
+ "num_codevectors_per_group": 320,
82
+ "num_conv_pos_embedding_groups": 16,
83
+ "num_conv_pos_embeddings": 128,
84
+ "num_feat_extract_layers": 7,
85
+ "num_hidden_layers": 12,
86
+ "num_negatives": 100,
87
+ "output_hidden_size": 768,
88
+ "pad_token_id": 0,
89
+ "proj_codevector_dim": 256,
90
+ "tdnn_dilation": [
91
+ 1,
92
+ 2,
93
+ 3,
94
+ 1,
95
+ 1
96
+ ],
97
+ "tdnn_dim": [
98
+ 512,
99
+ 512,
100
+ 512,
101
+ 512,
102
+ 1500
103
+ ],
104
+ "tdnn_kernel": [
105
+ 5,
106
+ 3,
107
+ 3,
108
+ 1,
109
+ 1
110
+ ],
111
+ "torch_dtype": "float32",
112
+ "transformers_version": "4.19.2",
113
+ "use_weighted_layer_sum": false,
114
+ "vocab_size": 41,
115
+ "xvector_output_dim": 512
116
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": false,
9
+ "sampling_rate": 16000
10
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:179049b09966ea61956546865bf33b8d187f397ac91d990c8dccd7fce9157b84
3
+ size 134
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "tokenizer_class": "Wav2Vec2CTCTokenizer", "processor_class": "Wav2Vec2Processor"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 1, "<pad>": 0, "</s>": 2, "<unk>": 3, "|": 4, "e": 5, "n": 6, "r": 7, "t": 8, "a": 9, "s": 10, "i": 11, "d": 12, "l": 13, "o": 14, "h": 15, "u": 16, "m": 17, "c": 18, "g": 19, "w": 20, "é": 21, "f": 22, "k": 23, "p": 24, "z": 25, "b": 26, "v": 27, "ë": 28, "ä": 29, "j": 30, "'": 31, "y": 32, "x": 33, "q": 34, "ü": 35, "è": 36, "à": 37, "ö": 38, "ô": 39, "ê": 40}