Commit
·
39a3709
1
Parent(s):
c0f29f0
Upload 8 files
Browse files- asr.ckpt +3 -0
- asr_hyperparams.yaml +303 -0
- lm.ckpt +3 -0
- lm_hyperparams.yaml +117 -0
- normalizer.ckpt +3 -0
- tokenizer.model +3 -0
- tokenizer.vocab +1000 -0
- tokenizer_hyperparams.yaml +43 -0
asr.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ad7818baa2fdae3e171d17f13a3dab3dcc2279a111d779c6c07b1756995bb83
|
3 |
+
size 479556617
|
asr_hyperparams.yaml
ADDED
@@ -0,0 +1,303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Generated 2022-10-03 from:
|
2 |
+
# /netscratch/sagar/thesis/speechbrain/recipes/CommonVoice_de/ASR-Libri/seq2seq/hparams/train.yaml
|
3 |
+
# yamllint disable
|
4 |
+
# ############################################################################
|
5 |
+
# Model: E2E attention-based ASR
|
6 |
+
# Encoder: CRDNN model
|
7 |
+
# Decoder: GRU + beamsearch + RNNLM
|
8 |
+
# Tokens: BPE with unigram
|
9 |
+
# Losses: CTC + NLL
|
10 |
+
# Training: CommonVoice German (header inherited from the LibriSpeech 960h recipe)
|
11 |
+
# Authors: Ju-Chieh Chou, Mirco Ravanelli, Abdel Heba, Peter Plantinga,
|
12 |
+
# Samuele Cornell 2020
|
13 |
+
# ############################################################################
|
14 |
+
|
15 |
+
# Seed needs to be set at top of yaml, before objects with parameters
|
16 |
+
seed: 1200
|
17 |
+
__set_seed: !apply:torch.manual_seed [1200]
|
18 |
+
output_folder: results/CRDNN_BPE_960h_LM/1200
|
19 |
+
wer_file: results/CRDNN_BPE_960h_LM/1200/wer.txt
|
20 |
+
save_folder: results/CRDNN_BPE_960h_LM/1200/save
|
21 |
+
train_log: results/CRDNN_BPE_960h_LM/1200/train_log.txt
|
22 |
+
|
23 |
+
# Language model (LM) pretraining
|
24 |
+
# NB: To avoid mismatch, the speech recognizer must be trained with the same
|
25 |
+
# tokenizer used for LM training. Here, we download everything from the
|
26 |
+
# speechbrain HuggingFace repository. However, a local path pointing to a
|
27 |
+
# directory containing the lm.ckpt and tokenizer.ckpt may also be specified
|
28 |
+
# instead, e.g. if you want to use your own LM / tokenizer.
|
29 |
+
# We use index 0 for both bos and eos, so the tokenizer and LM must also use 0 as their bos and eos ids.
|
30 |
+
pretrained_tokenizer_path: ../../Tokenizer/results/unigram/
|
31 |
+
pretrained_lm_path: ../../LM/results/RNN/2995/save/CKPT+2022-08-18+18-22-18+00
|
32 |
+
|
33 |
+
# Data files
|
34 |
+
data_folder: ../../CommonVoice # !PLACEHOLDER
|
35 |
+
# e.g. /path/to/LibriSpeech
|
36 |
+
# noise/rirs dataset will automatically be downloaded
|
37 |
+
|
38 |
+
# Data files
|
39 |
+
train_tsv_file: ../../CommonVoice/train.tsv # Standard CommonVoice .tsv files
|
40 |
+
dev_tsv_file: ../../CommonVoice/dev.tsv # Standard CommonVoice .tsv files
|
41 |
+
test_tsv_file: ../../CommonVoice/test.tsv # Standard CommonVoice .tsv files
|
42 |
+
accented_letters: true
|
43 |
+
language: de
|
44 |
+
ckpt_interval_minutes: 15 # save checkpoint every N min
|
45 |
+
csv_dir: ../../cv_de_acc
|
46 |
+
data_folder_rirs: ../../cv_de_acc # where to store noisy data for augment (change it if needed)
|
47 |
+
train_csv: ../../cv_de_acc/train.csv
|
48 |
+
valid_csv: ../../cv_de_acc/dev.csv
|
49 |
+
test_csv: ../../cv_de_acc/test.csv
|
50 |
+
skip_prep: false
|
51 |
+
|
52 |
+
# Training parameters
|
53 |
+
number_of_epochs: 25
|
54 |
+
number_of_ctc_epochs: 5
|
55 |
+
batch_size: 8
|
56 |
+
valid_batch_size: 8
|
57 |
+
test_batch_size: 8
|
58 |
+
lr: 1.0
|
59 |
+
ctc_weight: 0.5
|
60 |
+
sorting: ascending
|
61 |
+
dynamic_batching: false
|
62 |
+
|
63 |
+
# dynamic batching parameters, if used
|
64 |
+
dynamic_batch_sampler:
|
65 |
+
feats_hop_size: 0.01
|
66 |
+
max_batch_len: 20000 # in terms of frames
|
67 |
+
shuffle_ex: true
|
68 |
+
batch_ordering: random
|
69 |
+
num_buckets: 20
|
70 |
+
|
71 |
+
# Feature parameters
|
72 |
+
sample_rate: 16000
|
73 |
+
n_fft: 400
|
74 |
+
n_mels: 40
|
75 |
+
|
76 |
+
opt_class: !name:torch.optim.Adadelta
|
77 |
+
lr: 1.0
|
78 |
+
rho: 0.95
|
79 |
+
eps: 1.e-8
|
80 |
+
|
81 |
+
# Dataloader options
|
82 |
+
train_dataloader_opts:
|
83 |
+
batch_size: 8
|
84 |
+
|
85 |
+
valid_dataloader_opts:
|
86 |
+
batch_size: 8
|
87 |
+
|
88 |
+
test_dataloader_opts:
|
89 |
+
batch_size: 8
|
90 |
+
|
91 |
+
# Model parameters
|
92 |
+
activation: &id001 !name:torch.nn.LeakyReLU
|
93 |
+
dropout: 0.15
|
94 |
+
cnn_blocks: 2
|
95 |
+
cnn_channels: (128, 256)
|
96 |
+
inter_layer_pooling_size: (2, 2)
|
97 |
+
cnn_kernelsize: (3, 3)
|
98 |
+
time_pooling_size: 4
|
99 |
+
rnn_class: &id002 !name:speechbrain.nnet.RNN.LSTM
|
100 |
+
rnn_layers: 4
|
101 |
+
rnn_neurons: 1024
|
102 |
+
rnn_bidirectional: true
|
103 |
+
dnn_blocks: 2
|
104 |
+
dnn_neurons: 512
|
105 |
+
emb_size: 128
|
106 |
+
dec_neurons: 1024
|
107 |
+
output_neurons: 1000 # Number of tokens (same as LM)
|
108 |
+
blank_index: 0
|
109 |
+
bos_index: 0
|
110 |
+
eos_index: 0
|
111 |
+
|
112 |
+
# Decoding parameters
|
113 |
+
min_decode_ratio: 0.0
|
114 |
+
max_decode_ratio: 1.0
|
115 |
+
valid_beam_size: 80
|
116 |
+
test_beam_size: 80
|
117 |
+
eos_threshold: 1.5
|
118 |
+
using_max_attn_shift: true
|
119 |
+
max_attn_shift: 240
|
120 |
+
lm_weight: 0.50
|
121 |
+
ctc_weight_decode: 0.0
|
122 |
+
coverage_penalty: 1.5
|
123 |
+
temperature: 1.25
|
124 |
+
temperature_lm: 1.25
|
125 |
+
|
126 |
+
epoch_counter: &id013 !new:speechbrain.utils.epoch_loop.EpochCounter
|
127 |
+
|
128 |
+
limit: 25
|
129 |
+
|
130 |
+
normalize: &id008 !new:speechbrain.processing.features.InputNormalization
|
131 |
+
norm_type: global
|
132 |
+
|
133 |
+
compute_features: !new:speechbrain.lobes.features.Fbank
|
134 |
+
sample_rate: 16000
|
135 |
+
n_fft: 400
|
136 |
+
n_mels: 40
|
137 |
+
|
138 |
+
env_corrupt: &id009 !new:speechbrain.lobes.augment.EnvCorrupt
|
139 |
+
openrir_folder: ../../cv_de_acc
|
140 |
+
babble_prob: 0.0
|
141 |
+
reverb_prob: 0.0
|
142 |
+
noise_prob: 1.0
|
143 |
+
noise_snr_low: 0
|
144 |
+
noise_snr_high: 15
|
145 |
+
|
146 |
+
augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
|
147 |
+
sample_rate: 16000
|
148 |
+
speeds: [95, 100, 105]
|
149 |
+
|
150 |
+
enc: &id003 !new:speechbrain.lobes.models.CRDNN.CRDNN
|
151 |
+
input_shape: [null, null, 40]
|
152 |
+
activation: *id001
|
153 |
+
dropout: 0.15
|
154 |
+
cnn_blocks: 2
|
155 |
+
cnn_channels: (128, 256)
|
156 |
+
cnn_kernelsize: (3, 3)
|
157 |
+
inter_layer_pooling_size: (2, 2)
|
158 |
+
time_pooling: true
|
159 |
+
using_2d_pooling: false
|
160 |
+
time_pooling_size: 4
|
161 |
+
rnn_class: *id002
|
162 |
+
rnn_layers: 4
|
163 |
+
rnn_neurons: 1024
|
164 |
+
rnn_bidirectional: true
|
165 |
+
rnn_re_init: true
|
166 |
+
dnn_blocks: 2
|
167 |
+
dnn_neurons: 512
|
168 |
+
use_rnnp: false
|
169 |
+
|
170 |
+
emb: &id004 !new:speechbrain.nnet.embedding.Embedding
|
171 |
+
num_embeddings: 1000
|
172 |
+
embedding_dim: 128
|
173 |
+
|
174 |
+
dec: &id005 !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
|
175 |
+
enc_dim: 512
|
176 |
+
input_size: 128
|
177 |
+
rnn_type: gru
|
178 |
+
attn_type: location
|
179 |
+
hidden_size: 1024
|
180 |
+
attn_dim: 1024
|
181 |
+
num_layers: 1
|
182 |
+
scaling: 1.0
|
183 |
+
channels: 10
|
184 |
+
kernel_size: 100
|
185 |
+
re_init: true
|
186 |
+
dropout: 0.15
|
187 |
+
|
188 |
+
ctc_lin: &id006 !new:speechbrain.nnet.linear.Linear
|
189 |
+
input_size: 512
|
190 |
+
n_neurons: 1000
|
191 |
+
|
192 |
+
seq_lin: &id007 !new:speechbrain.nnet.linear.Linear
|
193 |
+
input_size: 1024
|
194 |
+
n_neurons: 1000
|
195 |
+
|
196 |
+
log_softmax: !new:speechbrain.nnet.activations.Softmax
|
197 |
+
apply_log: true
|
198 |
+
|
199 |
+
ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
|
200 |
+
blank_index: 0
|
201 |
+
|
202 |
+
seq_cost: !name:speechbrain.nnet.losses.nll_loss
|
203 |
+
label_smoothing: 0.1
|
204 |
+
|
205 |
+
# This is the RNNLM that is used according to the Huggingface repository
|
206 |
+
# NB: It has to match the pre-trained RNNLM!!
|
207 |
+
lm_model: &id010 !new:speechbrain.lobes.models.RNNLM.RNNLM
|
208 |
+
|
209 |
+
output_neurons: 1000
|
210 |
+
embedding_dim: 128
|
211 |
+
activation: !name:torch.nn.LeakyReLU
|
212 |
+
dropout: 0.0
|
213 |
+
rnn_layers: 2
|
214 |
+
rnn_neurons: 2048
|
215 |
+
dnn_blocks: 1
|
216 |
+
dnn_neurons: 512
|
217 |
+
return_hidden: true # For inference
|
218 |
+
|
219 |
+
tokenizer: &id014 !new:sentencepiece.SentencePieceProcessor
|
220 |
+
# Models
|
221 |
+
|
222 |
+
modules:
|
223 |
+
enc: *id003
|
224 |
+
emb: *id004
|
225 |
+
dec: *id005
|
226 |
+
ctc_lin: *id006
|
227 |
+
seq_lin: *id007
|
228 |
+
normalize: *id008
|
229 |
+
env_corrupt: *id009
|
230 |
+
lm_model: *id010
|
231 |
+
model: &id011 !new:torch.nn.ModuleList
|
232 |
+
- [*id003, *id004, *id005, *id006, *id007]
|
233 |
+
valid_search: !new:speechbrain.decoders.S2SRNNBeamSearcher
|
234 |
+
embedding: *id004
|
235 |
+
decoder: *id005
|
236 |
+
linear: *id007
|
237 |
+
ctc_linear: *id006
|
238 |
+
bos_index: 0
|
239 |
+
eos_index: 0
|
240 |
+
blank_index: 0
|
241 |
+
min_decode_ratio: 0.0
|
242 |
+
max_decode_ratio: 1.0
|
243 |
+
beam_size: 80
|
244 |
+
eos_threshold: 1.5
|
245 |
+
using_max_attn_shift: true
|
246 |
+
max_attn_shift: 240
|
247 |
+
coverage_penalty: 1.5
|
248 |
+
temperature: 1.25
|
249 |
+
|
250 |
+
test_search: !new:speechbrain.decoders.S2SRNNBeamSearchLM
|
251 |
+
embedding: *id004
|
252 |
+
decoder: *id005
|
253 |
+
linear: *id007
|
254 |
+
ctc_linear: *id006
|
255 |
+
language_model: *id010
|
256 |
+
bos_index: 0
|
257 |
+
eos_index: 0
|
258 |
+
blank_index: 0
|
259 |
+
min_decode_ratio: 0.0
|
260 |
+
max_decode_ratio: 1.0
|
261 |
+
beam_size: 80
|
262 |
+
eos_threshold: 1.5
|
263 |
+
using_max_attn_shift: true
|
264 |
+
max_attn_shift: 240
|
265 |
+
coverage_penalty: 1.5
|
266 |
+
lm_weight: 0.50
|
267 |
+
ctc_weight: 0.0
|
268 |
+
temperature: 1.25
|
269 |
+
temperature_lm: 1.25
|
270 |
+
|
271 |
+
lr_annealing: &id012 !new:speechbrain.nnet.schedulers.NewBobScheduler
|
272 |
+
initial_value: 1.0
|
273 |
+
improvement_threshold: 0.0025
|
274 |
+
annealing_factor: 0.8
|
275 |
+
patient: 0
|
276 |
+
|
277 |
+
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
|
278 |
+
checkpoints_dir: results/CRDNN_BPE_960h_LM/1200/save
|
279 |
+
recoverables:
|
280 |
+
model: *id011
|
281 |
+
scheduler: *id012
|
282 |
+
normalizer: *id008
|
283 |
+
counter: *id013
|
284 |
+
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
|
285 |
+
save_file: results/CRDNN_BPE_960h_LM/1200/train_log.txt
|
286 |
+
|
287 |
+
error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
|
288 |
+
|
289 |
+
cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
|
290 |
+
split_tokens: true
|
291 |
+
|
292 |
+
# The pretrainer allows a mapping between pretrained files and instances that
|
293 |
+
# are declared in the yaml. E.g., here we will download the file lm.ckpt
|
294 |
+
# and it will be loaded into "lm" which is pointing to the <lm_model> defined
|
295 |
+
# before.
|
296 |
+
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
297 |
+
collect_in: results/CRDNN_BPE_960h_LM/1200/save
|
298 |
+
loadables:
|
299 |
+
lm: *id010
|
300 |
+
tokenizer: *id014
|
301 |
+
paths:
|
302 |
+
lm: ../../LM/results/RNN/2995/save/CKPT+2022-08-18+18-22-18+00/model.ckpt
|
303 |
+
tokenizer: ../../Tokenizer/results/unigram//1000_unigram.model
|
lm.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6afb5f689c30562cbe78046a641c5ff2c0184f30de5124e29cbf7ba4ecc34e4
|
3 |
+
size 212419663
|
lm_hyperparams.yaml
ADDED
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Generated 2022-08-17 from:
|
2 |
+
# /netscratch/sagar/thesis/speechbrain/recipes/CommonVoice_de/LM/hparams/RNNLM.yaml
|
3 |
+
# yamllint disable
|
4 |
+
# ############################################################################
|
5 |
+
# Model: RNNLM of E2E ASR
|
6 |
+
# Tokens: unigram/char
|
7 |
+
# losses: NLL
|
8 |
+
# Training: Librispeech 960h transcript + LM corpus
|
9 |
+
# Authors: Ju-Chieh Chou 2020, Jianyuan Zhong 2021
|
10 |
+
# ############################################################################
|
11 |
+
|
12 |
+
# Seed needs to be set at top of yaml, before objects with parameters are made
|
13 |
+
seed: 2995
|
14 |
+
__set_seed: !apply:torch.manual_seed [2995]
|
15 |
+
output_folder: results/RNN/2995
|
16 |
+
save_folder: results/RNN/2995/save
|
17 |
+
train_log: results/RNN/2995/train_log.txt
|
18 |
+
|
19 |
+
# Data files
|
20 |
+
# The data_folder is needed because we train the LM on the training
|
21 |
+
# transcriptions of LibriSpeech as well.
|
22 |
+
data_folder: ../cv_de_acc/
|
23 |
+
|
24 |
+
# path to the lm_corpus
|
25 |
+
# if set to null, it will automatically download from the internet
|
26 |
+
# in the case when there is no internet access, set this to your local file
|
27 |
+
lm_corpus_path: lm_corpus/orignal_data/de_lm_normalized.txt # 17M sentences
|
28 |
+
|
29 |
+
|
30 |
+
# Tokenizer model
|
31 |
+
tokenizer_file: ../Tokenizer/results/unigram/1000_unigram.model
|
32 |
+
|
33 |
+
# Training parameters
|
34 |
+
number_of_epochs: 20
|
35 |
+
batch_size: 128
|
36 |
+
lr: 0.001
|
37 |
+
accu_steps: 1 # Gradient accumulation to simulate large batch training
|
38 |
+
ckpt_interval_minutes: 15 # save checkpoint every N min
|
39 |
+
|
40 |
+
# Dataloader options
|
41 |
+
train_dataloader_opts:
|
42 |
+
batch_size: 128
|
43 |
+
shuffle: true
|
44 |
+
|
45 |
+
valid_dataloader_opts:
|
46 |
+
batch_size: 1
|
47 |
+
|
48 |
+
test_dataloader_opts:
|
49 |
+
batch_size: 1
|
50 |
+
|
51 |
+
# Model parameters
|
52 |
+
emb_size: 128
|
53 |
+
activation: &id001 !name:torch.nn.LeakyReLU
|
54 |
+
dropout: 0.0
|
55 |
+
rnn_layers: 2
|
56 |
+
rnn_neurons: 2048
|
57 |
+
dnn_blocks: 1
|
58 |
+
dnn_neurons: 512
|
59 |
+
|
60 |
+
# Outputs
|
61 |
+
output_neurons: 1000 # index(blank/eos/bos) = 0 | char: 32 | unigram: 1000
|
62 |
+
# blank_index: 0
|
63 |
+
bos_index: 0
|
64 |
+
eos_index: 0
|
65 |
+
|
66 |
+
|
67 |
+
# Functions
|
68 |
+
model: &id002 !new:speechbrain.lobes.models.RNNLM.RNNLM
|
69 |
+
|
70 |
+
output_neurons: 1000
|
71 |
+
embedding_dim: 128
|
72 |
+
activation: *id001
|
73 |
+
dropout: 0.0
|
74 |
+
rnn_layers: 2
|
75 |
+
rnn_neurons: 2048
|
76 |
+
dnn_blocks: 1
|
77 |
+
dnn_neurons: 512
|
78 |
+
|
79 |
+
modules:
|
80 |
+
model: *id002
|
81 |
+
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
|
82 |
+
checkpoints_dir: results/RNN/2995/save
|
83 |
+
recoverables:
|
84 |
+
model: *id002
|
85 |
+
scheduler: &id003 !new:speechbrain.nnet.schedulers.NewBobScheduler
|
86 |
+
initial_value: 0.001
|
87 |
+
improvement_threshold: 0.0025
|
88 |
+
annealing_factor: 0.8
|
89 |
+
patient: 0
|
90 |
+
|
91 |
+
counter: &id004 !new:speechbrain.utils.epoch_loop.EpochCounter
|
92 |
+
|
93 |
+
limit: 20
|
94 |
+
|
95 |
+
log_softmax: !new:speechbrain.nnet.activations.Softmax
|
96 |
+
apply_log: true
|
97 |
+
|
98 |
+
optimizer: !name:torch.optim.Adam
|
99 |
+
lr: 0.001
|
100 |
+
betas: (0.9, 0.98)
|
101 |
+
eps: 0.000000001
|
102 |
+
|
103 |
+
lr_annealing: *id003
|
104 |
+
epoch_counter: *id004
|
105 |
+
compute_cost: !name:speechbrain.nnet.losses.nll_loss
|
106 |
+
|
107 |
+
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
|
108 |
+
save_file: results/RNN/2995/train_log.txt
|
109 |
+
|
110 |
+
tokenizer: &id005 !new:sentencepiece.SentencePieceProcessor
|
111 |
+
|
112 |
+
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
|
113 |
+
collect_in: results/RNN/2995/save
|
114 |
+
loadables:
|
115 |
+
tokenizer: *id005
|
116 |
+
paths:
|
117 |
+
tokenizer: ../Tokenizer/results/unigram/1000_unigram.model
|
normalizer.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:35014b7ce8241efd24fbd436fcd0a7b6e4d9171ca6951434cdd8efa8f6251c83
|
3 |
+
size 1383
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc01708d6ac2887c7e7675c3270fe98b9cec1ca88ebb65466d0cbb1958deabfc
|
3 |
+
size 252732
|
tokenizer.vocab
ADDED
@@ -0,0 +1,1000 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<unk> 0
|
2 |
+
▁ -2.81402
|
3 |
+
S -3.37348
|
4 |
+
EN -3.44471
|
5 |
+
E -3.49663
|
6 |
+
T -3.60219
|
7 |
+
N -3.84741
|
8 |
+
▁DIE -4.1672
|
9 |
+
▁DER -4.17853
|
10 |
+
ER -4.2868
|
11 |
+
▁IN -4.57964
|
12 |
+
▁UND -4.69616
|
13 |
+
R -4.7671
|
14 |
+
▁ER -4.76892
|
15 |
+
TE -4.89633
|
16 |
+
K -4.93723
|
17 |
+
M -4.94178
|
18 |
+
L -4.96751
|
19 |
+
▁BE -4.99692
|
20 |
+
D -5.00431
|
21 |
+
B -5.01898
|
22 |
+
▁IST -5.12069
|
23 |
+
▁GE -5.14687
|
24 |
+
F -5.16096
|
25 |
+
Ä -5.16295
|
26 |
+
▁EIN -5.19843
|
27 |
+
▁VER -5.20406
|
28 |
+
▁DAS -5.21437
|
29 |
+
O -5.24075
|
30 |
+
UNG -5.28346
|
31 |
+
G -5.33294
|
32 |
+
▁ZU -5.34273
|
33 |
+
AL -5.3635
|
34 |
+
▁AN -5.36695
|
35 |
+
▁DEN -5.41953
|
36 |
+
▁AUF -5.43824
|
37 |
+
U -5.44031
|
38 |
+
ST -5.44458
|
39 |
+
▁VON -5.44697
|
40 |
+
IN -5.45211
|
41 |
+
▁SIE -5.50361
|
42 |
+
GE -5.50758
|
43 |
+
W -5.53946
|
44 |
+
SCH -5.56713
|
45 |
+
H -5.56933
|
46 |
+
▁IM -5.59393
|
47 |
+
▁MIT -5.61329
|
48 |
+
▁EINE -5.63154
|
49 |
+
▁DES -5.66006
|
50 |
+
I -5.66181
|
51 |
+
Y -5.673
|
52 |
+
▁SICH -5.69423
|
53 |
+
▁ALS -5.73836
|
54 |
+
AN -5.74213
|
55 |
+
A -5.74582
|
56 |
+
▁F -5.76099
|
57 |
+
P -5.76137
|
58 |
+
TEN -5.76531
|
59 |
+
▁AUS -5.76682
|
60 |
+
Z -5.79106
|
61 |
+
Ö -5.796
|
62 |
+
▁ES -5.79856
|
63 |
+
▁K -5.8167
|
64 |
+
▁AUCH -5.81808
|
65 |
+
▁WURDE -5.82156
|
66 |
+
▁WAR -5.83504
|
67 |
+
▁FÜR -5.84409
|
68 |
+
EL -5.85849
|
69 |
+
ES -5.87952
|
70 |
+
LICH -5.91966
|
71 |
+
▁NICHT -5.93235
|
72 |
+
▁NACH -5.96093
|
73 |
+
IG -5.9643
|
74 |
+
Ü -5.97467
|
75 |
+
OR -5.97496
|
76 |
+
RE -5.97651
|
77 |
+
▁DEM -5.99682
|
78 |
+
▁VOR -6.0298
|
79 |
+
UR -6.04022
|
80 |
+
CH -6.06919
|
81 |
+
IS -6.08781
|
82 |
+
IE -6.09834
|
83 |
+
RI -6.10255
|
84 |
+
▁SIND -6.10721
|
85 |
+
DER -6.11684
|
86 |
+
AR -6.12099
|
87 |
+
TER -6.14111
|
88 |
+
▁WERDEN -6.14338
|
89 |
+
LE -6.15162
|
90 |
+
▁P -6.16635
|
91 |
+
▁ÜBER -6.17611
|
92 |
+
ISCHEN -6.19578
|
93 |
+
IL -6.2379
|
94 |
+
▁WIR -6.24367
|
95 |
+
▁W -6.25076
|
96 |
+
DE -6.25081
|
97 |
+
RA -6.26037
|
98 |
+
▁UNTER -6.28653
|
99 |
+
▁WIRD -6.29143
|
100 |
+
EI -6.29901
|
101 |
+
▁BEI -6.32482
|
102 |
+
▁ICH -6.33527
|
103 |
+
▁SO -6.3367
|
104 |
+
▁B -6.34118
|
105 |
+
HR -6.3465
|
106 |
+
▁RE -6.35408
|
107 |
+
▁ST -6.3552
|
108 |
+
BE -6.36674
|
109 |
+
C -6.38773
|
110 |
+
CK -6.38963
|
111 |
+
ION -6.43716
|
112 |
+
IT -6.43929
|
113 |
+
ISCHE -6.45091
|
114 |
+
ON -6.45107
|
115 |
+
▁G -6.45134
|
116 |
+
GEN -6.46007
|
117 |
+
▁H -6.47123
|
118 |
+
LL -6.47317
|
119 |
+
▁SEINE -6.47692
|
120 |
+
UT -6.48694
|
121 |
+
▁SCH -6.49251
|
122 |
+
▁DIESE -6.49312
|
123 |
+
▁UM -6.50123
|
124 |
+
DEN -6.50978
|
125 |
+
IERT -6.52487
|
126 |
+
LI -6.52622
|
127 |
+
▁AB -6.53963
|
128 |
+
▁UN -6.55606
|
129 |
+
LA -6.57196
|
130 |
+
▁SEIN -6.58748
|
131 |
+
▁DURCH -6.58814
|
132 |
+
ACH -6.60234
|
133 |
+
ERN -6.60945
|
134 |
+
NA -6.62245
|
135 |
+
AT -6.62414
|
136 |
+
LO -6.62723
|
137 |
+
AM -6.63566
|
138 |
+
UM -6.64409
|
139 |
+
OL -6.64575
|
140 |
+
▁A -6.67413
|
141 |
+
UNGEN -6.68644
|
142 |
+
ET -6.69927
|
143 |
+
UCH -6.6994
|
144 |
+
▁ZWEI -6.70317
|
145 |
+
RO -6.70527
|
146 |
+
VER -6.71721
|
147 |
+
▁MA -6.72036
|
148 |
+
▁HAT -6.73186
|
149 |
+
CHT -6.73278
|
150 |
+
MAL -6.76197
|
151 |
+
IK -6.77496
|
152 |
+
UN -6.78486
|
153 |
+
ND -6.79079
|
154 |
+
US -6.79295
|
155 |
+
▁AM -6.79516
|
156 |
+
▁ENT -6.79677
|
157 |
+
AU -6.79885
|
158 |
+
▁ZUM -6.80198
|
159 |
+
▁DIESER -6.80819
|
160 |
+
MA -6.80944
|
161 |
+
AND -6.8142
|
162 |
+
▁KA -6.82327
|
163 |
+
▁EINER -6.82733
|
164 |
+
UND -6.84771
|
165 |
+
▁BR -6.84908
|
166 |
+
▁MO -6.86697
|
167 |
+
EIN -6.86893
|
168 |
+
ATION -6.88934
|
169 |
+
ß -6.89136
|
170 |
+
BEN -6.90162
|
171 |
+
ZU -6.90235
|
172 |
+
▁SP -6.90605
|
173 |
+
▁M -6.90876
|
174 |
+
LEI -6.91162
|
175 |
+
SCHAFT -6.91755
|
176 |
+
▁NOCH -6.91836
|
177 |
+
IEN -6.91892
|
178 |
+
DI -6.92121
|
179 |
+
ME -6.94112
|
180 |
+
▁SA -6.95271
|
181 |
+
REICH -6.95333
|
182 |
+
V -6.95497
|
183 |
+
▁MAN -6.9585
|
184 |
+
▁DA -6.96086
|
185 |
+
ZI -6.9616
|
186 |
+
▁WIE -6.96301
|
187 |
+
▁ZUR -6.97449
|
188 |
+
▁ZEIT -6.97511
|
189 |
+
ÄR -6.97734
|
190 |
+
TEIL -6.98472
|
191 |
+
▁RO -6.98922
|
192 |
+
LAND -6.98933
|
193 |
+
IR -6.99018
|
194 |
+
CHEN -7.00101
|
195 |
+
STAND -7.00236
|
196 |
+
▁IHRE -7.00298
|
197 |
+
GEL -7.00307
|
198 |
+
▁WURDEN -7.00349
|
199 |
+
AG -7.00577
|
200 |
+
▁ALLE -7.01897
|
201 |
+
▁NUR -7.02115
|
202 |
+
▁EINEM -7.03701
|
203 |
+
▁NEU -7.03998
|
204 |
+
▁DASS -7.04635
|
205 |
+
J -7.05312
|
206 |
+
HL -7.05618
|
207 |
+
▁BA -7.05819
|
208 |
+
UL -7.06278
|
209 |
+
STE -7.06497
|
210 |
+
ART -7.06645
|
211 |
+
▁KON -7.06717
|
212 |
+
▁BI -7.07281
|
213 |
+
LICHE -7.07988
|
214 |
+
FF -7.08488
|
215 |
+
▁EINEN -7.08624
|
216 |
+
KT -7.0886
|
217 |
+
▁ABER -7.09055
|
218 |
+
TI -7.09289
|
219 |
+
ISCH -7.09315
|
220 |
+
ING -7.09452
|
221 |
+
GER -7.09554
|
222 |
+
▁LE -7.09917
|
223 |
+
IGEN -7.10199
|
224 |
+
▁HABEN -7.10295
|
225 |
+
ZE -7.10316
|
226 |
+
▁HIER -7.10443
|
227 |
+
BER -7.1052
|
228 |
+
NG -7.10687
|
229 |
+
ENDE -7.1094
|
230 |
+
RECHT -7.11372
|
231 |
+
▁BIS -7.119
|
232 |
+
ZEIT -7.12397
|
233 |
+
STELL -7.12862
|
234 |
+
BAR -7.12928
|
235 |
+
▁SE -7.13299
|
236 |
+
IST -7.13807
|
237 |
+
PF -7.14193
|
238 |
+
TR -7.14455
|
239 |
+
▁SPIEL -7.14827
|
240 |
+
AS -7.15021
|
241 |
+
▁Z -7.15028
|
242 |
+
▁DI -7.15058
|
243 |
+
▁HA -7.16676
|
244 |
+
WA -7.16939
|
245 |
+
HE -7.1791
|
246 |
+
▁KANN -7.18713
|
247 |
+
▁ME -7.19814
|
248 |
+
PP -7.20307
|
249 |
+
ÜR -7.20844
|
250 |
+
LICHEN -7.21561
|
251 |
+
▁TEIL -7.21618
|
252 |
+
HEIT -7.21814
|
253 |
+
TH -7.21886
|
254 |
+
UNGS -7.22126
|
255 |
+
▁REG -7.22254
|
256 |
+
▁LAND -7.22934
|
257 |
+
SP -7.23104
|
258 |
+
ID -7.23488
|
259 |
+
BAU -7.24018
|
260 |
+
RICH -7.2516
|
261 |
+
WEI -7.25587
|
262 |
+
▁ODER -7.2581
|
263 |
+
EM -7.26025
|
264 |
+
IM -7.2615
|
265 |
+
HA -7.26185
|
266 |
+
▁RA -7.26553
|
267 |
+
NER -7.27564
|
268 |
+
▁JEDOCH -7.2774
|
269 |
+
▁LI -7.28151
|
270 |
+
▁GROß -7.28315
|
271 |
+
▁DREI -7.28402
|
272 |
+
▁KEINE -7.2867
|
273 |
+
▁HEUTE -7.28772
|
274 |
+
▁PA -7.28956
|
275 |
+
GA -7.29075
|
276 |
+
▁SEINER -7.29267
|
277 |
+
BURG -7.29466
|
278 |
+
▁DEUTSCH -7.29772
|
279 |
+
LER -7.29927
|
280 |
+
OP -7.30063
|
281 |
+
HN -7.302
|
282 |
+
END -7.30438
|
283 |
+
IEREN -7.30703
|
284 |
+
▁LA -7.31811
|
285 |
+
NO -7.32135
|
286 |
+
STEN -7.32578
|
287 |
+
▁JAHR -7.32794
|
288 |
+
▁MEHR -7.3285
|
289 |
+
▁CO -7.3293
|
290 |
+
X -7.33521
|
291 |
+
▁DAR -7.34229
|
292 |
+
▁ANGE -7.34387
|
293 |
+
HER -7.34596
|
294 |
+
SITZ -7.34601
|
295 |
+
IV -7.34659
|
296 |
+
▁WAS -7.35353
|
297 |
+
▁STADT -7.3575
|
298 |
+
WEISE -7.36142
|
299 |
+
WEG -7.36167
|
300 |
+
▁SPÄTER -7.36766
|
301 |
+
IERUNG -7.36795
|
302 |
+
FR -7.37058
|
303 |
+
ELL -7.37301
|
304 |
+
VO -7.38029
|
305 |
+
TZ -7.38052
|
306 |
+
▁C -7.38179
|
307 |
+
PO -7.38837
|
308 |
+
IA -7.39019
|
309 |
+
▁WIEDER -7.3902
|
310 |
+
AD -7.40354
|
311 |
+
AB -7.40796
|
312 |
+
▁PRO -7.41394
|
313 |
+
QU -7.41554
|
314 |
+
▁GIBT -7.41601
|
315 |
+
PR -7.41911
|
316 |
+
AKT -7.42145
|
317 |
+
TRAG -7.42275
|
318 |
+
▁MUSS -7.42339
|
319 |
+
▁HER -7.4242
|
320 |
+
BERG -7.42709
|
321 |
+
▁HIN -7.43672
|
322 |
+
▁IHR -7.43731
|
323 |
+
▁GEGEN -7.44031
|
324 |
+
KA -7.44132
|
325 |
+
WERK -7.44367
|
326 |
+
ALL -7.44462
|
327 |
+
TA -7.4466
|
328 |
+
KEIT -7.45142
|
329 |
+
FORM -7.4518
|
330 |
+
ORT -7.45871
|
331 |
+
IGE -7.45919
|
332 |
+
MITTEL -7.45978
|
333 |
+
FT -7.46196
|
334 |
+
TOR -7.46484
|
335 |
+
▁WEI -7.46769
|
336 |
+
HAUS -7.47285
|
337 |
+
▁FA -7.47737
|
338 |
+
AUS -7.47749
|
339 |
+
ELT -7.48205
|
340 |
+
HALTEN -7.48288
|
341 |
+
ELLE -7.48359
|
342 |
+
▁SEHR -7.48396
|
343 |
+
▁AR -7.4854
|
344 |
+
FA -7.48871
|
345 |
+
WI -7.48904
|
346 |
+
▁SCHW -7.49133
|
347 |
+
GAB -7.49192
|
348 |
+
▁BO -7.49256
|
349 |
+
▁ARBEIT -7.49355
|
350 |
+
▁KO -7.4967
|
351 |
+
PAR -7.50197
|
352 |
+
ICH -7.50466
|
353 |
+
▁TA -7.50827
|
354 |
+
PH -7.50979
|
355 |
+
FE -7.52086
|
356 |
+
KO -7.52163
|
357 |
+
UB -7.52553
|
358 |
+
▁DIESEM -7.52667
|
359 |
+
▁DORT -7.52837
|
360 |
+
DEM -7.52981
|
361 |
+
▁GR -7.53465
|
362 |
+
TO -7.53722
|
363 |
+
▁FOR -7.54382
|
364 |
+
▁WO -7.54408
|
365 |
+
IERTE -7.54478
|
366 |
+
▁HATTE -7.5461
|
367 |
+
NATIONAL -7.54619
|
368 |
+
▁KÖNNEN -7.55423
|
369 |
+
▁HERR -7.55438
|
370 |
+
▁RU -7.55716
|
371 |
+
▁DE -7.56026
|
372 |
+
▁THE -7.5604
|
373 |
+
▁LIEGT -7.56441
|
374 |
+
LAGE -7.56464
|
375 |
+
FER -7.57119
|
376 |
+
RAT -7.57626
|
377 |
+
KEN -7.57875
|
378 |
+
▁SEIT -7.58297
|
379 |
+
▁HE -7.59162
|
380 |
+
▁ORT -7.59262
|
381 |
+
▁MAR -7.59434
|
382 |
+
▁IHN -7.59492
|
383 |
+
DA -7.60036
|
384 |
+
AUF -7.60419
|
385 |
+
▁EINIGE -7.60845
|
386 |
+
HI -7.61432
|
387 |
+
STAAT -7.61558
|
388 |
+
▁TRA -7.61597
|
389 |
+
▁HAUPT -7.62225
|
390 |
+
NEHMEN -7.62347
|
391 |
+
BAHN -7.62394
|
392 |
+
▁JA -7.63351
|
393 |
+
▁VIER -7.63533
|
394 |
+
OM -7.63665
|
395 |
+
▁ANDERE -7.63861
|
396 |
+
▁KOM -7.63867
|
397 |
+
▁DABEI -7.63896
|
398 |
+
HOF -7.64177
|
399 |
+
ORD -7.64558
|
400 |
+
FOLGE -7.64755
|
401 |
+
VA -7.64778
|
402 |
+
GEBIET -7.6479
|
403 |
+
STATT -7.65414
|
404 |
+
MI -7.65582
|
405 |
+
MIN -7.66195
|
406 |
+
VOR -7.66467
|
407 |
+
▁AUSGE -7.67159
|
408 |
+
▁SÜD -7.67427
|
409 |
+
VI -7.67691
|
410 |
+
▁MÜSSEN -7.68092
|
411 |
+
TRA -7.68135
|
412 |
+
CHE -7.683
|
413 |
+
▁WAREN -7.68308
|
414 |
+
KOMMEN -7.68576
|
415 |
+
KLA -7.68759
|
416 |
+
GI -7.69088
|
417 |
+
STER -7.6941
|
418 |
+
WIRTSCHAFT -7.69573
|
419 |
+
PUNKT -7.69708
|
420 |
+
HO -7.69943
|
421 |
+
LIN -7.7022
|
422 |
+
SON -7.70399
|
423 |
+
RÜCK -7.70401
|
424 |
+
IGT -7.7077
|
425 |
+
▁LEBEN -7.70944
|
426 |
+
ARBEIT -7.71015
|
427 |
+
▁BEKANNT -7.71126
|
428 |
+
BO -7.7196
|
429 |
+
▁JAHRE -7.72565
|
430 |
+
IZ -7.73218
|
431 |
+
▁WEITERE -7.73956
|
432 |
+
LÜ -7.74306
|
433 |
+
ITÄT -7.7448
|
434 |
+
HOL -7.74498
|
435 |
+
KIRCHE -7.7458
|
436 |
+
▁EX -7.74857
|
437 |
+
FALL -7.74922
|
438 |
+
▁JE -7.75625
|
439 |
+
▁GRUND -7.76621
|
440 |
+
▁UNS -7.76659
|
441 |
+
HANDEL -7.76694
|
442 |
+
▁GLEICH -7.76852
|
443 |
+
GANG -7.77181
|
444 |
+
IC -7.77261
|
445 |
+
▁VIELE -7.77522
|
446 |
+
SICHT -7.77755
|
447 |
+
▁DO -7.77987
|
448 |
+
PLA -7.78437
|
449 |
+
▁OF -7.78497
|
450 |
+
ZEN -7.78531
|
451 |
+
▁VIEL -7.78562
|
452 |
+
▁WEITER -7.79012
|
453 |
+
▁SOLLTE -7.79054
|
454 |
+
▁KLEIN -7.79286
|
455 |
+
▁VOM -7.7929
|
456 |
+
▁BAU -7.79403
|
457 |
+
▁EUROPÄ -7.79828
|
458 |
+
LIEß -7.79978
|
459 |
+
▁DIESES -7.80108
|
460 |
+
▁STA -7.80235
|
461 |
+
▁GEHÖRT -7.80323
|
462 |
+
TAG -7.80752
|
463 |
+
MENT -7.80813
|
464 |
+
▁WICHTIG -7.8113
|
465 |
+
ÄU -7.8152
|
466 |
+
▁MITGLIED -7.817
|
467 |
+
WISSEN -7.81753
|
468 |
+
▁FREI -7.81889
|
469 |
+
MO -7.82537
|
470 |
+
▁ALT -7.82845
|
471 |
+
GEBEN -7.83064
|
472 |
+
VE -7.83656
|
473 |
+
▁DAMIT -7.83863
|
474 |
+
▁BERICHT -7.83966
|
475 |
+
▁ZUSAMMEN -7.83989
|
476 |
+
PORT -7.84006
|
477 |
+
STEHEN -7.84319
|
478 |
+
TRIEB -7.84597
|
479 |
+
HM -7.84982
|
480 |
+
ANZ -7.85732
|
481 |
+
SCHIED -7.86109
|
482 |
+
▁ERSTEN -7.86173
|
483 |
+
RING -7.8654
|
484 |
+
▁HO -7.86636
|
485 |
+
▁ERFOLG -7.8685
|
486 |
+
▁NORD -7.87121
|
487 |
+
▁LANG -7.87144
|
488 |
+
LANG -7.87439
|
489 |
+
▁GUT -7.87609
|
490 |
+
ISCHER -7.87625
|
491 |
+
▁FRAU -7.87847
|
492 |
+
ILL -7.88034
|
493 |
+
▁GEMEINDE -7.88084
|
494 |
+
FL -7.88109
|
495 |
+
▁KONNTE -7.88394
|
496 |
+
TON -7.88449
|
497 |
+
▁ZURÜCK -7.88586
|
498 |
+
▁DU -7.88609
|
499 |
+
▁NE -7.88775
|
500 |
+
▁DANN -7.89183
|
501 |
+
NDE -7.89775
|
502 |
+
MANN -7.899
|
503 |
+
▁KAM -7.89958
|
504 |
+
SPIEL -7.90103
|
505 |
+
▁SELBST -7.90247
|
506 |
+
TION -7.90404
|
507 |
+
GO -7.90693
|
508 |
+
HEIM -7.91029
|
509 |
+
SEHEN -7.92173
|
510 |
+
▁MEIST -7.92587
|
511 |
+
MAN -7.92657
|
512 |
+
▁IHM -7.93021
|
513 |
+
▁EIGEN -7.93178
|
514 |
+
▁CA -7.936
|
515 |
+
▁INTER -7.93662
|
516 |
+
▁WELT -7.93698
|
517 |
+
LOS -7.93826
|
518 |
+
SETZT -7.93888
|
519 |
+
▁ZWISCHEN -7.93916
|
520 |
+
▁BEFINDET -7.94185
|
521 |
+
TRO -7.94356
|
522 |
+
▁MÖCHTE -7.94566
|
523 |
+
▁VI -7.94676
|
524 |
+
MER -7.95046
|
525 |
+
▁VEREIN -7.95058
|
526 |
+
GESETZ -7.95101
|
527 |
+
▁MEHRERE -7.9517
|
528 |
+
▁EUROPA -7.95194
|
529 |
+
SATZ -7.95368
|
530 |
+
▁CH -7.95536
|
531 |
+
BL -7.9557
|
532 |
+
STEIN -7.95874
|
533 |
+
ANT -7.95884
|
534 |
+
GEHEN -7.96025
|
535 |
+
LASSEN -7.96237
|
536 |
+
ARD -7.96695
|
537 |
+
▁OBER -7.96906
|
538 |
+
▁WEST -7.97181
|
539 |
+
MEISTER -7.97712
|
540 |
+
▁MÜ -7.97964
|
541 |
+
▁FILM -7.97967
|
542 |
+
LAUF -7.98074
|
543 |
+
▁ERSTE -7.98541
|
544 |
+
ÖFFENTLICH -7.99078
|
545 |
+
WOHN -7.9908
|
546 |
+
▁ETWA -7.99223
|
547 |
+
WERT -7.99335
|
548 |
+
▁AUßERDEM -7.99613
|
549 |
+
LEGEN -7.99776
|
550 |
+
LEGT -7.9978
|
551 |
+
▁WÄHREND -7.99851
|
552 |
+
GAR -8.00001
|
553 |
+
FRIED -8.00175
|
554 |
+
▁DR -8.00301
|
555 |
+
▁WENIG -8.01057
|
556 |
+
PLATZ -8.01775
|
557 |
+
▁KINDER -8.02039
|
558 |
+
IGKEIT -8.02349
|
559 |
+
▁IMMER -8.02645
|
560 |
+
▁ABGE -8.02746
|
561 |
+
▁JU -8.03748
|
562 |
+
▁LU -8.03773
|
563 |
+
▁MEINE -8.04964
|
564 |
+
STELLUNG -8.05194
|
565 |
+
GESTELLT -8.05238
|
566 |
+
WIN -8.05399
|
567 |
+
▁NEBEN -8.05628
|
568 |
+
KOMMISSION -8.05643
|
569 |
+
▁UNSERE -8.05667
|
570 |
+
WAND -8.05891
|
571 |
+
ZOG -8.06013
|
572 |
+
SCHRIFT -8.06202
|
573 |
+
DORF -8.0627
|
574 |
+
ANNT -8.06374
|
575 |
+
CHER -8.06735
|
576 |
+
▁FRAGE -8.06828
|
577 |
+
▁KURZ -8.07666
|
578 |
+
SCHLAG -8.07677
|
579 |
+
DIG -8.08106
|
580 |
+
▁MÖGLICH -8.08288
|
581 |
+
MMER -8.08451
|
582 |
+
GRUPPE -8.09052
|
583 |
+
BÜ -8.09201
|
584 |
+
▁SOLL -8.09996
|
585 |
+
▁BEREITS -8.10018
|
586 |
+
▁GANZ -8.10036
|
587 |
+
▁LETZTE -8.10408
|
588 |
+
SCHULE -8.10637
|
589 |
+
ZAHL -8.10733
|
590 |
+
FIN -8.11003
|
591 |
+
NET -8.1108
|
592 |
+
▁JEDE -8.11236
|
593 |
+
▁MUSIK -8.11283
|
594 |
+
ENDEN -8.11472
|
595 |
+
▁IHRER -8.11983
|
596 |
+
UG -8.124
|
597 |
+
SCHIEDENE -8.12615
|
598 |
+
▁DIESEN -8.12705
|
599 |
+
▁NAMEN -8.12836
|
600 |
+
STRAßE -8.13374
|
601 |
+
▁WENN -8.14025
|
602 |
+
PI -8.14585
|
603 |
+
▁FEST -8.14747
|
604 |
+
▁FÜNF -8.15182
|
605 |
+
▁BEIDEN -8.16002
|
606 |
+
▁GRA -8.16017
|
607 |
+
RÜ -8.1607
|
608 |
+
▁KRIEG -8.16227
|
609 |
+
ALLERDINGS -8.16802
|
610 |
+
▁FÜHRT -8.17075
|
611 |
+
▁FRÜH -8.17119
|
612 |
+
ITZ -8.1756
|
613 |
+
ATIV -8.17607
|
614 |
+
▁MICH -8.17774
|
615 |
+
▁DANACH -8.17805
|
616 |
+
▁OB -8.17873
|
617 |
+
WAR -8.17929
|
618 |
+
STÄNDIG -8.18037
|
619 |
+
▁ZAHLREICH -8.18109
|
620 |
+
▁SCHON -8.18512
|
621 |
+
▁JO -8.18569
|
622 |
+
ICK -8.19839
|
623 |
+
STADT -8.19853
|
624 |
+
▁DAZU -8.20098
|
625 |
+
GRA -8.2017
|
626 |
+
KREIS -8.20389
|
627 |
+
▁DAHER -8.20727
|
628 |
+
▁FAMILIE -8.20828
|
629 |
+
▁BU -8.21396
|
630 |
+
▁PRE -8.21537
|
631 |
+
GRIFF -8.22442
|
632 |
+
FANG -8.22499
|
633 |
+
NAHME -8.22629
|
634 |
+
▁PARLAMENT -8.22867
|
635 |
+
LIEB -8.23413
|
636 |
+
▁SCHUL -8.24247
|
637 |
+
▁ZWEITEN -8.24284
|
638 |
+
▁LEBT -8.24982
|
639 |
+
WOHL -8.2526
|
640 |
+
▁SCHL -8.25446
|
641 |
+
▁ANDEREN -8.25551
|
642 |
+
▁SU -8.25556
|
643 |
+
ZEUG -8.25863
|
644 |
+
▁AUTO -8.25883
|
645 |
+
HALB -8.26216
|
646 |
+
FAHREN -8.26339
|
647 |
+
▁ZUNÄCHST -8.26474
|
648 |
+
DEL -8.27115
|
649 |
+
IVERSITÄT -8.27264
|
650 |
+
▁PROBLEM -8.27302
|
651 |
+
▁NUN -8.27386
|
652 |
+
PRÄSIDENT -8.27567
|
653 |
+
TRITT -8.279
|
654 |
+
▁MENSCHEN -8.28217
|
655 |
+
▁STAMM -8.28233
|
656 |
+
FELD -8.28456
|
657 |
+
MUS -8.28535
|
658 |
+
GESCHICHTE -8.29057
|
659 |
+
▁BUNDES -8.29138
|
660 |
+
TRU -8.29341
|
661 |
+
ISMUS -8.29436
|
662 |
+
GERICHT -8.29663
|
663 |
+
BACH -8.29811
|
664 |
+
▁EBENFALLS -8.30216
|
665 |
+
POLITIK -8.30449
|
666 |
+
RAUM -8.30671
|
667 |
+
▁TÄTIG -8.30934
|
668 |
+
▁HABE -8.3123
|
669 |
+
SCHLIEßEND -8.31707
|
670 |
+
OTT -8.32467
|
671 |
+
▁HOCH -8.32589
|
672 |
+
ÄNDER -8.32642
|
673 |
+
▁GILT -8.32821
|
674 |
+
SYSTEM -8.3362
|
675 |
+
▁SOHN -8.34141
|
676 |
+
▁DOCH -8.35176
|
677 |
+
ISSE -8.35177
|
678 |
+
▁SOWIE -8.35713
|
679 |
+
▁BERLIN -8.35892
|
680 |
+
STRE -8.36039
|
681 |
+
GEFÜHRT -8.36228
|
682 |
+
▁ALLEM -8.36301
|
683 |
+
▁STUDIERTE -8.36311
|
684 |
+
RICHTUNG -8.36844
|
685 |
+
GRUND -8.37068
|
686 |
+
ESSEN -8.37495
|
687 |
+
▁NIEDER -8.37832
|
688 |
+
LING -8.37933
|
689 |
+
FLUG -8.38005
|
690 |
+
RUF -8.38257
|
691 |
+
GEBÄUDE -8.38274
|
692 |
+
▁BEIDE -8.38492
|
693 |
+
▁STEHT -8.38744
|
694 |
+
HEIRATET -8.38993
|
695 |
+
MANNSCHAFT -8.39635
|
696 |
+
ZER -8.3974
|
697 |
+
LÄNDER -8.39883
|
698 |
+
▁POLITISCH -8.39932
|
699 |
+
▁NIE -8.40128
|
700 |
+
GRÜNDE -8.40313
|
701 |
+
▁WELTKRIEG -8.41749
|
702 |
+
PRODUKT -8.41793
|
703 |
+
▁GEHT -8.42562
|
704 |
+
▁OST -8.42583
|
705 |
+
WECHSEL -8.43023
|
706 |
+
SCHLUSS -8.43445
|
707 |
+
DECK -8.4401
|
708 |
+
SCHUTZ -8.4428
|
709 |
+
IUM -8.44375
|
710 |
+
SORG -8.44416
|
711 |
+
▁EINZEL -8.44628
|
712 |
+
UNTERSTÜTZ -8.44664
|
713 |
+
GENOMMEN -8.44808
|
714 |
+
VERKEHR -8.44996
|
715 |
+
▁WILL -8.45368
|
716 |
+
BRU -8.45446
|
717 |
+
DIENST -8.46257
|
718 |
+
▁ERHIELT -8.46306
|
719 |
+
ISTISCH -8.46409
|
720 |
+
▁WEIß -8.47386
|
721 |
+
▁BLIEB -8.47629
|
722 |
+
SCHIFF -8.47817
|
723 |
+
GLEICH -8.47818
|
724 |
+
▁GRÜN -8.4847
|
725 |
+
▁BESTEHT -8.48803
|
726 |
+
STIMMT -8.48992
|
727 |
+
▁MACHT -8.49128
|
728 |
+
▁DESHALB -8.49527
|
729 |
+
CHLIEßLICH -8.4959
|
730 |
+
▁BEISPIEL -8.49979
|
731 |
+
▁OHNE -8.50317
|
732 |
+
SCHLOSSEN -8.50325
|
733 |
+
SCHREI -8.5088
|
734 |
+
▁JETZT -8.51321
|
735 |
+
▁DARAUF -8.51383
|
736 |
+
▁BÜRGER -8.51414
|
737 |
+
▁STARK -8.51643
|
738 |
+
▁VATER -8.51982
|
739 |
+
▁EHE -8.52122
|
740 |
+
▁DANK -8.52502
|
741 |
+
OLOGIE -8.52772
|
742 |
+
▁LÖ -8.52823
|
743 |
+
TRETEN -8.53061
|
744 |
+
▁ARBEITETE -8.53788
|
745 |
+
▁GING -8.53852
|
746 |
+
▁ZIEL -8.53991
|
747 |
+
SETZUNG -8.54149
|
748 |
+
▁HINTER -8.54754
|
749 |
+
WALD -8.54883
|
750 |
+
EINANDER -8.55555
|
751 |
+
VERWALTUNG -8.55687
|
752 |
+
▁SECHS -8.55844
|
753 |
+
SELLSCHAFT -8.5585
|
754 |
+
FÜHREN -8.56097
|
755 |
+
▁VERWENDET -8.56595
|
756 |
+
▁SOLCHE -8.57387
|
757 |
+
DRUCK -8.57518
|
758 |
+
▁GEMEINSAM -8.5792
|
759 |
+
▁DAFÜR -8.58025
|
760 |
+
XI -8.58701
|
761 |
+
GRÖßE -8.59013
|
762 |
+
▁MIR -8.59534
|
763 |
+
ÜSSE -8.59568
|
764 |
+
FÜHRUNG -8.59785
|
765 |
+
▁DARÜBER -8.60362
|
766 |
+
REIF -8.60523
|
767 |
+
HAFT -8.60563
|
768 |
+
▁BEGANN -8.60733
|
769 |
+
▁HÄUFIG -8.61685
|
770 |
+
PASS -8.61936
|
771 |
+
HÄLT -8.62361
|
772 |
+
GRAF -8.62744
|
773 |
+
▁KÖNIG -8.62919
|
774 |
+
▁DRITT -8.62968
|
775 |
+
STIMME -8.64956
|
776 |
+
MARKT -8.65707
|
777 |
+
▁KOMMT -8.65929
|
778 |
+
▁ENTWICKL -8.66153
|
779 |
+
▁SOZIAL -8.66158
|
780 |
+
ZEICHNET -8.66178
|
781 |
+
▁GESAMT -8.66233
|
782 |
+
LIEF -8.67002
|
783 |
+
▁ENTSCHEID -8.67443
|
784 |
+
AMERIKA -8.68308
|
785 |
+
▁BENANNT -8.68688
|
786 |
+
▁FINDEN -8.68785
|
787 |
+
▁HOHE -8.68853
|
788 |
+
VERSUCH -8.68987
|
789 |
+
▁HERAUS -8.69624
|
790 |
+
STRECKE -8.69636
|
791 |
+
HÖHE -8.69644
|
792 |
+
▁WORDEN -8.69757
|
793 |
+
▁NAHM -8.69919
|
794 |
+
▁AKTIV -8.70379
|
795 |
+
▁TOD -8.70455
|
796 |
+
WACHS -8.70743
|
797 |
+
▁EINFACH -8.71404
|
798 |
+
HILF -8.71418
|
799 |
+
▁JUNG -8.71822
|
800 |
+
▁URSPRÜNG -8.72123
|
801 |
+
▁OFFEN -8.72833
|
802 |
+
▁WÜRDE -8.7286
|
803 |
+
SPRACHE -8.72907
|
804 |
+
KRANK -8.73279
|
805 |
+
▁BEDEUTUNG -8.73493
|
806 |
+
PROGRAMM -8.73854
|
807 |
+
▁BESONDERS -8.74397
|
808 |
+
STOFF -8.74454
|
809 |
+
WESEN -8.74733
|
810 |
+
▁KUNST -8.75386
|
811 |
+
▁TITEL -8.75748
|
812 |
+
SÄCHLICH -8.76046
|
813 |
+
GEORDNET -8.76369
|
814 |
+
ZEICHNUNG -8.766
|
815 |
+
▁DIREKT -8.76726
|
816 |
+
▁PRÄ -8.77056
|
817 |
+
▁SPRACH -8.77061
|
818 |
+
▁GENAU -8.77257
|
819 |
+
▁CHRIST -8.77527
|
820 |
+
▁ERREICHT -8.78452
|
821 |
+
▁WAHR -8.78661
|
822 |
+
STÜCK -8.79091
|
823 |
+
▁ENTWICKEL -8.79169
|
824 |
+
GESCHÄFT -8.79615
|
825 |
+
▁NATUR -8.80003
|
826 |
+
▁INTERESS -8.8026
|
827 |
+
HANDLUNG -8.80319
|
828 |
+
SCHAUSPIEL -8.80647
|
829 |
+
▁PERSON -8.8065
|
830 |
+
▁WELCHE -8.81135
|
831 |
+
SCHRITT -8.81299
|
832 |
+
MINISTER -8.81553
|
833 |
+
POSITION -8.81936
|
834 |
+
▁KULTUR -8.82132
|
835 |
+
OLOGISCH -8.82531
|
836 |
+
▁SCHWARZ -8.82539
|
837 |
+
▁WAHL -8.82952
|
838 |
+
▁ORGANIS -8.8325
|
839 |
+
▁NATÜRLICH -8.83382
|
840 |
+
▁HISTOR -8.83586
|
841 |
+
ORDNUNG -8.83719
|
842 |
+
SELBEN -8.84154
|
843 |
+
BEZIRK -8.8425
|
844 |
+
▁SCHLOSS -8.84718
|
845 |
+
▁GRENZ -8.8519
|
846 |
+
▁FEHL -8.8521
|
847 |
+
▁GEWANN -8.85458
|
848 |
+
▁FOLGTE -8.85525
|
849 |
+
▁FINDET -8.85863
|
850 |
+
▁KÜNSTLER -8.86816
|
851 |
+
▁WÄRE -8.86875
|
852 |
+
▁WEITERHIN -8.87099
|
853 |
+
▁DADURCH -8.87435
|
854 |
+
ÖSTERREICH -8.87505
|
855 |
+
▁FRANZÖSI -8.87988
|
856 |
+
INDUSTRIE -8.87988
|
857 |
+
BEZEICHNET -8.88374
|
858 |
+
▁SCHNELL -8.88464
|
859 |
+
STIEG -8.89184
|
860 |
+
▁EMP -8.89543
|
861 |
+
▁JUGEND -8.89671
|
862 |
+
▁FUNKTION -8.89807
|
863 |
+
JAHR -8.89987
|
864 |
+
▁GENANNT -8.90228
|
865 |
+
DAUER -8.90797
|
866 |
+
▁TOUR -8.91162
|
867 |
+
LEISTUNG -8.91433
|
868 |
+
SETZEN -8.91569
|
869 |
+
▁ITALIEN -8.9166
|
870 |
+
ZENTRUM -8.92166
|
871 |
+
▁EINZIGE -8.92192
|
872 |
+
▁NUTZ -8.9247
|
873 |
+
▁NÄCHST -8.92527
|
874 |
+
▁PRIVAT -8.92599
|
875 |
+
SPRECHEN -8.9265
|
876 |
+
▁ÄNDERUNG -8.92661
|
877 |
+
KARRIERE -8.93401
|
878 |
+
▁BEFINDEN -8.94138
|
879 |
+
▁ZEHN -8.94476
|
880 |
+
KREUZ -8.9458
|
881 |
+
AUSBILDUNG -8.94726
|
882 |
+
▁BEGINN -8.94804
|
883 |
+
SBESONDERE -8.94875
|
884 |
+
FLÄCHE -8.95005
|
885 |
+
▁DAVON -8.95035
|
886 |
+
SCHNITT -8.95261
|
887 |
+
ZIEHUNG -8.95399
|
888 |
+
▁VOLKS -8.9562
|
889 |
+
STEUER -8.96814
|
890 |
+
▁LEICHT -8.97455
|
891 |
+
LÄNGE -8.9791
|
892 |
+
▁AUßER -8.98527
|
893 |
+
▁GENUTZT -8.99123
|
894 |
+
BEREIT -8.99142
|
895 |
+
DEMOKRAT -8.99278
|
896 |
+
▁ZEIGT -8.99554
|
897 |
+
SICHERHEIT -8.99856
|
898 |
+
▁TEXT -8.99928
|
899 |
+
▁SAISON -9.00689
|
900 |
+
▁TOCHTER -9.01159
|
901 |
+
▁FINANZ -9.01357
|
902 |
+
GEBRACHT -9.01476
|
903 |
+
▁MUTTER -9.01639
|
904 |
+
VERBINDUNG -9.01793
|
905 |
+
WIEGEND -9.02199
|
906 |
+
KRAFT -9.02392
|
907 |
+
KOLLEG -9.02829
|
908 |
+
HAUPT -9.02987
|
909 |
+
▁FOLGENDEN -9.03723
|
910 |
+
KÖRPER -9.03802
|
911 |
+
FAHRT -9.03902
|
912 |
+
▁CHA -9.03907
|
913 |
+
▁DEUTLICH -9.04133
|
914 |
+
FASSUNG -9.04566
|
915 |
+
▁ENTSPRECH -9.04747
|
916 |
+
PROZESS -9.04959
|
917 |
+
▁ELEKTR -9.0503
|
918 |
+
▁MILITÄR -9.0511
|
919 |
+
▁GLAUBE -9.05772
|
920 |
+
LEHRER -9.05834
|
921 |
+
▁DANEBEN -9.06103
|
922 |
+
▁WIRKLICH -9.06168
|
923 |
+
▁GEBOREN -9.06189
|
924 |
+
TREFF -9.06493
|
925 |
+
WESENTLICH -9.06581
|
926 |
+
MAßNAHMEN -9.06602
|
927 |
+
STRUKTUR -9.06853
|
928 |
+
▁TROTZ -9.06868
|
929 |
+
▁KÖNNTE -9.06937
|
930 |
+
▁THEATER -9.07286
|
931 |
+
▁EHEMALIG -9.07361
|
932 |
+
SCHÜTZ -9.07386
|
933 |
+
▁ENGLISCH -9.07527
|
934 |
+
▁FERNSEH -9.07545
|
935 |
+
▁LIEGEN -9.077
|
936 |
+
ANSTALT -9.07788
|
937 |
+
SIEDLUNG -9.07895
|
938 |
+
▁GOLD -9.0809
|
939 |
+
FÄLLE -9.08139
|
940 |
+
▁HEUTIGE -9.08566
|
941 |
+
▁ÄUßER -9.08627
|
942 |
+
▁WOLLEN -9.09681
|
943 |
+
STUDIUM -9.09913
|
944 |
+
WETTBEWERB -9.09999
|
945 |
+
MÖGLICH -9.10454
|
946 |
+
▁ZENTRAL -9.10869
|
947 |
+
ABHÄNGIG -9.11216
|
948 |
+
▁DARAUFHIN -9.11309
|
949 |
+
VERANTWORT -9.11742
|
950 |
+
▁NÄHE -9.11744
|
951 |
+
SCHRIEBEN -9.12182
|
952 |
+
SCHAFFEN -9.12184
|
953 |
+
BESCHÄFTIG -9.12448
|
954 |
+
▁JOHANN -9.13606
|
955 |
+
FOLGREICH -9.13908
|
956 |
+
▁REIHE -9.14795
|
957 |
+
▁BEVÖLKER -9.15505
|
958 |
+
MEINSCHAFT -9.15517
|
959 |
+
BRAUN -9.15594
|
960 |
+
▁NÖRDLICH -9.15687
|
961 |
+
▁FLUSS -9.15781
|
962 |
+
STÄNDE -9.15975
|
963 |
+
AUSSCHUSS -9.17259
|
964 |
+
GEMEINDE -9.17429
|
965 |
+
▁GEGENÜBER -9.17535
|
966 |
+
KAMPF -9.1796
|
967 |
+
GESPROCHEN -9.18094
|
968 |
+
SAMMLUNG -9.18375
|
969 |
+
PRÜF -9.18913
|
970 |
+
ZUSÄTZLICH -9.19509
|
971 |
+
BLÄTTER -9.19604
|
972 |
+
GRENZE -9.20184
|
973 |
+
▁SCHWEIZ -9.2088
|
974 |
+
▁ÄHNLICH -9.21819
|
975 |
+
▁ALLGEMEIN -9.2201
|
976 |
+
▁KOMMISSAR -9.22108
|
977 |
+
▁INSGESAMT -9.22499
|
978 |
+
▁ÜBERNAHM -9.22545
|
979 |
+
▁ALBUM -9.22893
|
980 |
+
WIRKUNG -9.23026
|
981 |
+
FAHRZEUG -9.23091
|
982 |
+
▁ANTWORT -9.23681
|
983 |
+
▁BEDEUTET -9.24376
|
984 |
+
▁BEDEUTEND -9.25178
|
985 |
+
HUNDERT -9.25302
|
986 |
+
▁INNERHALB -9.25482
|
987 |
+
▁FUßBALL -9.2558
|
988 |
+
▁ENERGIE -9.26292
|
989 |
+
▁AUßEN -9.26394
|
990 |
+
HÄUSER -9.26502
|
991 |
+
▁KLOSTER -9.2701
|
992 |
+
▁GEHÖREN -9.2762
|
993 |
+
▁NOTWENDIG -9.2762
|
994 |
+
REGELMÄßIG -9.27728
|
995 |
+
UNTERRICHT -9.27824
|
996 |
+
▁PROVINZ -9.28033
|
997 |
+
ZIEHEN -9.28043
|
998 |
+
▁EBENSO -9.28137
|
999 |
+
▁TRADITION -9.28864
|
1000 |
+
Q -10.642
|
tokenizer_hyperparams.yaml
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Generated 2022-08-16 from:
|
2 |
+
# /netscratch/sagar/thesis/speechbrain/recipes/CommonVoice_de/Tokenizer/hparams/1K_unigram_subword_bpe.yaml
|
3 |
+
# yamllint disable
|
4 |
+
# ############################################################################
|
5 |
+
# Tokenizer: SentencePiece subword tokenizer, unigram model (1K recipe)
|
6 |
+
# Training: German CommonVoice 1,211 hrs
|
7 |
+
# Authors: Abdel Heba 2021
|
8 |
+
# ############################################################################
|
9 |
+
|
10 |
+
token_type: unigram # ["unigram", "bpe", "char"]
|
11 |
+
output_folder: results/unigram
|
12 |
+
train_log: results/unigram/train_log.txt
|
13 |
+
|
14 |
+
# Data files
|
15 |
+
data_folder: ../CommonVoice/
|
16 |
+
csv_dir: ../cv_de_acc
|
17 |
+
train_tsv_file: ../CommonVoice//train.tsv
|
18 |
+
dev_tsv_file: ../CommonVoice//dev.tsv
|
19 |
+
test_tsv_file: ../CommonVoice//test.tsv
|
20 |
+
accented_letters: true
|
21 |
+
language: de
|
22 |
+
skip_prep: false
|
23 |
+
|
24 |
+
# train_splits: ["train-clean-100", "train-clean-360", "train-other-500"]
|
25 |
+
# dev_splits: ["dev-clean"]
|
26 |
+
# test_splits: ["test-clean", "test-other"]
|
27 |
+
train_csv: ../cv_de_acc/train.csv
|
28 |
+
valid_csv: ../cv_de_acc/dev.csv
|
29 |
+
|
30 |
+
# Training parameters
|
31 |
+
token_output: 5000 # index(blank/eos/bos/unk) = 0
|
32 |
+
character_coverage: 1.0
|
33 |
+
csv_read: wrd
|
34 |
+
|
35 |
+
|
36 |
+
tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
|
37 |
+
model_dir: results/unigram
|
38 |
+
vocab_size: 5000
|
39 |
+
annotation_train: ../cv_de_acc/train.csv
|
40 |
+
annotation_read: wrd
|
41 |
+
model_type: unigram # ["unigram", "bpe", "char"]
|
42 |
+
character_coverage: 1.0
|
43 |
+
annotation_list_to_check: [../cv_de_acc/train.csv, ../cv_de_acc/dev.csv]
|