qingzhengwang committed on
Commit
ddd6d43
·
1 Parent(s): 7391343

Update model with LFS support

Browse files
Files changed (24) hide show
  1. README.md +314 -3
  2. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/config.yaml +239 -0
  3. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/accuracy.png +0 -0
  4. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/backward_time.png +0 -0
  5. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/class_loss.png +0 -0
  6. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/clip.png +0 -0
  7. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/forward_time.png +0 -0
  8. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/geo_loss_all.png +0 -0
  9. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/geo_loss_downstream.png +0 -0
  10. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/gpu_max_cached_mem_GB.png +0 -0
  11. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/grad_norm.png +0 -0
  12. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer32.png +0 -0
  13. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer36.png +0 -0
  14. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer40.png +0 -0
  15. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer44.png +0 -0
  16. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_mean.png +0 -0
  17. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/iter_time.png +0 -0
  18. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/loss.png +0 -0
  19. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/loss_scale.png +0 -0
  20. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/optim0_lr0.png +0 -0
  21. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/optim_step_time.png +0 -0
  22. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/train_time.png +0 -0
  23. exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/valid.accuracy.best.pth +3 -0
  24. meta.yaml +8 -0
README.md CHANGED
@@ -1,3 +1,314 @@
1
- ---
2
- license: cc-by-4.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - language-identification
6
+ language: multilingual
7
+ datasets:
8
+ - geolid
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 LID model
13
+
14
+ ### `espnet/geolid_combined_shared_trainable`
15
+
16
+ This model was trained by Qingzheng-Wang using the geolid recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 77e4293952083b9e32bc19a5ddc19efe45e70e4a
26
+ pip install -e .
27
+ cd egs2/geolid/lid1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/geolid_combined_shared_trainable
29
+ ```
30
+
31
+
32
+
33
+ ## LID config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: /work/nvme/bbjs/qwang20/espnet/egs2/lid_delta/lid1/conf/mms_1b_all_no_filter_balanced_dataset/mms_ecapa_upcon_32_44_it0.4_sharedCondProj_butUpdate_50k_lr1e-5_datasetup0.3_backup_33epoch.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ drop_last_iter: false
42
+ dry_run: false
43
+ iterator_type: category
44
+ valid_iterator_type: category
45
+ output_dir: exp_all_no_filter_raw/spk_mms_ecapa_upcon_32_44_it0.4_sharedCondProj_butUpdate_50k_lr1e-5_datasetup0.3_backup_33epoch_raw
46
+ ngpu: 1
47
+ seed: 3702
48
+ num_workers: 8
49
+ num_att_plot: 0
50
+ dist_backend: nccl
51
+ dist_init_method: env://
52
+ dist_world_size: null
53
+ dist_rank: null
54
+ local_rank: 0
55
+ dist_master_addr: null
56
+ dist_master_port: null
57
+ dist_launcher: null
58
+ multiprocessing_distributed: false
59
+ unused_parameters: true
60
+ sharded_ddp: false
61
+ use_deepspeed: false
62
+ deepspeed_config: null
63
+ cudnn_enabled: true
64
+ cudnn_benchmark: true
65
+ cudnn_deterministic: false
66
+ use_tf32: false
67
+ collect_stats: false
68
+ write_collected_feats: false
69
+ max_epoch: 33
70
+ patience: null
71
+ val_scheduler_criterion:
72
+ - valid
73
+ - loss
74
+ early_stopping_criterion:
75
+ - valid
76
+ - loss
77
+ - min
78
+ best_model_criterion:
79
+ - - valid
80
+ - accuracy
81
+ - max
82
+ keep_nbest_models: 2
83
+ nbest_averaging_interval: 0
84
+ grad_clip: 9999
85
+ grad_clip_type: 2.0
86
+ grad_noise: false
87
+ accum_grad: 4
88
+ no_forward_run: false
89
+ resume: true
90
+ train_dtype: float32
91
+ use_amp: true
92
+ log_interval: 100
93
+ use_matplotlib: true
94
+ use_tensorboard: true
95
+ create_graph_in_tensorboard: false
96
+ use_wandb: true
97
+ wandb_project: lid
98
+ wandb_id: null
99
+ wandb_entity: qingzhew-carnegie-mellon-university
100
+ wandb_name: null
101
+ wandb_model_log_interval: -1
102
+ detect_anomaly: false
103
+ use_adapter: false
104
+ adapter: lora
105
+ save_strategy: all
106
+ adapter_conf: {}
107
+ pretrain_path: null
108
+ init_param: []
109
+ ignore_init_mismatch: false
110
+ freeze_param: []
111
+ num_iters_per_epoch: 2000
112
+ batch_size: 20
113
+ valid_batch_size: null
114
+ batch_bins: 1440000
115
+ valid_batch_bins: null
116
+ category_sample_size: 10
117
+ train_shape_file:
118
+ - exp_all_no_filter_raw/spk_stats_16k/train/speech_shape
119
+ valid_shape_file:
120
+ - exp_all_no_filter_raw/spk_stats_16k/valid/speech_shape
121
+ batch_type: catpow_balance_dataset
122
+ upsampling_factor: 0.5
123
+ language_upsampling_factor: 0.5
124
+ dataset_upsampling_factor: 0.3
125
+ dataset_scaling_factor: 1.2
126
+ max_batch_size: 6
127
+ valid_batch_type: null
128
+ fold_length:
129
+ - 120000
130
+ sort_in_batch: descending
131
+ shuffle_within_batch: false
132
+ sort_batch: descending
133
+ multiple_iterator: false
134
+ chunk_length: 500
135
+ chunk_shift_ratio: 0.5
136
+ num_cache_chunks: 1024
137
+ chunk_excluded_key_prefixes: []
138
+ chunk_default_fs: null
139
+ chunk_max_abs_length: null
140
+ chunk_discard_short_samples: true
141
+ train_data_path_and_name_and_type:
142
+ - - dump/raw/train_all_no_filter_lang/wav.scp
143
+ - speech
144
+ - sound
145
+ - - dump/raw/train_all_no_filter_lang/utt2spk
146
+ - lid_labels
147
+ - text
148
+ valid_data_path_and_name_and_type:
149
+ - - dump/raw/dev_ml_superb2_lang/wav.scp
150
+ - speech
151
+ - sound
152
+ - - dump/raw/dev_ml_superb2_lang/utt2spk
153
+ - lid_labels
154
+ - text
155
+ multi_task_dataset: false
156
+ allow_variable_data_keys: false
157
+ max_cache_size: 0.0
158
+ max_cache_fd: 32
159
+ allow_multi_rates: false
160
+ valid_max_cache_size: null
161
+ exclude_weight_decay: false
162
+ exclude_weight_decay_conf: {}
163
+ optim: adam
164
+ optim_conf:
165
+ lr: 1.0e-05
166
+ betas:
167
+ - 0.9
168
+ - 0.98
169
+ scheduler: tristagelr
170
+ scheduler_conf:
171
+ max_steps: 12500
172
+ warmup_ratio: 0.1
173
+ hold_ratio: 0.4
174
+ decay_ratio: 0.5
175
+ init_lr_scale: 0.6
176
+ final_lr_scale: 0.1
177
+ init: null
178
+ use_preprocessor: true
179
+ input_size: null
180
+ target_duration: 3.0
181
+ spk2utt: dump/raw/train_all_no_filter_lang/spk2utt
182
+ spk_num: 157
183
+ sample_rate: 16000
184
+ num_eval: 10
185
+ rir_scp: ''
186
+ model: upstream_condition
187
+ model_conf:
188
+ lang2vec_conditioning_layers:
189
+ - 32
190
+ - 36
191
+ - 40
192
+ - 44
193
+ lid_conditioning_layers: []
194
+ frozen_ecapa: false
195
+ apply_intermediate_lang2vec_loss: true
196
+ apply_intermediate_lid_class_loss: false
197
+ apply_intermediate_lang2vec_condition: true
198
+ apply_intermediate_lid_class_condition: false
199
+ inter_lang2vec_loss_weight: 0.4
200
+ inter_lid_class_loss_weight: 0.0
201
+ cutoff_gradient_from_backbone: false
202
+ cutoff_gradient_before_condtrans: true
203
+ independent_module: true
204
+ use_gate: false
205
+ gate_type: null
206
+ shared_conditioning_proj: true
207
+ frontend: s3prl_condition
208
+ frontend_conf:
209
+ frontend_conf:
210
+ upstream: hf_wav2vec2_condition
211
+ path_or_url: facebook/mms-1b
212
+ download_dir: ./hub
213
+ multilayer_feature: true
214
+ specaug: null
215
+ specaug_conf: {}
216
+ normalize: utterance_mvn
217
+ normalize_conf:
218
+ norm_vars: false
219
+ encoder: ecapa_tdnn
220
+ encoder_conf:
221
+ model_scale: 8
222
+ ndim: 512
223
+ output_size: 1536
224
+ pooling: chn_attn_stat
225
+ pooling_conf: {}
226
+ projector: rawnet3
227
+ projector_conf:
228
+ output_size: 192
229
+ encoder_condition: identity
230
+ encoder_condition_conf: {}
231
+ pooling_condition: chn_attn_stat
232
+ pooling_condition_conf: {}
233
+ projector_condition: rawnet3
234
+ projector_condition_conf: {}
235
+ preprocessor: lid
236
+ preprocessor_conf:
237
+ fix_duration: false
238
+ sample_rate: 16000
239
+ noise_apply_prob: 0.0
240
+ noise_info:
241
+ - - 1.0
242
+ - dump/raw/musan_speech.scp
243
+ - - 4
244
+ - 7
245
+ - - 13
246
+ - 20
247
+ - - 1.0
248
+ - dump/raw/musan_noise.scp
249
+ - - 1
250
+ - 1
251
+ - - 0
252
+ - 15
253
+ - - 1.0
254
+ - dump/raw/musan_music.scp
255
+ - - 1
256
+ - 1
257
+ - - 5
258
+ - 15
259
+ rir_apply_prob: 0.0
260
+ rir_scp: dump/raw/rirs.scp
261
+ use_lang2vec: true
262
+ lang2vec_type: geo
263
+ loss: aamsoftmax_sc_topk_lang2vec
264
+ loss_conf:
265
+ margin: 0.5
266
+ scale: 30
267
+ K: 3
268
+ mp: 0.06
269
+ k_top: 5
270
+ lang2vec_dim: 299
271
+ lang2vec_type: geo
272
+ lang2vec_weight: 0.2
273
+ required:
274
+ - output_dir
275
+ version: '202412'
276
+ distributed: false
277
+ ```
278
+
279
+ </details>
280
+
281
+
282
+
283
+ ### Citing ESPnet
284
+
285
+ ```bibtex
286
+ @inproceedings{watanabe2018espnet,
287
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
288
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
289
+ year={2018},
290
+ booktitle={Proceedings of Interspeech},
291
+ pages={2207--2211},
292
+ doi={10.21437/Interspeech.2018-1456},
293
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
294
+ }
295
+
296
+
297
+
298
+
299
+
300
+
301
+ ```
302
+
303
+ or arXiv:
304
+
305
+ ```bibtex
306
+ @misc{watanabe2018espnet,
307
+ title={ESPnet: End-to-End Speech Processing Toolkit},
308
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
309
+ year={2018},
310
+ eprint={1804.00015},
311
+ archivePrefix={arXiv},
312
+ primaryClass={cs.CL}
313
+ }
314
+ ```
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/config.yaml ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: /work/nvme/bbjs/qwang20/espnet/egs2/lid_delta/lid1/conf/mms_1b_all_no_filter_balanced_dataset/mms_ecapa_upcon_32_44_it0.4_sharedCondProj_butUpdate_50k_lr1e-5_datasetup0.3_backup_33epoch.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: category
7
+ valid_iterator_type: category
8
+ output_dir: exp_all_no_filter_raw/spk_mms_ecapa_upcon_32_44_it0.4_sharedCondProj_butUpdate_50k_lr1e-5_datasetup0.3_backup_33epoch_raw
9
+ ngpu: 1
10
+ seed: 3702
11
+ num_workers: 8
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ cudnn_enabled: true
27
+ cudnn_benchmark: true
28
+ cudnn_deterministic: false
29
+ use_tf32: false
30
+ collect_stats: false
31
+ write_collected_feats: false
32
+ max_epoch: 33
33
+ patience: null
34
+ val_scheduler_criterion:
35
+ - valid
36
+ - loss
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ best_model_criterion:
42
+ - - valid
43
+ - accuracy
44
+ - max
45
+ keep_nbest_models: 2
46
+ nbest_averaging_interval: 0
47
+ grad_clip: 9999
48
+ grad_clip_type: 2.0
49
+ grad_noise: false
50
+ accum_grad: 4
51
+ no_forward_run: false
52
+ resume: true
53
+ train_dtype: float32
54
+ use_amp: true
55
+ log_interval: 100
56
+ use_matplotlib: true
57
+ use_tensorboard: true
58
+ create_graph_in_tensorboard: false
59
+ use_wandb: true
60
+ wandb_project: lid
61
+ wandb_id: null
62
+ wandb_entity: qingzhew-carnegie-mellon-university
63
+ wandb_name: null
64
+ wandb_model_log_interval: -1
65
+ detect_anomaly: false
66
+ use_adapter: false
67
+ adapter: lora
68
+ save_strategy: all
69
+ adapter_conf: {}
70
+ pretrain_path: null
71
+ init_param: []
72
+ ignore_init_mismatch: false
73
+ freeze_param: []
74
+ num_iters_per_epoch: 2000
75
+ batch_size: 20
76
+ valid_batch_size: null
77
+ batch_bins: 1440000
78
+ valid_batch_bins: null
79
+ category_sample_size: 10
80
+ train_shape_file:
81
+ - exp_all_no_filter_raw/spk_stats_16k/train/speech_shape
82
+ valid_shape_file:
83
+ - exp_all_no_filter_raw/spk_stats_16k/valid/speech_shape
84
+ batch_type: catpow_balance_dataset
85
+ upsampling_factor: 0.5
86
+ language_upsampling_factor: 0.5
87
+ dataset_upsampling_factor: 0.3
88
+ dataset_scaling_factor: 1.2
89
+ max_batch_size: 6
90
+ valid_batch_type: null
91
+ fold_length:
92
+ - 120000
93
+ sort_in_batch: descending
94
+ shuffle_within_batch: false
95
+ sort_batch: descending
96
+ multiple_iterator: false
97
+ chunk_length: 500
98
+ chunk_shift_ratio: 0.5
99
+ num_cache_chunks: 1024
100
+ chunk_excluded_key_prefixes: []
101
+ chunk_default_fs: null
102
+ chunk_max_abs_length: null
103
+ chunk_discard_short_samples: true
104
+ train_data_path_and_name_and_type:
105
+ - - dump/raw/train_all_no_filter_lang/wav.scp
106
+ - speech
107
+ - sound
108
+ - - dump/raw/train_all_no_filter_lang/utt2spk
109
+ - lid_labels
110
+ - text
111
+ valid_data_path_and_name_and_type:
112
+ - - dump/raw/dev_ml_superb2_lang/wav.scp
113
+ - speech
114
+ - sound
115
+ - - dump/raw/dev_ml_superb2_lang/utt2spk
116
+ - lid_labels
117
+ - text
118
+ multi_task_dataset: false
119
+ allow_variable_data_keys: false
120
+ max_cache_size: 0.0
121
+ max_cache_fd: 32
122
+ allow_multi_rates: false
123
+ valid_max_cache_size: null
124
+ exclude_weight_decay: false
125
+ exclude_weight_decay_conf: {}
126
+ optim: adam
127
+ optim_conf:
128
+ lr: 1.0e-05
129
+ betas:
130
+ - 0.9
131
+ - 0.98
132
+ scheduler: tristagelr
133
+ scheduler_conf:
134
+ max_steps: 12500
135
+ warmup_ratio: 0.1
136
+ hold_ratio: 0.4
137
+ decay_ratio: 0.5
138
+ init_lr_scale: 0.6
139
+ final_lr_scale: 0.1
140
+ init: null
141
+ use_preprocessor: true
142
+ input_size: null
143
+ target_duration: 3.0
144
+ spk2utt: dump/raw/train_all_no_filter_lang/spk2utt
145
+ spk_num: 157
146
+ sample_rate: 16000
147
+ num_eval: 10
148
+ rir_scp: ''
149
+ model: upstream_condition
150
+ model_conf:
151
+ lang2vec_conditioning_layers:
152
+ - 32
153
+ - 36
154
+ - 40
155
+ - 44
156
+ lid_conditioning_layers: []
157
+ frozen_ecapa: false
158
+ apply_intermediate_lang2vec_loss: true
159
+ apply_intermediate_lid_class_loss: false
160
+ apply_intermediate_lang2vec_condition: true
161
+ apply_intermediate_lid_class_condition: false
162
+ inter_lang2vec_loss_weight: 0.4
163
+ inter_lid_class_loss_weight: 0.0
164
+ cutoff_gradient_from_backbone: false
165
+ cutoff_gradient_before_condtrans: true
166
+ independent_module: true
167
+ use_gate: false
168
+ gate_type: null
169
+ shared_conditioning_proj: true
170
+ frontend: s3prl_condition
171
+ frontend_conf:
172
+ frontend_conf:
173
+ upstream: hf_wav2vec2_condition
174
+ path_or_url: facebook/mms-1b
175
+ download_dir: ./hub
176
+ multilayer_feature: true
177
+ specaug: null
178
+ specaug_conf: {}
179
+ normalize: utterance_mvn
180
+ normalize_conf:
181
+ norm_vars: false
182
+ encoder: ecapa_tdnn
183
+ encoder_conf:
184
+ model_scale: 8
185
+ ndim: 512
186
+ output_size: 1536
187
+ pooling: chn_attn_stat
188
+ pooling_conf: {}
189
+ projector: rawnet3
190
+ projector_conf:
191
+ output_size: 192
192
+ encoder_condition: identity
193
+ encoder_condition_conf: {}
194
+ pooling_condition: chn_attn_stat
195
+ pooling_condition_conf: {}
196
+ projector_condition: rawnet3
197
+ projector_condition_conf: {}
198
+ preprocessor: lid
199
+ preprocessor_conf:
200
+ fix_duration: false
201
+ sample_rate: 16000
202
+ noise_apply_prob: 0.0
203
+ noise_info:
204
+ - - 1.0
205
+ - dump/raw/musan_speech.scp
206
+ - - 4
207
+ - 7
208
+ - - 13
209
+ - 20
210
+ - - 1.0
211
+ - dump/raw/musan_noise.scp
212
+ - - 1
213
+ - 1
214
+ - - 0
215
+ - 15
216
+ - - 1.0
217
+ - dump/raw/musan_music.scp
218
+ - - 1
219
+ - 1
220
+ - - 5
221
+ - 15
222
+ rir_apply_prob: 0.0
223
+ rir_scp: dump/raw/rirs.scp
224
+ use_lang2vec: true
225
+ lang2vec_type: geo
226
+ loss: aamsoftmax_sc_topk_lang2vec
227
+ loss_conf:
228
+ margin: 0.5
229
+ scale: 30
230
+ K: 3
231
+ mp: 0.06
232
+ k_top: 5
233
+ lang2vec_dim: 299
234
+ lang2vec_type: geo
235
+ lang2vec_weight: 0.2
236
+ required:
237
+ - output_dir
238
+ version: '202412'
239
+ distributed: false
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/accuracy.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/backward_time.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/class_loss.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/clip.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/forward_time.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/geo_loss_all.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/geo_loss_downstream.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/gpu_max_cached_mem_GB.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/grad_norm.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer32.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer36.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer40.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_layer44.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/inter_geo_loss_mean.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/iter_time.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/loss.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/loss_scale.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/optim0_lr0.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/optim_step_time.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/images/train_time.png ADDED
exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/valid.accuracy.best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0c70504e1a4a6586fe18867fc2bf3d8f0ec17e4afa31a1e4cd7ea295c5c5057
3
+ size 3909192549
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202506'
2
+ files:
3
+ model_file: exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/valid.accuracy.best.pth
4
+ python: 3.11.8 | packaged by conda-forge | (main, Feb 16 2024, 20:53:32) [GCC 12.3.0]
5
+ timestamp: 1755503509.802293
6
+ torch: 2.4.0+cu118
7
+ yaml_files:
8
+ train_config: exp_combined/lid_mms_ecapa_upcon_32_44_it0.4_shared_trainable_raw/config.yaml