Update wavtokenizer_mediumdata_music_audio_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
Browse files
wavtokenizer_mediumdata_music_audio_frame75_3s_nq1_code4096_dim512_kmeans200_attn.yaml
CHANGED
@@ -1,24 +1,24 @@
|
|
1 |
seed_everything: 3407
|
2 |
|
3 |
data:
|
4 |
-
class_path:
|
5 |
init_args:
|
6 |
train_params:
|
7 |
-
filelist_path:
|
8 |
sampling_rate: 24000
|
9 |
num_samples: 72000
|
10 |
batch_size: 39 #18
|
11 |
num_workers: 8
|
12 |
|
13 |
val_params:
|
14 |
-
filelist_path:
|
15 |
sampling_rate: 24000
|
16 |
num_samples: 72000
|
17 |
batch_size: 2 # 10
|
18 |
num_workers: 8
|
19 |
|
20 |
model:
|
21 |
-
class_path:
|
22 |
init_args:
|
23 |
sample_rate: 24000
|
24 |
initial_learning_rate: 2e-4
|
@@ -32,12 +32,12 @@ model:
|
|
32 |
evaluate_pesq: true
|
33 |
evaluate_periodicty: true
|
34 |
|
35 |
-
resume:
|
36 |
resume_config: /cpfs_speech/jishengpeng/Code/WavTokenizer/configs/wavtokenizer_smalldata_frame75_3s_nq1_code16384_dim512_kmeans800_attn.yaml
|
37 |
-
resume_model: /cpfs_speech/jishengpeng/Code/WavTokenizer/result/train/wavtokenizer_mediumdata_music_audio_frame75_3s_nq1_code4096_dim512_kmeans200_attn/lightning_logs/version_2/checkpoints/
|
38 |
|
39 |
feature_extractor:
|
40 |
-
class_path:
|
41 |
init_args:
|
42 |
encodec_model: encodec_24khz
|
43 |
bandwidths: [6.6, 6.6, 6.6, 6.6]
|
@@ -48,7 +48,7 @@ model:
|
|
48 |
vq_kmeans: 200
|
49 |
|
50 |
backbone:
|
51 |
-
class_path:
|
52 |
init_args:
|
53 |
input_channels: 512
|
54 |
dim: 768
|
@@ -57,18 +57,18 @@ model:
|
|
57 |
adanorm_num_embeddings: 4 # len(bandwidths)
|
58 |
|
59 |
head:
|
60 |
-
class_path:
|
61 |
init_args:
|
62 |
dim: 768
|
63 |
-
n_fft: 1280
|
64 |
-
hop_length: 320
|
65 |
padding: same
|
66 |
|
67 |
trainer:
|
68 |
logger:
|
69 |
class_path: pytorch_lightning.loggers.TensorBoardLogger
|
70 |
init_args:
|
71 |
-
save_dir:
|
72 |
callbacks:
|
73 |
- class_path: pytorch_lightning.callbacks.LearningRateMonitor
|
74 |
- class_path: pytorch_lightning.callbacks.ModelSummary
|
@@ -77,10 +77,10 @@ trainer:
|
|
77 |
- class_path: pytorch_lightning.callbacks.ModelCheckpoint
|
78 |
init_args:
|
79 |
monitor: val_loss
|
80 |
-
filename:
|
81 |
save_top_k: 10
|
82 |
save_last: true
|
83 |
-
- class_path:
|
84 |
|
85 |
# Lightning calculates max_steps across all optimizer steps (rather than number of batches)
|
86 |
# This equals 1M steps per generator and 1M per discriminator
|
|
|
1 |
seed_everything: 3407
|
2 |
|
3 |
data:
|
4 |
+
class_path: decoder.dataset.VocosDataModule
|
5 |
init_args:
|
6 |
train_params:
|
7 |
+
filelist_path: ./WavTokenizer/medium_train_audio_music
|
8 |
sampling_rate: 24000
|
9 |
num_samples: 72000
|
10 |
batch_size: 39 #18
|
11 |
num_workers: 8
|
12 |
|
13 |
val_params:
|
14 |
+
filelist_path: ./WavTokenizer/medium_test_audio_music
|
15 |
sampling_rate: 24000
|
16 |
num_samples: 72000
|
17 |
batch_size: 2 # 10
|
18 |
num_workers: 8
|
19 |
|
20 |
model:
|
21 |
+
class_path: decoder.experiment.VocosEncodecExp
|
22 |
init_args:
|
23 |
sample_rate: 24000
|
24 |
initial_learning_rate: 2e-4
|
|
|
32 |
evaluate_pesq: true
|
33 |
evaluate_periodicty: true
|
34 |
|
35 |
+
resume: false
|
36 |
resume_config: /cpfs_speech/jishengpeng/Code/WavTokenizer/configs/wavtokenizer_smalldata_frame75_3s_nq1_code16384_dim512_kmeans800_attn.yaml
|
37 |
+
resume_model: /cpfs_speech/jishengpeng/Code/WavTokenizer/result/train/wavtokenizer_mediumdata_music_audio_frame75_3s_nq1_code4096_dim512_kmeans200_attn/lightning_logs/version_2/checkpoints/example.ckpt
|
38 |
|
39 |
feature_extractor:
|
40 |
+
class_path: decoder.feature_extractors.EncodecFeatures
|
41 |
init_args:
|
42 |
encodec_model: encodec_24khz
|
43 |
bandwidths: [6.6, 6.6, 6.6, 6.6]
|
|
|
48 |
vq_kmeans: 200
|
49 |
|
50 |
backbone:
|
51 |
+
class_path: decoder.models.VocosBackbone
|
52 |
init_args:
|
53 |
input_channels: 512
|
54 |
dim: 768
|
|
|
57 |
adanorm_num_embeddings: 4 # len(bandwidths)
|
58 |
|
59 |
head:
|
60 |
+
class_path: decoder.heads.ISTFTHead
|
61 |
init_args:
|
62 |
dim: 768
|
63 |
+
n_fft: 1280
|
64 |
+
hop_length: 320
|
65 |
padding: same
|
66 |
|
67 |
trainer:
|
68 |
logger:
|
69 |
class_path: pytorch_lightning.loggers.TensorBoardLogger
|
70 |
init_args:
|
71 |
+
save_dir: ./WavTokenizer/result/train/wavtokenizer_mediumdata_music_audio_frame75_3s_nq1_code4096_dim512_kmeans200_attn/
|
72 |
callbacks:
|
73 |
- class_path: pytorch_lightning.callbacks.LearningRateMonitor
|
74 |
- class_path: pytorch_lightning.callbacks.ModelSummary
|
|
|
77 |
- class_path: pytorch_lightning.callbacks.ModelCheckpoint
|
78 |
init_args:
|
79 |
monitor: val_loss
|
80 |
+
filename: wavtokenizer_checkpoint_{epoch}_{step}_{val_loss:.4f}
|
81 |
save_top_k: 10
|
82 |
save_last: true
|
83 |
+
- class_path: decoder.helpers.GradNormCallback
|
84 |
|
85 |
# Lightning calculates max_steps across all optimizer steps (rather than number of batches)
|
86 |
# This equals 1M steps per generator and 1M per discriminator
|