Symlon committed on
Commit e1e2753 · 1 Parent(s): 5cf7a79

Add model checkpoint and config files

Files changed (5)
  1. checkpoint.pt +3 -0
  2. checkpoint_.pt +3 -0
  3. config.yaml +106 -0
  4. log.txt +0 -0
  5. model.txt +47 -0
checkpoint.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a5262e8184d15d3876a7376e559e6a5643cad669864495687214397133875d3
+ size 10801103890
checkpoint_.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6a5262e8184d15d3876a7376e559e6a5643cad669864495687214397133875d3
+ size 10801103890
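
Note that checkpoint.pt and checkpoint_.pt are Git LFS pointer stubs with the same oid and size (about 10.8 GB), so they reference the same stored object; the actual weights must be fetched (for example with git lfs pull or the huggingface_hub client) before they can be loaded. A minimal inspection sketch follows, assuming the file is an ordinary torch.save() dictionary; the keys it prints are whatever the training script stored and are not confirmed by this commit.

import torch

# Load the checkpoint on CPU after the LFS object has been pulled.
# Assumption: checkpoint.pt is a plain torch.save() dictionary (e.g. model /
# ema / optimizer state); this commit does not confirm its layout.
state = torch.load("checkpoint.pt", map_location="cpu")
if isinstance(state, dict):
    for key, value in state.items():
        print(key, type(value).__name__)
else:
    print(type(state).__name__)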
config.yaml ADDED
@@ -0,0 +1,106 @@
+ run_dir: .exp/diffusion/imagenet_512/dc_ae_f32c32_in_1.0/dit_xl_1/bs_1024_lr_2e-4_fp16
+ seed: 0
+ allow_tf32: true
+ resolution: 512
+ amp: fp16
+ cfg_scale: 1.0
+ evaluate_split: test
+ evaluate_dir_name: null
+ num_save_images: 64
+ save_all_images: false
+ save_images_at_all_procs: false
+ evaluate_dataset: sample_class
+ sample_class:
+   name: sample_class
+   batch_size: 32
+   n_worker: 8
+   train_drop_last: true
+   seed: 0
+   num_classes: 1000
+   num_samples: 50000
+ autoencoder: dc-ae-f32c32-in-1.0
+ autoencoder_dtype: fp32
+ scaling_factor: 0.3189
+ model: dit
+ dit:
+   name: DiT
+   input_size: 16
+   patch_size: 1
+   in_channels: 32
+   hidden_size: 1152
+   depth: 28
+   num_heads: 16
+   mlp_ratio: 4.0
+   post_norm: false
+   class_dropout_prob: 0.1
+   num_classes: 1000
+   learn_sigma: true
+   unconditional: false
+   use_checkpoint: true
+   pretrained_path: null
+   pretrained_source: dc-ae
+   eval_scheduler: GaussianDiffusion
+   num_inference_steps: 250
+   train_scheduler: GaussianDiffusion
+ uvit:
+   name: UViT
+   input_size: 32
+   patch_size: 2
+   in_channels: 4
+   hidden_size: 1152
+   depth: 28
+   num_heads: 16
+   mlp_ratio: 4.0
+   mlp_time_embed: false
+   qkv_bias: false
+   act_layer: gelu
+   use_checkpoint: true
+   class_dropout_prob: 0.1
+   num_classes: 1000
+   pretrained_path: null
+   pretrained_source: dc-ae
+   eval_scheduler: DPM_Solver
+   num_inference_steps: 30
+   train_scheduler: DPM_Solver
+   attn_mode: null
+ compute_fid: true
+ fid:
+   save_path: null
+   ref_path: assets/data/fid/imagenet_512_train.npz
+ compute_inception_score: true
+ inception_score: {}
+ train_dataset: latent_imagenet
+ latent_imagenet:
+   name: latent_imagenet
+   batch_size: 32
+   n_worker: 8
+   train_drop_last: true
+   seed: 0
+   data_dir: assets/data/latent/dc_ae_f32c32_in_1.0/imagenet_512
+ resume: true
+ resume_path: null
+ resume_schedule: true
+ num_epochs: null
+ max_steps: 3000000
+ clip_grad: null
+ num_store_images: 64
+ save_checkpoint_steps: 1000
+ evaluate_steps: 20000
+ optimizer:
+   name: adamw
+   lr: 0.0002
+   warmup_lr: 0.0
+   weight_decay: 0.0
+   no_wd_keys: []
+   betas:
+   - 0.9
+   - 0.999
+ lr_scheduler:
+   name: constant
+   warmup_steps: 1000
+ log: true
+ wandb_entity: null
+ wandb_project: null
+ ema_decay: 0.9999
+ ema_warmup_steps: 2000
+ evaluate_ema: true
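
The config pins the full training recipe: a DiT-XL-sized transformer (depth 28, hidden size 1152, 16 heads) on 32-channel f32 latents at 16x16, AdamW at lr 2e-4 with a constant schedule and 1000 warmup steps, fp16 AMP, EMA decay 0.9999, and FID / Inception Score evaluation against the ImageNet 512 reference statistics. A minimal sketch of reading it back with PyYAML follows; the project's own config loader, and any defaults or validation it applies, may differ.

import yaml

# Read the training config as plain YAML and pull out a few nested fields.
# Assumption: the nesting shown in the diff above; the repository's own
# config system may layer extra defaults on top of this file.
with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model"])                # dit
print(cfg["dit"]["hidden_size"])   # 1152
print(cfg["optimizer"]["lr"])      # 0.0002
print(cfg["max_steps"])            # 3000000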
log.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.txt ADDED
@@ -0,0 +1,47 @@
+ DiT(
+   (x_embedder): PatchEmbed(
+     (proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
+     (norm): Identity()
+   )
+   (t_embedder): TimestepEmbedder(
+     (mlp): Sequential(
+       (0): Linear(in_features=256, out_features=1152, bias=True)
+       (1): SiLU()
+       (2): Linear(in_features=1152, out_features=1152, bias=True)
+     )
+   )
+   (y_embedder): LabelEmbedder(
+     (embedding_table): Embedding(1001, 1152)
+   )
+   (blocks): ModuleList(
+     (0-27): 28 x DiTBlock(
+       (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
+       (attn): Attention(
+         (qkv): Linear(in_features=1152, out_features=3456, bias=True)
+         (attn_drop): Dropout(p=0.0, inplace=False)
+         (proj): Linear(in_features=1152, out_features=1152, bias=True)
+         (proj_drop): Dropout(p=0.0, inplace=False)
+       )
+       (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
+       (mlp): Mlp(
+         (fc1): Linear(in_features=1152, out_features=4608, bias=True)
+         (act): GELU(approximate='tanh')
+         (drop1): Dropout(p=0, inplace=False)
+         (fc2): Linear(in_features=4608, out_features=1152, bias=True)
+         (drop2): Dropout(p=0, inplace=False)
+       )
+       (adaLN_modulation): Sequential(
+         (0): SiLU()
+         (1): Linear(in_features=1152, out_features=6912, bias=True)
+       )
+     )
+   )
+   (final_layer): FinalLayer(
+     (norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
+     (linear): Linear(in_features=1152, out_features=64, bias=True)
+     (adaLN_modulation): Sequential(
+       (0): SiLU()
+       (1): Linear(in_features=1152, out_features=2304, bias=True)
+     )
+   )
+ )
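
The printed widths are consistent with config.yaml: the fused qkv projection is 3 x 1152, the MLP expands by mlp_ratio 4.0 to 4608, each block's adaLN-Zero modulation emits 6 x 1152 values (shift/scale/gate for attention and MLP), and the final layer's modulation emits 2 x 1152. The output projection is patch_size^2 x in_channels x 2 = 64 because learn_sigma doubles the channels, and the label embedding has 1001 rows for 1000 classes plus the null class used for classifier-free-guidance dropout. A small check of that arithmetic, assuming the standard DiT adaLN-Zero layout (which this printout matches):

# Sanity-check the layer widths in model.txt against config.yaml.
# Assumption: standard DiT adaLN-Zero layout (6 modulation vectors per block,
# 2 for the final layer) and learn_sigma doubling the output channels.
hidden_size, mlp_ratio = 1152, 4.0
patch_size, in_channels, learn_sigma = 1, 32, True
num_classes = 1000

assert 3 * hidden_size == 3456                     # fused qkv projection
assert int(mlp_ratio * hidden_size) == 4608        # MLP expansion (fc1)
assert 6 * hidden_size == 6912                     # per-block adaLN modulation
assert 2 * hidden_size == 2304                     # final-layer adaLN modulation
assert num_classes + 1 == 1001                     # label table incl. null class
assert patch_size ** 2 * in_channels * (2 if learn_sigma else 1) == 64
print("layer widths consistent with config")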