DiT(
  (x_embedder): PatchEmbed(
    (proj): Conv2d(32, 1152, kernel_size=(1, 1), stride=(1, 1))
    (norm): Identity()
  )
  (t_embedder): TimestepEmbedder(
    (mlp): Sequential(
      (0): Linear(in_features=256, out_features=1152, bias=True)
      (1): SiLU()
      (2): Linear(in_features=1152, out_features=1152, bias=True)
    )
  )
  (y_embedder): LabelEmbedder(
    (embedding_table): Embedding(1001, 1152)
  )
  (blocks): ModuleList(
    (0-27): 28 x DiTBlock(
      (norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
      (attn): Attention(
        (qkv): Linear(in_features=1152, out_features=3456, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=1152, out_features=1152, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (norm2): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
      (mlp): Mlp(
        (fc1): Linear(in_features=1152, out_features=4608, bias=True)
        (act): GELU(approximate='tanh')
        (drop1): Dropout(p=0, inplace=False)
        (fc2): Linear(in_features=4608, out_features=1152, bias=True)
        (drop2): Dropout(p=0, inplace=False)
      )
      (adaLN_modulation): Sequential(
        (0): SiLU()
        (1): Linear(in_features=1152, out_features=6912, bias=True)
      )
    )
  )
  (final_layer): FinalLayer(
    (norm_final): LayerNorm((1152,), eps=1e-06, elementwise_affine=False)
    (linear): Linear(in_features=1152, out_features=64, bias=True)
    (adaLN_modulation): Sequential(
      (0): SiLU()
      (1): Linear(in_features=1152, out_features=2304, bias=True)
    )
  )
)
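To make the printed shapes concrete, here is a minimal sketch of one DiTBlock consistent with the dump above: hidden size 1152, an MLP expansion of 4x (1152 to 4608), and an adaLN_modulation head emitting 6 x 1152 = 6912 values, i.e. shift/scale/gate for both the attention and MLP branches. This is an illustrative reconstruction, not the source model's code; in particular, nn.MultiheadAttention stands in for the fused-qkv Attention module shown in the printout, and the forward pass follows the standard adaLN-Zero formulation.

```python
import torch
import torch.nn as nn


def modulate(x, shift, scale):
    # Apply per-sample adaptive shift/scale after a parameter-free LayerNorm.
    return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)


class DiTBlock(nn.Module):
    def __init__(self, hidden_size=1152, num_heads=16, mlp_ratio=4.0):
        super().__init__()
        self.norm1 = nn.LayerNorm(hidden_size, eps=1e-6, elementwise_affine=False)
        # Stand-in for the fused-qkv Attention in the printout.
        self.attn = nn.MultiheadAttention(hidden_size, num_heads, batch_first=True)
        self.norm2 = nn.LayerNorm(hidden_size, eps=1e-6, elementwise_affine=False)
        self.mlp = nn.Sequential(
            nn.Linear(hidden_size, int(hidden_size * mlp_ratio)),   # fc1: 1152 -> 4608
            nn.GELU(approximate="tanh"),
            nn.Linear(int(hidden_size * mlp_ratio), hidden_size),   # fc2: 4608 -> 1152
        )
        # Conditioning vector c -> 6 modulation signals (out_features = 6912 above).
        self.adaLN_modulation = nn.Sequential(
            nn.SiLU(), nn.Linear(hidden_size, 6 * hidden_size)
        )

    def forward(self, x, c):
        # x: (B, N, 1152) token sequence; c: (B, 1152) timestep + label conditioning.
        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = \
            self.adaLN_modulation(c).chunk(6, dim=-1)
        h = modulate(self.norm1(x), shift_msa, scale_msa)
        x = x + gate_msa.unsqueeze(1) * self.attn(h, h, h, need_weights=False)[0]
        h = modulate(self.norm2(x), shift_mlp, scale_mlp)
        x = x + gate_mlp.unsqueeze(1) * self.mlp(h)
        return x


if __name__ == "__main__":
    block = DiTBlock()
    x = torch.randn(2, 256, 1152)   # 256 tokens per image (example sequence length)
    c = torch.randn(2, 1152)
    print(block(x, c).shape)        # torch.Size([2, 256, 1152])
```

The final_layer follows the same pattern but with only two modulation signals (2 x 1152 = 2304, shift and scale) before the output projection to 64 channels per token.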