zhiyang1 commited on
Commit
f85a03b
·
verified ·
1 Parent(s): fbf2450

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -2522,3 +2522,5 @@ checkpoint-48000/images_steps-40_cfg-5/imagenet_9_ostrich,[[:space:]]Struthio[[:
2522
  checkpoint-48000/images_steps-40_cfg-5/imagenet_9_ostrich,[[:space:]]Struthio[[:space:]]camelus_6.png filter=lfs diff=lfs merge=lfs -text
2523
  checkpoint-48000/images_steps-40_cfg-5/imagenet_9_ostrich,[[:space:]]Struthio[[:space:]]camelus_8.png filter=lfs diff=lfs merge=lfs -text
2524
  checkpoint-70000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 
 
 
2522
  checkpoint-48000/images_steps-40_cfg-5/imagenet_9_ostrich,[[:space:]]Struthio[[:space:]]camelus_6.png filter=lfs diff=lfs merge=lfs -text
2523
  checkpoint-48000/images_steps-40_cfg-5/imagenet_9_ostrich,[[:space:]]Struthio[[:space:]]camelus_8.png filter=lfs diff=lfs merge=lfs -text
2524
  checkpoint-70000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
2525
+ checkpoint-78000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
2526
+ checkpoint-80000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
checkpoint-78000/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ar_steps": 1,
3
+ "architectures": [
4
+ "DiffVLMDiffusion"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "condition_layer": -1,
9
+ "eos_token_id": 151645,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1536,
12
+ "image_token_id": 151655,
13
+ "img_cross_attention_dim": 2048,
14
+ "img_diffuser_depth": 1,
15
+ "img_ffn_dim_multiplier": null,
16
+ "img_hidden_size": 1536,
17
+ "img_multiple_of": 256,
18
+ "img_norm_eps": 1e-05,
19
+ "img_num_attention_heads": 12,
20
+ "img_num_kv_heads": 12,
21
+ "img_qk_norm": true,
22
+ "in_channels": 32,
23
+ "initializer_range": 0.02,
24
+ "inject_img_diffuser": false,
25
+ "input_size": 32,
26
+ "intermediate_size": 8960,
27
+ "layer_group_size": 7,
28
+ "layerwise_start_idx": 0,
29
+ "lora_alpha": 16,
30
+ "lora_bias": "none",
31
+ "lora_dropout": 0.05,
32
+ "lora_enable": false,
33
+ "lora_r": 64,
34
+ "max_position_embeddings": 32768,
35
+ "max_window_layers": 28,
36
+ "model_type": "qwen2_vl",
37
+ "non_linearity": 1,
38
+ "norm_elementwise_affine": true,
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 28,
41
+ "num_key_value_heads": 2,
42
+ "patch_size": 1,
43
+ "repa_coeff": 0.5,
44
+ "repa_layers": null,
45
+ "repa_shared": false,
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "mrope_section": [
49
+ 16,
50
+ 24,
51
+ 24
52
+ ],
53
+ "rope_type": "default",
54
+ "type": "default"
55
+ },
56
+ "rope_theta": 1000000.0,
57
+ "sample_size": 128,
58
+ "sampling_steps": 28,
59
+ "sliding_window": null,
60
+ "tie_word_embeddings": true,
61
+ "torch_dtype": "bfloat16",
62
+ "transformers_version": "4.47.0",
63
+ "use_cache": true,
64
+ "use_repa": false,
65
+ "use_residual_attn": false,
66
+ "use_sliding_window": false,
67
+ "vae_path": "mit-han-lab/dc-ae-f32c32-sana-1.1-diffusers",
68
+ "video_token_id": 151656,
69
+ "vision_config": {
70
+ "hidden_size": 1536,
71
+ "in_chans": 3,
72
+ "model_type": "qwen2_vl",
73
+ "spatial_patch_size": 14
74
+ },
75
+ "vision_end_token_id": 151653,
76
+ "vision_start_token_id": 151652,
77
+ "vision_token_id": 151654,
78
+ "vocab_size": 151936
79
+ }
checkpoint-78000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.47.0"
6
+ }
checkpoint-78000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84ae799fabac4b2895a6c2bd32c89832368ce88c9e95491291439589a4c3f45a
3
+ size 4973325456
checkpoint-78000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c79f28b8a085cce1ff6b1b85925bfd8c7fc640426d15a3f6dc0503abec9816
3
+ size 3234250886
checkpoint-78000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-78000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302c35d7271a2b4acbf8b5f7455b644c1f3f9eaf8d95646071f00c19ed39a35f
3
+ size 7453278409
checkpoint-78000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f105253a9fca9654c8afe9bdbcb63deef2653ec4dad0e3d2d4ea43c2dc4b542
3
+ size 15024
checkpoint-78000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea21c95ec5349d7e496afb517a64fe3211cbb1980547dc1d97b3d2d127257eb
3
+ size 15024
checkpoint-78000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86e9a72faec43ba038fe6fea3a7d2f75c9133f5f75127a8a4107fe12a61818fa
3
+ size 15024
checkpoint-78000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b3b036900f9c0ecf67e7dece42178ce91585073c83d6de27af0485b984f31c9
3
+ size 15024
checkpoint-78000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13d9cd208c29ca106a1ae1d1516966547030aae064dee0101ef11dce466e94f4
3
+ size 1064
checkpoint-78000/trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcbd25529dae7244b0ef67d2c836e1a53153aa4b40a492c4841dc8d969b62558
3
+ size 11993473
checkpoint-78000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f4fd6f1d6f2e71ce0aee85aa337a4a8b7f4d4efa4ed7e8760fcb9edf6bda73
3
+ size 6008
checkpoint-80000/config.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ar_steps": 1,
3
+ "architectures": [
4
+ "DiffVLMDiffusion"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "condition_layer": -1,
9
+ "eos_token_id": 151645,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 1536,
12
+ "image_token_id": 151655,
13
+ "img_cross_attention_dim": 2048,
14
+ "img_diffuser_depth": 1,
15
+ "img_ffn_dim_multiplier": null,
16
+ "img_hidden_size": 1536,
17
+ "img_multiple_of": 256,
18
+ "img_norm_eps": 1e-05,
19
+ "img_num_attention_heads": 12,
20
+ "img_num_kv_heads": 12,
21
+ "img_qk_norm": true,
22
+ "in_channels": 32,
23
+ "initializer_range": 0.02,
24
+ "inject_img_diffuser": false,
25
+ "input_size": 32,
26
+ "intermediate_size": 8960,
27
+ "layer_group_size": 7,
28
+ "layerwise_start_idx": 0,
29
+ "lora_alpha": 16,
30
+ "lora_bias": "none",
31
+ "lora_dropout": 0.05,
32
+ "lora_enable": false,
33
+ "lora_r": 64,
34
+ "max_position_embeddings": 32768,
35
+ "max_window_layers": 28,
36
+ "model_type": "qwen2_vl",
37
+ "non_linearity": 1,
38
+ "norm_elementwise_affine": true,
39
+ "num_attention_heads": 12,
40
+ "num_hidden_layers": 28,
41
+ "num_key_value_heads": 2,
42
+ "patch_size": 1,
43
+ "repa_coeff": 0.5,
44
+ "repa_layers": null,
45
+ "repa_shared": false,
46
+ "rms_norm_eps": 1e-06,
47
+ "rope_scaling": {
48
+ "mrope_section": [
49
+ 16,
50
+ 24,
51
+ 24
52
+ ],
53
+ "rope_type": "default",
54
+ "type": "default"
55
+ },
56
+ "rope_theta": 1000000.0,
57
+ "sample_size": 128,
58
+ "sampling_steps": 28,
59
+ "sliding_window": null,
60
+ "tie_word_embeddings": true,
61
+ "torch_dtype": "bfloat16",
62
+ "transformers_version": "4.47.0",
63
+ "use_cache": true,
64
+ "use_repa": false,
65
+ "use_residual_attn": false,
66
+ "use_sliding_window": false,
67
+ "vae_path": "mit-han-lab/dc-ae-f32c32-sana-1.1-diffusers",
68
+ "video_token_id": 151656,
69
+ "vision_config": {
70
+ "hidden_size": 1536,
71
+ "in_chans": 3,
72
+ "model_type": "qwen2_vl",
73
+ "spatial_patch_size": 14
74
+ },
75
+ "vision_end_token_id": 151653,
76
+ "vision_start_token_id": 151652,
77
+ "vision_token_id": 151654,
78
+ "vocab_size": 151936
79
+ }
checkpoint-80000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.47.0"
6
+ }
checkpoint-80000/model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a936c23e1923bbea9b8bc27e077e7640e0a66ef3798da06af868e11a0e00a10
3
+ size 4973325456
checkpoint-80000/model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e63927b4e219c8ac25a15d36e76afbefdcb2ef7da79dc1672a3dfba229ddcee
3
+ size 3234250886
checkpoint-80000/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-80000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab68dbbf433be97581e657124c82ab62bf3439e2e1538456a1ef76a1747655d1
3
+ size 7453278409
checkpoint-80000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03a5855f058fec5b4fa8a5f276f8c45ba5fcf8820466870d2a4b978fcd2ed7a4
3
+ size 15024
checkpoint-80000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:935da6131c460a190f3b98eb61979cb6c8aec021e27a66f315c63ab63d2d4407
3
+ size 15024
checkpoint-80000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c936aef35c5c8f0121c0d3145053eabeaee8a4a3bc98d75cc26136f8b25cbe2f
3
+ size 15024
checkpoint-80000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:576b89a6d32556fbb8501dfe92221f403d73ed0840e6f54765ca0a6c9cfdde78
3
+ size 15024
checkpoint-80000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294ec2260830a6fd101e2e74185a511806e6b97a9a6ef33b3033c30f9ad86a0a
3
+ size 1064
checkpoint-80000/trainer_state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4aa65c3ee4fb1f766ea650e3ff11868e2a7afe2030e724b5d167dbee77b9c62
3
+ size 12301703
checkpoint-80000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f4fd6f1d6f2e71ce0aee85aa337a4a8b7f4d4efa4ed7e8760fcb9edf6bda73
3
+ size 6008