Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -0
- added_tokens.json +0 -0
- config.json +55 -0
- generation_config.json +13 -0
- global_step336000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
- global_step336000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt +3 -0
- global_step336000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt +3 -0
- global_step336000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt +3 -0
- global_step336000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- latest +1 -0
- merges.txt +0 -0
- model.safetensors.index.json +0 -0
- rng_state_0.pth +3 -0
- rng_state_1.pth +3 -0
- rng_state_10.pth +3 -0
- rng_state_11.pth +3 -0
- rng_state_12.pth +3 -0
- rng_state_13.pth +3 -0
- rng_state_14.pth +3 -0
- rng_state_15.pth +3 -0
- rng_state_16.pth +3 -0
- rng_state_17.pth +3 -0
- rng_state_18.pth +3 -0
- rng_state_19.pth +3 -0
- rng_state_2.pth +3 -0
- rng_state_20.pth +3 -0
- rng_state_21.pth +3 -0
- rng_state_22.pth +3 -0
- rng_state_23.pth +3 -0
- rng_state_24.pth +3 -0
- rng_state_25.pth +3 -0
- rng_state_26.pth +3 -0
- rng_state_27.pth +3 -0
- rng_state_28.pth +3 -0
- rng_state_29.pth +3 -0
- rng_state_3.pth +3 -0
- rng_state_30.pth +3 -0
- rng_state_31.pth +3 -0
- rng_state_4.pth +3 -0
- rng_state_5.pth +3 -0
- rng_state_6.pth +3 -0
- rng_state_7.pth +3 -0
- rng_state_8.pth +3 -0
- rng_state_9.pth +3 -0
- special_tokens_map.json +31 -0
- tokenizer.json +3 -0
- tokenizer_config.json +3 -0
- trainer_state.json +3 -0
- training_args.bin +3 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer_config.json filter=lfs diff=lfs merge=lfs -text
|
37 |
+
trainer_state.json filter=lfs diff=lfs merge=lfs -text
|
38 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
config.json
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"LlavaQwenForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151643,
|
8 |
+
"diffusion_name_or_path": "Efficient-Large-Model/SANA1.5_1.6B_1024px_diffusers",
|
9 |
+
"eos_token_id": 151645,
|
10 |
+
"head_dim": 128,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 2048,
|
13 |
+
"image_aspect_ratio": "square",
|
14 |
+
"image_end_tag_id": 151670,
|
15 |
+
"image_end_token_id": 217209,
|
16 |
+
"image_start_tag_id": 151669,
|
17 |
+
"image_start_token_id": 151674,
|
18 |
+
"initializer_range": 0.02,
|
19 |
+
"intermediate_size": 6144,
|
20 |
+
"max_position_embeddings": 40960,
|
21 |
+
"max_window_layers": 28,
|
22 |
+
"mm_hidden_size": 1152,
|
23 |
+
"mm_patch_merge_type": "flat",
|
24 |
+
"mm_tunable_parts": "mm_language_model",
|
25 |
+
"mm_use_im_start_end": true,
|
26 |
+
"mm_vision_select_feature": "patch",
|
27 |
+
"mm_vision_select_layer": -2,
|
28 |
+
"mm_vision_tower": "/fsx/sfr/data/jiuhai/hub/models--csuhan--TA-Tok/snapshots/ac4dbb76a42d8b8bc92532b6fdb90c42e2d0a447/ta_tok.pth",
|
29 |
+
"mm_vision_tower_lr": null,
|
30 |
+
"model_type": "qwen3",
|
31 |
+
"num_attention_heads": 16,
|
32 |
+
"num_hidden_layers": 28,
|
33 |
+
"num_image_tokens": 65536,
|
34 |
+
"num_key_value_heads": 8,
|
35 |
+
"num_new_tokens": 65536,
|
36 |
+
"num_scale_tokens": 3,
|
37 |
+
"pos_skipping_range": 4096,
|
38 |
+
"rms_norm_eps": 1e-06,
|
39 |
+
"rope_scaling": null,
|
40 |
+
"rope_theta": 1000000,
|
41 |
+
"scale_end_token_id": 151673,
|
42 |
+
"scale_start_token_id": 151671,
|
43 |
+
"sliding_window": null,
|
44 |
+
"tie_word_embeddings": true,
|
45 |
+
"tokenizer_model_max_length": 2048,
|
46 |
+
"tokenizer_padding_side": "right",
|
47 |
+
"torch_dtype": "bfloat16",
|
48 |
+
"transformers_version": "4.51.3",
|
49 |
+
"use_cache": false,
|
50 |
+
"use_mm_proj": true,
|
51 |
+
"use_pos_skipping": false,
|
52 |
+
"use_sliding_window": false,
|
53 |
+
"vision_tower_pretrained": null,
|
54 |
+
"vocab_size": 217210
|
55 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 151643,
|
3 |
+
"do_sample": true,
|
4 |
+
"eos_token_id": [
|
5 |
+
151645,
|
6 |
+
151643
|
7 |
+
],
|
8 |
+
"pad_token_id": 151643,
|
9 |
+
"temperature": 0.6,
|
10 |
+
"top_k": 20,
|
11 |
+
"top_p": 0.95,
|
12 |
+
"transformers_version": "4.51.3"
|
13 |
+
}
|
global_step336000/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cdf799c265c5f664d943a104f90d535058aebe2467cea4fd7fee56094d966d1
|
3 |
+
size 702932887
|
global_step336000/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b59c95ce3ebbd68b9ba4d5d7589407450014a6f38326bf85a20d925c0b49d92c
|
3 |
+
size 702933079
|
global_step336000/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44d272dbf974f9152a1f059f62b328a04a8257a0081b9636de499c39b97368a0
|
3 |
+
size 702932887
|
global_step336000/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43370983b67de93ef4e8e4b3b329975f6a45a93a5475712d0c411b82955d5893
|
3 |
+
size 702933079
|
global_step336000/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5aaad2d1f98047c1cdfdb2013c4f06339e3a64107fea7b972327e35200fde734
|
3 |
+
size 702932048
|
latest
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
global_step336000
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfbf3a93ea2fd6af0447b3c08e437c33f74355fe43bd990a2e877f9d777c37de
|
3 |
+
size 15920
|
rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e07fea7a8fd74cd9034b2218ea7194c0839396b3d00db1ac217618319daee0cd
|
3 |
+
size 15920
|
rng_state_10.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af2f2cc068d265366e8c73805bbe2c815be680c30d74781be11b09403ba1a077
|
3 |
+
size 15933
|
rng_state_11.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98749aedd46278865b70e368188f3d6fe1741b581f68480f16f947569a6b89aa
|
3 |
+
size 15933
|
rng_state_12.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0682422747eaf8eeabfd55efcd7f7b8683bab141309988c2ddb085f60c3a6331
|
3 |
+
size 15933
|
rng_state_13.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4ccde51d36b282fe4a74a3a196b133fe38d0d4e2479fbcfafbce8d29dcee70c
|
3 |
+
size 15933
|
rng_state_14.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7253d19e57bbecc244671ae253a5095c52aa4e0295b8fe4338d8431186f1d7d
|
3 |
+
size 15933
|
rng_state_15.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c0f3fcfd4dd30a0a539f5b42028ddbd57bf12caaf98db77228eeb4ca2923c11
|
3 |
+
size 15933
|
rng_state_16.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:181608d3996463ef618ecb026d5baae69d8a68164f0e520d4872d2218bd22c16
|
3 |
+
size 15933
|
rng_state_17.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0b6885374e8fcf7b3dc30869e5f80a3428c4d0b9695d925e696149118efb9be
|
3 |
+
size 15933
|
rng_state_18.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:071dc61a6a2ae0de5c34125be067fb4392a931bf121fc25d71283dd961a8b76d
|
3 |
+
size 15933
|
rng_state_19.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cf0c5221a93bbdea7ff07d19688b295ff7bb7a6c29892478a7c0c1e92d72228
|
3 |
+
size 15933
|
rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ce12b772209c6975f4bbdcab4f7cdc2bab62accfb01f001151a468fee01e498
|
3 |
+
size 15920
|
rng_state_20.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fb9aae6d5cdad174a93713762bce69618c5a452ea2fc66d97bb626507552c8c
|
3 |
+
size 15933
|
rng_state_21.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:467b736e849b5e0aa02e34d59ea320b507974d5de603a7db1b72cd7c108ad8cf
|
3 |
+
size 15933
|
rng_state_22.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d07e2adfa56b9a27ddf3b9b6e5cc20401894fc15ea9b36c736af0fd610d036f
|
3 |
+
size 15933
|
rng_state_23.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb8274c755286261911fe1402d962d14c056c4cc4c6eb0d8d8fd1a1ea0e290d3
|
3 |
+
size 15933
|
rng_state_24.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a50ffa48f0233ae45c301fa6920573da13bdf8fd44ddcaa48f55183e8b470c9
|
3 |
+
size 15933
|
rng_state_25.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc56ce423ee26104c1cb8f4c53ef0ed88a3dc9079b6588fe0dec645aa5f9aadc
|
3 |
+
size 15933
|
rng_state_26.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d29d8d05878da34d1193a6ef897db19d5c2d2f9e85f18493b4297a54c143036b
|
3 |
+
size 15933
|
rng_state_27.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5e4a82f38fd2c214c01bd5b320be6b53cdfdf33f46037269ff65fecad57a3f0
|
3 |
+
size 15933
|
rng_state_28.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:110c52848a7135de3c20a854a50283387ebedbe8079a07560b36dbe93756df05
|
3 |
+
size 15933
|
rng_state_29.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0983e8bfe96b0ae0fc27ff006076ef46d37c3587b8c3df6ec8665b6400998550
|
3 |
+
size 15933
|
rng_state_3.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68c304bacda072f0bd316bfb1da834ec6b9339cfc9166d6e051f11707e81d9c0
|
3 |
+
size 15920
|
rng_state_30.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66107ce55d0ae8583205c90819716e8858da1d078c5b7857590d931694127330
|
3 |
+
size 15933
|
rng_state_31.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10c2a5b01e439b08654bf1368207f34556d0220bf7e225dc6a6eab115967de11
|
3 |
+
size 15933
|
rng_state_4.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67266457cbd516b91c79d9c05c53f3a99dce9a94aa7e7054ec68073dbf2721e0
|
3 |
+
size 15920
|
rng_state_5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8417c6231e84c539a6252368eab51eab5f398bd5eedb62b2696f8c8e94207e74
|
3 |
+
size 15920
|
rng_state_6.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8511bbfbdb5e61ca72d4908f93d7312b55286de4c11e8f8f85f6e3821adf1e88
|
3 |
+
size 15920
|
rng_state_7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1688a5b2a7efc15d086844a150e22cabfe7a10dc71bd9c31c05736d244e714b
|
3 |
+
size 15920
|
rng_state_8.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da3f793232767178f6621edf393c801a9a5a716df3c3e8173b3f3a0d4f1fced5
|
3 |
+
size 15920
|
rng_state_9.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86a1721f10b6b30ec13c2ab33cf5dd58abc23c3dab6e788a136a03c51856d535
|
3 |
+
size 15920
|
special_tokens_map.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|im_start|>",
|
4 |
+
"<|im_end|>",
|
5 |
+
"<|object_ref_start|>",
|
6 |
+
"<|object_ref_end|>",
|
7 |
+
"<|box_start|>",
|
8 |
+
"<|box_end|>",
|
9 |
+
"<|quad_start|>",
|
10 |
+
"<|quad_end|>",
|
11 |
+
"<|vision_start|>",
|
12 |
+
"<|vision_end|>",
|
13 |
+
"<|vision_pad|>",
|
14 |
+
"<|image_pad|>",
|
15 |
+
"<|video_pad|>"
|
16 |
+
],
|
17 |
+
"eos_token": {
|
18 |
+
"content": "<|im_end|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
},
|
24 |
+
"pad_token": {
|
25 |
+
"content": "<|endoftext|>",
|
26 |
+
"lstrip": false,
|
27 |
+
"normalized": false,
|
28 |
+
"rstrip": false,
|
29 |
+
"single_word": false
|
30 |
+
}
|
31 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:199f2519b835cd3333c7926d458699c990256337ce10988527a10004b735c8f0
|
3 |
+
size 23536619
|
tokenizer_config.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93dc1094a0340b71d081e72e9e582d6b3f40b7d67765688ffe41016ae2f9e593
|
3 |
+
size 11468342
|
trainer_state.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:597afc78efc0026a2ace1ef543495225eabda8993f90bc255af38adc45fac8b2
|
3 |
+
size 11883205
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1337ac8e4c3eb5b0dfc493b2ac86ec811a4a2d0f5e8afb2ec17633d49fed1afa
|
3 |
+
size 7224
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|