Delta-Vector committed on
Commit
347e531
·
verified ·
1 Parent(s): 0f451fa

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: []
3
+ library_name: transformers
4
+ tags:
5
+ - mergekit
6
+ - merge
7
+
8
+ ---
9
+ # GLM-Mag
10
+
11
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
12
+
13
+ ## Merge Details
14
+ ### Merge Method
15
+
16
+ This model was merged using the Passthrough merge method, with /home/quixi/storage/models/GLM-Tulu + /home/quixi/storage/models/Rei-SFT-Lora as the base.
17
+
18
+ ### Models Merged
19
+
20
+ The following models were included in the merge:
21
+
22
+ * /home/quixi/storage/models/GLM-Tulu + /home/quixi/storage/models/Rei-SFT-Lora
23
+ ### Configuration
24
+
25
+ The following YAML configuration was used to produce this model:
26
+
27
+ ```yaml
28
+ base_model: /home/quixi/storage/models/GLM-Tulu+/home/quixi/storage/models/Rei-SFT-Lora
29
+ dtype: bfloat16
30
+ merge_method: passthrough
31
+ models:
32
+ - model: /home/quixi/storage/models/GLM-Tulu+/home/quixi/storage/models/Rei-SFT-Lora
33
+
34
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '
2
+ ' + message['content'] + '<|im_end|>' + '
3
+ '}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
4
+ ' }}{% endif %}
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Glm4ForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "eos_token_id": 151343,
8
+ "head_dim": 128,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 6144,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 23040,
13
+ "max_position_embeddings": 32768,
14
+ "model_type": "glm4",
15
+ "num_attention_heads": 48,
16
+ "num_hidden_layers": 61,
17
+ "num_key_value_heads": 2,
18
+ "pad_token_id": 151329,
19
+ "partial_rotary_factor": 0.5,
20
+ "rms_norm_eps": 1e-05,
21
+ "rope_theta": 10000.0,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.52.4",
25
+ "use_cache": false,
26
+ "vocab_size": 151345
27
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ base_model: /home/quixi/storage/models/GLM-Tulu+/home/quixi/storage/models/Rei-SFT-Lora
2
+ dtype: bfloat16
3
+ merge_method: passthrough
4
+ models:
5
+ - model: /home/quixi/storage/models/GLM-Tulu+/home/quixi/storage/models/Rei-SFT-Lora
model-00001-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9c87b8d7ebf09a17326af2457ffa2ad33658dcf62557812f0ab44cedc4fed64
3
+ size 4726150632
model-00002-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cc3f799036608ba1a35053b78d8753ab5dc92fbcb8a6062db8213ab7b2ac6c1
3
+ size 4954760608
model-00003-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b37eeefbf8bf8339c2e3aa0c94a244fb774fbe0f59d3af3e317921d5ae920d6
3
+ size 4957918848
model-00004-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24780b6ff5c3b6b8ff1a1042ff82c63b032063f45ba2f3b78cd6f1a7e122965b
3
+ size 4464002216
model-00005-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfbf92e624994b0114a001892011b09191089b78a94cafffb817392f6ebae38e
3
+ size 4750300808
model-00006-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1aec20a05df5cb73d9cd5808f1b04a666ea44d9ca86646ac715d73386974205
3
+ size 4954760608
model-00007-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0bc2cbba09745dd23bc2977409011c5bee4190de1eb9e1aa0b1e5c3c8cb5bff
3
+ size 4957918848
model-00008-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e86ac0d12afe2260d4529726a9f8f5cfb295ca8760a75681c43e9c6e61efda
3
+ size 4464002224
model-00009-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a36201a66c4b60be2cddd4d863b8c8775924b8664f8cb7ab5e525c2d25261248
3
+ size 4750300800
model-00010-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f4765068a78fad2faad69fcecaa155342f74099557f537ab29e7179159bc62a
3
+ size 4954760616
model-00011-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9078d5996e0da21352a342edfcbc4c6ce610a12a7b8da3a1d518dbb187a50e8
3
+ size 4957918840
model-00012-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:352d3b06a74dbfe21a36fa1d4865ace4ecfdc2bfeee8cf92894789a0d27a038d
3
+ size 4464002224
model-00013-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6142485d54fffdc1e816ac8752acf3dcb34f972886231138152bcf77b30640e
3
+ size 4750300800
model-00014-of-00014.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57997255ae102b0bc7c8eff9bf741917898789783f6c26d9bd47c1fd3c9b4664
3
+ size 3020049832
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.1.3"}, "weight_map": {"lm_head.weight": "model-00001-of-00014.safetensors", "model.embed_tokens.weight": "model-00001-of-00014.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00014.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00001-of-00014.safetensors", "model.layers.0.mlp.gate_up_proj.weight": "model-00001-of-00014.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00014.safetensors", "model.layers.0.post_mlp_layernorm.weight": "model-00001-of-00014.safetensors", "model.layers.0.post_self_attn_layernorm.weight": "model-00001-of-00014.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00014.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00014.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00014.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00014.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00014.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00002-of-00014.safetensors", "model.layers.1.mlp.gate_up_proj.weight": "model-00002-of-00014.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.1.post_mlp_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.1.post_self_attn_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", "model.layers.10.input_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00002-of-00014.safetensors", "model.layers.10.mlp.gate_up_proj.weight": 
"model-00002-of-00014.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.10.post_mlp_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.10.post_self_attn_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", "model.layers.11.input_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00002-of-00014.safetensors", "model.layers.11.mlp.gate_up_proj.weight": "model-00002-of-00014.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.11.post_mlp_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.11.post_self_attn_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", "model.layers.12.input_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00002-of-00014.safetensors", "model.layers.12.mlp.gate_up_proj.weight": "model-00002-of-00014.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.12.post_mlp_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.12.post_self_attn_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", 
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", "model.layers.13.input_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00002-of-00014.safetensors", "model.layers.13.mlp.gate_up_proj.weight": "model-00002-of-00014.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.13.post_mlp_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.13.post_self_attn_layernorm.weight": "model-00002-of-00014.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", "model.layers.14.input_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00003-of-00014.safetensors", "model.layers.14.mlp.gate_up_proj.weight": "model-00003-of-00014.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.14.post_mlp_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.14.post_self_attn_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", "model.layers.15.input_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00003-of-00014.safetensors", 
"model.layers.15.mlp.gate_up_proj.weight": "model-00003-of-00014.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.15.post_mlp_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.15.post_self_attn_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", "model.layers.16.input_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.16.mlp.down_proj.weight": "model-00003-of-00014.safetensors", "model.layers.16.mlp.gate_up_proj.weight": "model-00003-of-00014.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.16.post_mlp_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.16.post_self_attn_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", "model.layers.17.input_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00003-of-00014.safetensors", "model.layers.17.mlp.gate_up_proj.weight": "model-00003-of-00014.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.17.post_mlp_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.17.post_self_attn_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.17.self_attn.k_proj.weight": 
"model-00003-of-00014.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", "model.layers.18.input_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00003-of-00014.safetensors", "model.layers.18.mlp.gate_up_proj.weight": "model-00003-of-00014.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.18.post_mlp_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.18.post_self_attn_layernorm.weight": "model-00003-of-00014.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", "model.layers.19.input_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00004-of-00014.safetensors", "model.layers.19.mlp.gate_up_proj.weight": "model-00004-of-00014.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.19.post_mlp_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.19.post_self_attn_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", "model.layers.2.input_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.2.mlp.down_proj.weight": 
"model-00004-of-00014.safetensors", "model.layers.2.mlp.gate_up_proj.weight": "model-00004-of-00014.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.2.post_mlp_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.2.post_self_attn_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", "model.layers.20.input_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00004-of-00014.safetensors", "model.layers.20.mlp.gate_up_proj.weight": "model-00004-of-00014.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.20.post_mlp_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.20.post_self_attn_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", "model.layers.21.input_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00004-of-00014.safetensors", "model.layers.21.mlp.gate_up_proj.weight": "model-00004-of-00014.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.21.post_mlp_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.21.post_self_attn_layernorm.weight": "model-00004-of-00014.safetensors", 
"model.layers.21.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", "model.layers.22.input_layernorm.weight": "model-00004-of-00014.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00004-of-00014.safetensors", "model.layers.22.mlp.gate_up_proj.weight": "model-00005-of-00014.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.22.post_mlp_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.22.post_self_attn_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", "model.layers.23.input_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00005-of-00014.safetensors", "model.layers.23.mlp.gate_up_proj.weight": "model-00005-of-00014.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.23.post_mlp_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.23.post_self_attn_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", "model.layers.24.input_layernorm.weight": 
"model-00005-of-00014.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00005-of-00014.safetensors", "model.layers.24.mlp.gate_up_proj.weight": "model-00005-of-00014.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.24.post_mlp_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.24.post_self_attn_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", "model.layers.25.input_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00005-of-00014.safetensors", "model.layers.25.mlp.gate_up_proj.weight": "model-00005-of-00014.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.25.post_mlp_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.25.post_self_attn_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", "model.layers.26.input_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00005-of-00014.safetensors", "model.layers.26.mlp.gate_up_proj.weight": "model-00005-of-00014.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.26.post_mlp_layernorm.weight": "model-00005-of-00014.safetensors", 
"model.layers.26.post_self_attn_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", "model.layers.27.input_layernorm.weight": "model-00005-of-00014.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00006-of-00014.safetensors", "model.layers.27.mlp.gate_up_proj.weight": "model-00006-of-00014.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.27.post_mlp_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.27.post_self_attn_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", "model.layers.28.input_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.28.mlp.down_proj.weight": "model-00006-of-00014.safetensors", "model.layers.28.mlp.gate_up_proj.weight": "model-00006-of-00014.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.28.post_mlp_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.28.post_self_attn_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", "model.layers.28.self_attn.v_proj.weight": 
"model-00006-of-00014.safetensors", "model.layers.29.input_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.29.mlp.down_proj.weight": "model-00006-of-00014.safetensors", "model.layers.29.mlp.gate_up_proj.weight": "model-00006-of-00014.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.29.post_mlp_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.29.post_self_attn_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", "model.layers.3.input_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00006-of-00014.safetensors", "model.layers.3.mlp.gate_up_proj.weight": "model-00006-of-00014.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.3.post_mlp_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.3.post_self_attn_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", "model.layers.30.input_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.30.mlp.down_proj.weight": "model-00006-of-00014.safetensors", "model.layers.30.mlp.gate_up_proj.weight": "model-00006-of-00014.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.30.post_mlp_layernorm.weight": 
"model-00006-of-00014.safetensors", "model.layers.30.post_self_attn_layernorm.weight": "model-00006-of-00014.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", "model.layers.31.input_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.31.mlp.down_proj.weight": "model-00007-of-00014.safetensors", "model.layers.31.mlp.gate_up_proj.weight": "model-00007-of-00014.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.31.post_mlp_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.31.post_self_attn_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", "model.layers.32.input_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.32.mlp.down_proj.weight": "model-00007-of-00014.safetensors", "model.layers.32.mlp.gate_up_proj.weight": "model-00007-of-00014.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.32.post_mlp_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.32.post_self_attn_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", "model.layers.32.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", "model.layers.32.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", 
"model.layers.32.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", "model.layers.33.input_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.33.mlp.down_proj.weight": "model-00007-of-00014.safetensors", "model.layers.33.mlp.gate_up_proj.weight": "model-00007-of-00014.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.33.post_mlp_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.33.post_self_attn_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", "model.layers.33.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", "model.layers.33.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", "model.layers.34.input_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.34.mlp.down_proj.weight": "model-00007-of-00014.safetensors", "model.layers.34.mlp.gate_up_proj.weight": "model-00007-of-00014.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.34.post_mlp_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.34.post_self_attn_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", "model.layers.34.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", "model.layers.34.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", "model.layers.35.input_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.35.mlp.down_proj.weight": "model-00007-of-00014.safetensors", "model.layers.35.mlp.gate_up_proj.weight": "model-00007-of-00014.safetensors", "model.layers.35.post_attention_layernorm.weight": 
"model-00007-of-00014.safetensors", "model.layers.35.post_mlp_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.35.post_self_attn_layernorm.weight": "model-00007-of-00014.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", "model.layers.35.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", "model.layers.35.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", "model.layers.36.input_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.36.mlp.down_proj.weight": "model-00008-of-00014.safetensors", "model.layers.36.mlp.gate_up_proj.weight": "model-00008-of-00014.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.36.post_mlp_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.36.post_self_attn_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.36.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", "model.layers.36.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", "model.layers.36.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", "model.layers.37.input_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.37.mlp.down_proj.weight": "model-00008-of-00014.safetensors", "model.layers.37.mlp.gate_up_proj.weight": "model-00008-of-00014.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.37.post_mlp_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.37.post_self_attn_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", "model.layers.37.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", 
"model.layers.37.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", "model.layers.38.input_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.38.mlp.down_proj.weight": "model-00008-of-00014.safetensors", "model.layers.38.mlp.gate_up_proj.weight": "model-00008-of-00014.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.38.post_mlp_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.38.post_self_attn_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", "model.layers.38.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", "model.layers.38.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", "model.layers.39.input_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.39.mlp.down_proj.weight": "model-00008-of-00014.safetensors", "model.layers.39.mlp.gate_up_proj.weight": "model-00008-of-00014.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.39.post_mlp_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.39.post_self_attn_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", "model.layers.39.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", "model.layers.39.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", "model.layers.4.input_layernorm.weight": "model-00008-of-00014.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00008-of-00014.safetensors", "model.layers.4.mlp.gate_up_proj.weight": "model-00009-of-00014.safetensors", 
"model.layers.4.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.4.post_mlp_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.4.post_self_attn_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", "model.layers.40.input_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.40.mlp.down_proj.weight": "model-00009-of-00014.safetensors", "model.layers.40.mlp.gate_up_proj.weight": "model-00009-of-00014.safetensors", "model.layers.40.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.40.post_mlp_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.40.post_self_attn_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.40.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", "model.layers.40.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", "model.layers.40.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", "model.layers.40.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", "model.layers.41.input_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.41.mlp.down_proj.weight": "model-00009-of-00014.safetensors", "model.layers.41.mlp.gate_up_proj.weight": "model-00009-of-00014.safetensors", "model.layers.41.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.41.post_mlp_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.41.post_self_attn_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.41.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", "model.layers.41.self_attn.o_proj.weight": 
"model-00009-of-00014.safetensors", "model.layers.41.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", "model.layers.41.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", "model.layers.42.input_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.42.mlp.down_proj.weight": "model-00009-of-00014.safetensors", "model.layers.42.mlp.gate_up_proj.weight": "model-00009-of-00014.safetensors", "model.layers.42.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.42.post_mlp_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.42.post_self_attn_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.42.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", "model.layers.42.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", "model.layers.42.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", "model.layers.42.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", "model.layers.43.input_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.43.mlp.down_proj.weight": "model-00009-of-00014.safetensors", "model.layers.43.mlp.gate_up_proj.weight": "model-00009-of-00014.safetensors", "model.layers.43.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.43.post_mlp_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.43.post_self_attn_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.43.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", "model.layers.43.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", "model.layers.43.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", "model.layers.43.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", "model.layers.44.input_layernorm.weight": "model-00009-of-00014.safetensors", "model.layers.44.mlp.down_proj.weight": "model-00010-of-00014.safetensors", "model.layers.44.mlp.gate_up_proj.weight": 
"model-00010-of-00014.safetensors", "model.layers.44.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.44.post_mlp_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.44.post_self_attn_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.44.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", "model.layers.44.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", "model.layers.44.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", "model.layers.44.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", "model.layers.45.input_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.45.mlp.down_proj.weight": "model-00010-of-00014.safetensors", "model.layers.45.mlp.gate_up_proj.weight": "model-00010-of-00014.safetensors", "model.layers.45.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.45.post_mlp_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.45.post_self_attn_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.45.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", "model.layers.45.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", "model.layers.45.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", "model.layers.45.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", "model.layers.46.input_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.46.mlp.down_proj.weight": "model-00010-of-00014.safetensors", "model.layers.46.mlp.gate_up_proj.weight": "model-00010-of-00014.safetensors", "model.layers.46.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.46.post_mlp_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.46.post_self_attn_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.46.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", 
"model.layers.46.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", "model.layers.46.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", "model.layers.46.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", "model.layers.47.input_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.47.mlp.down_proj.weight": "model-00010-of-00014.safetensors", "model.layers.47.mlp.gate_up_proj.weight": "model-00010-of-00014.safetensors", "model.layers.47.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.47.post_mlp_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.47.post_self_attn_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.47.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", "model.layers.47.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", "model.layers.47.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", "model.layers.47.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", "model.layers.48.input_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.48.mlp.down_proj.weight": "model-00010-of-00014.safetensors", "model.layers.48.mlp.gate_up_proj.weight": "model-00010-of-00014.safetensors", "model.layers.48.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.48.post_mlp_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.48.post_self_attn_layernorm.weight": "model-00010-of-00014.safetensors", "model.layers.48.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", "model.layers.48.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", "model.layers.48.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", "model.layers.48.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", "model.layers.49.input_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.49.mlp.down_proj.weight": "model-00011-of-00014.safetensors", 
"model.layers.49.mlp.gate_up_proj.weight": "model-00011-of-00014.safetensors", "model.layers.49.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.49.post_mlp_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.49.post_self_attn_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.49.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", "model.layers.49.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", "model.layers.49.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", "model.layers.49.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", "model.layers.5.input_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00011-of-00014.safetensors", "model.layers.5.mlp.gate_up_proj.weight": "model-00011-of-00014.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.5.post_mlp_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.5.post_self_attn_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", "model.layers.50.input_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.50.mlp.down_proj.weight": "model-00011-of-00014.safetensors", "model.layers.50.mlp.gate_up_proj.weight": "model-00011-of-00014.safetensors", "model.layers.50.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.50.post_mlp_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.50.post_self_attn_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.50.self_attn.k_proj.weight": 
"model-00011-of-00014.safetensors", "model.layers.50.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", "model.layers.50.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", "model.layers.50.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", "model.layers.51.input_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.51.mlp.down_proj.weight": "model-00011-of-00014.safetensors", "model.layers.51.mlp.gate_up_proj.weight": "model-00011-of-00014.safetensors", "model.layers.51.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.51.post_mlp_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.51.post_self_attn_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.51.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", "model.layers.51.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", "model.layers.51.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", "model.layers.51.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", "model.layers.52.input_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.52.mlp.down_proj.weight": "model-00011-of-00014.safetensors", "model.layers.52.mlp.gate_up_proj.weight": "model-00011-of-00014.safetensors", "model.layers.52.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.52.post_mlp_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.52.post_self_attn_layernorm.weight": "model-00011-of-00014.safetensors", "model.layers.52.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", "model.layers.52.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", "model.layers.52.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", "model.layers.52.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", "model.layers.53.input_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.53.mlp.down_proj.weight": 
"model-00012-of-00014.safetensors", "model.layers.53.mlp.gate_up_proj.weight": "model-00012-of-00014.safetensors", "model.layers.53.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.53.post_mlp_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.53.post_self_attn_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.53.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", "model.layers.53.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", "model.layers.53.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", "model.layers.53.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", "model.layers.54.input_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.54.mlp.down_proj.weight": "model-00012-of-00014.safetensors", "model.layers.54.mlp.gate_up_proj.weight": "model-00012-of-00014.safetensors", "model.layers.54.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.54.post_mlp_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.54.post_self_attn_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.54.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", "model.layers.54.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", "model.layers.54.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", "model.layers.54.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", "model.layers.55.input_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.55.mlp.down_proj.weight": "model-00012-of-00014.safetensors", "model.layers.55.mlp.gate_up_proj.weight": "model-00012-of-00014.safetensors", "model.layers.55.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.55.post_mlp_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.55.post_self_attn_layernorm.weight": "model-00012-of-00014.safetensors", 
"model.layers.55.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", "model.layers.55.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", "model.layers.55.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", "model.layers.55.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", "model.layers.56.input_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.56.mlp.down_proj.weight": "model-00012-of-00014.safetensors", "model.layers.56.mlp.gate_up_proj.weight": "model-00012-of-00014.safetensors", "model.layers.56.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.56.post_mlp_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.56.post_self_attn_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.56.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", "model.layers.56.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", "model.layers.56.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", "model.layers.56.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", "model.layers.57.input_layernorm.weight": "model-00012-of-00014.safetensors", "model.layers.57.mlp.down_proj.weight": "model-00012-of-00014.safetensors", "model.layers.57.mlp.gate_up_proj.weight": "model-00013-of-00014.safetensors", "model.layers.57.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.57.post_mlp_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.57.post_self_attn_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.57.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", "model.layers.57.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", "model.layers.57.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", "model.layers.57.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", "model.layers.58.input_layernorm.weight": 
"model-00013-of-00014.safetensors", "model.layers.58.mlp.down_proj.weight": "model-00013-of-00014.safetensors", "model.layers.58.mlp.gate_up_proj.weight": "model-00013-of-00014.safetensors", "model.layers.58.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.58.post_mlp_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.58.post_self_attn_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.58.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", "model.layers.58.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", "model.layers.58.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", "model.layers.58.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", "model.layers.59.input_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.59.mlp.down_proj.weight": "model-00013-of-00014.safetensors", "model.layers.59.mlp.gate_up_proj.weight": "model-00013-of-00014.safetensors", "model.layers.59.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.59.post_mlp_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.59.post_self_attn_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.59.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", "model.layers.59.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", "model.layers.59.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", "model.layers.59.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", "model.layers.6.input_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00013-of-00014.safetensors", "model.layers.6.mlp.gate_up_proj.weight": "model-00013-of-00014.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.6.post_mlp_layernorm.weight": "model-00013-of-00014.safetensors", 
"model.layers.6.post_self_attn_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", "model.layers.60.input_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.60.mlp.down_proj.weight": "model-00013-of-00014.safetensors", "model.layers.60.mlp.gate_up_proj.weight": "model-00013-of-00014.safetensors", "model.layers.60.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.60.post_mlp_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.60.post_self_attn_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.60.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", "model.layers.60.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", "model.layers.60.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", "model.layers.60.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", "model.layers.7.input_layernorm.weight": "model-00013-of-00014.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00014-of-00014.safetensors", "model.layers.7.mlp.gate_up_proj.weight": "model-00014-of-00014.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.7.post_mlp_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.7.post_self_attn_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00014-of-00014.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00014-of-00014.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00014-of-00014.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00014-of-00014.safetensors", 
"model.layers.8.input_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00014-of-00014.safetensors", "model.layers.8.mlp.gate_up_proj.weight": "model-00014-of-00014.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.8.post_mlp_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.8.post_self_attn_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00014-of-00014.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00014-of-00014.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00014-of-00014.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00014-of-00014.safetensors", "model.layers.9.input_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00014-of-00014.safetensors", "model.layers.9.mlp.gate_up_proj.weight": "model-00014-of-00014.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.9.post_mlp_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.9.post_self_attn_layernorm.weight": "model-00014-of-00014.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00014.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00014.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00014.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00014.safetensors", "model.norm.weight": "model-00014-of-00014.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "[MASK]",
5
+ "[gMASK]",
6
+ "[sMASK]",
7
+ "<sop>",
8
+ "<eop>",
9
+ "<|system|>",
10
+ "<|user|>",
11
+ "<|assistant|>",
12
+ "<|observation|>",
13
+ "<|begin_of_image|>",
14
+ "<|end_of_image|>",
15
+ "<|begin_of_video|>",
16
+ "<|end_of_video|>"
17
+ ],
18
+ "eos_token": {
19
+ "content": "<|im_end|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "pad_token": {
26
+ "content": "<|endoftext|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ }
32
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:373d3e2e65cc5e215af93845ef1f79d849095ee1428047c2909814e297bc2d33
3
+ size 19966873
tokenizer_config.json ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "151329": {
4
+ "content": "<|endoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "151330": {
12
+ "content": "[MASK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "151331": {
20
+ "content": "[gMASK]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "151332": {
28
+ "content": "[sMASK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "151333": {
36
+ "content": "<sop>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "151334": {
44
+ "content": "<eop>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "151335": {
52
+ "content": "<|system|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "151336": {
60
+ "content": "<|user|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "151337": {
68
+ "content": "<|assistant|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "151338": {
76
+ "content": "<|observation|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "151339": {
84
+ "content": "<|begin_of_image|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "151340": {
92
+ "content": "<|end_of_image|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "151341": {
100
+ "content": "<|begin_of_video|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "151342": {
108
+ "content": "<|end_of_video|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "151343": {
116
+ "content": "<|im_end|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "151344": {
124
+ "content": "<|im_start|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ }
131
+ },
132
+ "additional_special_tokens": [
133
+ "<|endoftext|>",
134
+ "[MASK]",
135
+ "[gMASK]",
136
+ "[sMASK]",
137
+ "<sop>",
138
+ "<eop>",
139
+ "<|system|>",
140
+ "<|user|>",
141
+ "<|assistant|>",
142
+ "<|observation|>",
143
+ "<|begin_of_image|>",
144
+ "<|end_of_image|>",
145
+ "<|begin_of_video|>",
146
+ "<|end_of_video|>"
147
+ ],
148
+ "clean_up_tokenization_spaces": false,
149
+ "do_lower_case": false,
150
+ "eos_token": "<|im_end|>",
151
+ "extra_special_tokens": {},
152
+ "model_input_names": [
153
+ "input_ids",
154
+ "attention_mask"
155
+ ],
156
+ "model_max_length": 128000,
157
+ "pad_token": "<|endoftext|>",
158
+ "padding_side": "left",
159
+ "remove_space": false,
160
+ "tokenizer_class": "PreTrainedTokenizer"
161
+ }