numen-tech's picture
Add weights
d8b1c6e
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4141007104.0,
"BitsPerParam": 4.070121093571116
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262672384,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
128258,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262672384,
"byteOffset": 0
}
],
"md5sum": "ec0ca2851615084283d1736f30a6db82"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c042f4edf5d58fe28fcc5dadd224578a"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ef6fa8940374fe644908f19e90d8458c"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 262672384,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
128258,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262672384,
"byteOffset": 0
}
],
"md5sum": "a579a063407189a057a32ee8b074bd6c"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "46e6c311e305072c41142036da6b6d6a"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cd0904a029480dac893846a0987eff82"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 21963008,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
128258,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8208512,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8208512
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 8216704
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 9134208
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10969216
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10977408
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
128258,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8208512,
"byteOffset": 10985600
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 19194112
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 19202304
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 20119808
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21954816
}
],
"md5sum": "454e7de6845cd0449f0c0571cc96f37e"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bf3b178a18260a4ae7cd6490b16852c4"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c3177083722c1f3b7c54b86ed740922d"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "35df3193448ed1573f3cfbf3e18608ea"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "69e18821591edf2d80105709cfe414c2"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "31c46238a7883de9bdf8e2d42c8e784f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "6f4a89ec5bbd98c8b924ee578e2abcea"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "15a3ab03c5b3c07e87850a080ca3fba9"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2f3ec5bc7c0267de1b80c9d591474f23"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "88cca267bac0175382a45694a357557f"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cac70d3b5ef45ef21050dce6e706cef2"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "28db7ded0ee7e56fb7c884fa1a34e674"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "3c91be453f2e76f23ac825e5eb64531c"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0d8b4841d65edb15cf48a3bbbc39488a"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "05808ce15a75d6af1d58f133115b4d96"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "52f9868951453ba10eba569f24204679"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2086368ad7dbf551e35868612deecc7f"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fa4031273ce2f468013e563b63c69ebc"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "1c2c8eab5eb40e2f9586b7cd5a29eab8"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a376019618acae7233282312a7d064dc"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fdc6e766b0e7b828dbc13392bf20c35a"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "e64fa0e31a1b72d2c72a68444d7f7e0f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "30b22d71dfce18a4ba73538206c83cb9"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f9df77f012bbf04233b3ecf2ca940e6c"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 24379392,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21626880
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22544384
}
],
"md5sum": "294e261d0c36df6acec33f8bf0e1eb64"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3b2d458b019b9230adfbb5a46ddd4edc"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e98e4ec5d96693e01c34ee6a0e428ac0"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "09865536ba22d89519a3948e85d8590f"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "35c3f02201bee8f3b215be59e44a551b"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "23ce15cbf03ec1591ccc9747a148600c"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "1ad620d05c0d6f303a5c248631e19824"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4bca1a257935c6495dc51e50349bd992"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0cfe32836d0117a6a5b5c7007359a770"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "ae842b71904da1658e82feb33b7d1a02"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d80a1d32d70605d72e12cf42b14fc0cd"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "14999a5c62e8767971c2133d8eaa2477"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "099d62745411e2eda141b660e03e178a"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5f81e0cd92f843a4f67c4729a38cb894"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "490293077acf6561686ae27568364553"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "340b039cf8df4fbfdff1c3a6aa7210d7"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5cc781dcbcae4bb06c8d7fc68efa41d6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "48e7689c28995a374d8326c3e97d97c5"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "76a0b1345c2cb76137ea2d30f8b7ea5e"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6345031c28dd2582dca3335ce199a3e5"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bc44bb19f666ac1a8b614378280bddf7"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "040de83ef85c433117f9ebdb79bac34b"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1697a6d7a48c6b7915f632ae1851ec21"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f5efd17f727481b1e3ee8f439e4b60b8"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "02773d86ede6db593745c03fabc60afe"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "17db022c0764dd17a688695e19e02993"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6cc5e8a2e516cf1c0b8ce568a15d8eb2"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "26ec57a65d574ba566ec0bba665e7dee"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1441f61a3e20bff3546a12bf30a6ddc2"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ab52da15f9e266a493376d02fc6bb914"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "1da87f822781a87f60ae240eeb4bda89"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d91d6f8fc08be66b9ef55f49d8fb520a"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a0e4ef476d600ea0dbbf404b6b276f82"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 24412160,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21635072
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21643264
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21651456
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22568960
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24403968
}
],
"md5sum": "fddad52eb794e94586a383832be93986"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c76174e74f46b049863dfa133edbd9bc"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fcfb2c4ab39a2c627d2ca1cd2bebaa0f"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "00261b5a3ae50cfecbffc394672f0d5c"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "187ff600adba4dd47f65ecd05a231c6b"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "81d67262e2b9436c2eb2dc67b85745f3"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "565d3f28f05ad0d27b057a79b2d07779"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3c02c13d56dacb961464645114db4ae7"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3d744911a199075ef5200f8748f24c3f"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "bce3229aad11df6c51c30030f7b7b8ee"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b38eed833970af76572d0d94cdee16aa"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b78243daa5f62cacfe4653fa4e49fe8b"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "8257a33cce5f6a6648052c21202fea4f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "844ae8741475491a64f21ac9294a2fb4"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "79fe55a42bced6aed55abf05d38c213c"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "87bbaa99746ce2063ea5743a64f00e31"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "17e26027b4471b9e581e09a01d117720"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1c874c811c27b6ddef0a1f0993e6557e"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "e8fc5496bb14122cb9af43121b033f57"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6f03a14448ce917b64f978507e8d2fc3"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4d51b9d49286f8681b451fe8305371c0"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "da33a29964a1686bf0bf8ff27d243743"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c588715040bd5152a3b3c1a24a5171dc"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7f14a169a5339b8507817abfd1bc97b1"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "a336001a3e3a74bd833e20c9dffc0fab"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5bca7c2546f2af2f3ca874911b70ca2b"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "38702bb10db47de19d9528b3d8d0cf55"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "36a29153303d65b72f47576bc576b80d"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e7ff73327a173aeb3d2d5245c0df89f1"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3f7077589f93e21a0ee0e83fa353dfbb"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "f269ca2a5e42e1e47e589b64c452ae63"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d6560006fef0bff634e3f54a496c6489"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2217f544aaea482000a68162e799dfb9"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "d0d5ed2406a52bd3f8f0e5b988ed9d86"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "89695b75341670d0eb287decf0b62e39"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "53f0b0e73b2a296dc2c0f304b692f661"
}
]
}