{ "metadata": { "ParamSize": 405, "ParamBytes": 8136314880.0, "BitsPerParam": 5.00086029169671 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 81920000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920000, "byteOffset": 0 } ], "md5sum": "864de78a8348047218285c990bdbcb48" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "01e1af245f704a5d9e6a99b94c789600" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d3bea6e772458b8bd4d241b6c78cd510" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a9f5e4a1fc88a60150054f24ec02f165" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "97b08d01db5e926952b0fa8724a570a9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32890880, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240000, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 10240000 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 10250240 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14673920 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14684160 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14694400 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19118080 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 27965440 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 27975680 } ], "md5sum": "e0d3405cd817035e7c761d8f58713353" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b273aee4a2253683e624b0d38e8cfeb8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "90e92ed28868d84a078e06413eb8c5ef" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9ea191d13e873445289b7151dde81423" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "e558d0946df1be8256ec6e6ca1b975d0" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0e5022a82f5f3048f89249771da01870" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "57c7d293b65f5c6553f039be3af76963" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e05e9c4baab0c43105314c5e71f3e387" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "55dcdecd9698e7c6c7174165390cd0bd" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "296c1d885ac8736654cb8a09dadccd9d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a0a9b584eb153a731e733a4cab057d8a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d062e4f9fcab6864e4b6fbe36e8c5b71" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "a60f564608bf35bf693140eb52295054" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9b20e5c19a5a07f7d198e400c64035e6" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1f12323488d29474c3988830f4591e43" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d434fe2d46072b69d19cb36e0630aa72" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "50e26a6a5b7b7fd33122f8487b24da00" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0ece3448cc6af01b3fcd2725e1e14a3f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cdf4b211773911b035bd50340866b064" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b5c92e21b4b2f49ba20029dcebcb3f78" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "cf0b3d4027136e00750a2647994ba1db" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b936d427a828ef337d4616da2fc12b3e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0079645efbe7c6b8184a1df24431df0f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "42c6275d92335b39d60470e0f8157623" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "39e03eb85b95400e0f1136ba801aa128" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bd0e5c2d76eb91121347368078a2aa3e" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c0d3faf60f47c4ace91756bb89dc511b" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "2394a3656acd3270ae2c6ae776870d09" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "7bc9ec9cab711faefd74662d0f42e4c1" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "76401432d0e32a7a65285d719257ce6b" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e65014ec8465b482eb0da29ef8921b24" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "779e8833ee4c9e3d0ea45bfff0b646ec" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 14755840 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 19179520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28037120 } ], "md5sum": "aa28c139d09b82a37e43fdb6a48bb5b0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 81920000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 81920000, "byteOffset": 0 } ], "md5sum": "03f14e673a60dfd8de7d80639b2047a0" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "91d9953b831990f026e0a84cadd02bdb" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7cd60a1d562542b4dd6f76393573054b" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29429760, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 14745600 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240000, "byteOffset": 14755840 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 24995840 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 25006080 } ], "md5sum": "08a259b4b35992db7339b6b69917d5f8" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "6c7b70c6ad7c2081c67f02c792833dbe" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a0977dde72fc3394fb70f1284506b1d6" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ae30f0defaba0472b5ade36eb3b5fb6f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "7b99b3817b642b7369c68c957b64245d" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3ed51ccd2d7b180a4eaeae8bdee2b37b" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b50a3723cf3b26f260dbb3cc4c574608" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "81467a29690f015d95641160c056c99a" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "e12837288033c868c86d70d8b85981bc" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0124ff0a1f200b45be936dc179df4c23" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "92c1ab1a49076b5af8b3aadfc001bdd9" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "beba158cdcfb06f698c64a7b69709ff6" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "c0d1b8ba58ce57b55ffc41e14d1561d2" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "bf0779de9d628e7fa9e91cba26b599c0" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d648acd04637f82776ac87470793e3df" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6c1144587888ed4d0629be0791c317b1" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "cecf234721af54bd97960b87218f71e5" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e0fd9423830f5eda37a237a7e7b3448c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2b56623ed72b8aef469b4e53ff339511" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "46749cc536875f210a27db89c286ac43" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "1fec7a9facb13d78990a4b8639900106" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "aec4e5f01f4cb5a2b7ee24f6425f9c9b" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "402b745321b1b0faac91cebad99b1a11" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0cef5a19bbfee4fa73a7886511f0b54a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "8c300e9b1a07e3f319cf9d728f7995bb" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "2e08cb9d8ebbc5e07cb5bbb95aca25e6" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "071489bda2def9867369c4d0216666c2" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "853a7b98bfa98c77188f160902d21f43" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33443840, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 28518400 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 33433600 } ], "md5sum": "0b285f01a5c721279bcaaee1f89ff21b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4dd7e495b57d55620e78df9e4bdf031d" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "53a946229e9c71a65db0381f7e73e01a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fa3374f843c92e71285bfe0498b4d562" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "204e17c68c1fa9c9be518069b855674a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "16cb38d4ac7c07e70fc1f59f1e29b89a" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0098fb3a0661e255da1f3130b37f727f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a80a6289b630c7d36ac836fc6051d1aa" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "1449425c1662e8c86f291b3b17c838a6" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8804c9b1ddef2b74480bbefd0ca526ad" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "fde3cf0a1e37a93f814b5d3fa0316487" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9b501907731e23ce2f53d279606267a5" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "5aa8e8fff2e5c685f5256e6ce38bd917" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1f571a827b9d08f39bd6ec36a4eeda8c" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e2956ffc60adee997003a56e86e54387" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "57d3fc32b6950fd8beeaeabf9ed25086" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "6c7decfb07b030de19e1aa0dffab794a" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f9e3ae975ead02e6d33c23cbcbc82276" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a810e7d2ff7ef792f145f610c65aa113" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b7d436e7c49877eac0ec5090c09da34f" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "d25c27ca526ea6612964ea588f384d9d" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "18cc6f6540c99d5611e81d8670b3fa58" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "93490d9bf78130de2d26c91131fdd4d5" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d7c79758dc7ed4e097fcd56a430376a5" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "06a934bf96618bfa1cddc4a734f166c4" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ed26b40d86f6c14b4d1e1781620e0a64" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c9bcbbdc687083b6d3f0754da66a9e13" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bc338927785cb470efe4c478f9ae2bfb" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "d253981360c70557f2c8c4b04c4e17f6" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1bd738d8142ae51e48718f9af0f7de64" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "bdf8cfc45750283c61427dc55daa5eda" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "264d1dd895ee0b3dc848e8c67967256a" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 13281280 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18196480 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 31303680 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 32942080 } ], "md5sum": "b0faec33ac52c57c9c18e06fd1388110" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b5e90f6ccb30b9b5f868d84791b56670" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3d81dc383212799e91bdb37c77d8eae8" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a8996dab0b33742cda0b9d9ad1476655" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 32460800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 4423680 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 13271040 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13281280 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26388480 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28037120 } ], "md5sum": "61fdc25e126552f209f3971a3f658791" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ba8441e6c47801e445e336127628adc1" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "209def3eaf81c6dfcee4ad414f480b86" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6cb60d3bc83d89ea9de8217d9c8f3444" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "bdf9334abfd0313dab26150968a162a4" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d772dad7a595599a2e71b643bccd11cd" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "0f49c2777cb517a629013f11ba890f2b" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3137fda2d3217408be76128524152d73" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "9419fa1f1d3950ff854b5129490515c8" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "38ed07c8bce1704270dc871b7b113ed3" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8b8ece753ffab7e15e4533468d55256a" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "33dd9b7c1aabf973feb2f3fa3f6bd0cf" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "6c66da8f0b1e4076916b91db6f732f67" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "05dca5c15deeb50087c4a7d3ee01a9f0" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "278b12a6722a888c096149cf8554eaaa" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "31440dd4e596d91ff7e3a260ca8eb647" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "f72c33fa513d73b048a6d33e627601ca" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e21da711dc588f258e2247b88be8e7a3" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "942e5c36a8fd84cd049faf78ba4a4859" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d766658b6c398d0d637ed32621658451" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "be82347b072974fa29455c8166902484" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3fa479f854863abce1f47c8dec5e52c0" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6070bf75baec452579deb3f35ae1df5f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f8f31fa0a2c7c96ef4839a77124ab881" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "637aa2c05d10ebf13aec3912b6d88fb6" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1200ca920b31a2644c1ca15173a9c7dc" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a9c7ae47877415e84f491bd3b4cc798e" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "08d15d2df956f79baa1113c4bfe18822" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "e0f606134215671adc2070827766b0df" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ee3f29e528e185f5c77e67b72f43de4c" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a89f7d60339a27f8d91ddae09dfce251" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1a814f4b919e6a034ec95a01b4b52266" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "cfbce5bf75c3fe38bb18a6c844e7914f" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d8810fe22817b93cf2731858c6dcac76" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "be317671eb0452669efeb209d08cb228" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4437b4d9b153eecfd044ea2e742cf81a" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "e4f882f623ea782e0da18b15ba8c0c20" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "05b0c783d1ee1aada1f2ed016de689d7" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5ab60195305eca7940dceddce5a08d9e" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8a3d90a4ebb8797b2ef6e845b727cdb5" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "52c9b97d0cc918cdf8942b268913d792" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "100a4a31b51943f7d3cab403b83d22af" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "97e1ce8b53ff5163d11760554d5a2347" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "36d2cdacf2abb015505ba59a85686910" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "98ee7cb56fae05e990d2a98ae5ad3cfc" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a8b88d52c6e6a081c84ca860408c6617" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1701c6ca61a567d86ed0a61617710b8f" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b7060e7017a404a0a9b509f604eba9d9" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "580af7a89a3fad3a0bd216a4cec1a674" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "2bdf834315b55a0986e14c1f5456fe12" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "4dc961c79fba4f304d09af165070e9d3" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "846a65b982271674a36ee4db0de1e009" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 32952320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28518400 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 28528640 } ], "md5sum": "b31221c4e958539803caa2c3fa13b0ef" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "301bfec3fd564b58c59b408f2f0563da" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4dca7b49e0fba1a9aa1e764f802b0866" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 28518400, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8857600 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13772800 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26880000 } ], "md5sum": "0ccd63e259838d4514c2b8954135cb21" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 15360, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e770e24cb342c0369c7fe100d540b3a8" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 28508160, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 15360, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4915200, "byteOffset": 8847360 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 26869760 } ], "md5sum": "f5d2754d1b5f53ae8d7827e9141e08ff" } ] }