| { | |
| "metadata": { | |
| "ParamSize": 267, | |
| "ParamBytes": 277996288.0, | |
| "BitsPerParam": 4.501665573729716 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 68067328, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 151936, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 68067328, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d18e2f2cdb906934bd61dc55f8c53734" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33234176, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 151936, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8508416, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 8508416 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 8510208 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 10689280 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 10961664 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 15319808 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 15864576 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 15866368 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 15868672 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 16384768 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 16449280 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 16850688 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 16900864 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 16902656 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 19081728 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 19354112 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 23712256 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 24257024 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 24258816 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 24261120 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 24777216 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 24841728 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 25243136 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 25293312 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 25295104 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 27474176 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 27746560 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 32104704 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 32649472 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 32651264 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 32653568 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 33169664 | |
| } | |
| ], | |
| "md5sum": "bbd660e14d061043c49ddb2e8f19a6bb" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33505280, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 401408 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 451584 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 453376 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 2632448 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 2904832 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 7262976 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 7807744 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 7809536 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 7811840 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 8327936 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 8392448 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 8793856 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 8844032 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 8845824 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 11024896 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 11297280 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 15655424 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 16200192 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 16201984 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 16204288 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 16720384 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 16784896 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 17186304 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 17236480 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 17238272 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 19417344 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 19689728 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 24047872 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 24592640 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 24594432 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 24596736 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 25112832 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 25177344 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 25578752 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 25628928 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 25630720 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 27809792 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 28082176 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 32440320 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 32985088 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 32986880 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 32989184 | |
| } | |
| ], | |
| "md5sum": "379f753bf2235653ef8d8f4b0ec15810" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33053696, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 64512 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 465920 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 516096 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 517888 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 2696960 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 2969344 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 7327488 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 7872256 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 7874048 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 7876352 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 8392448 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 8456960 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 8858368 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 8908544 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 8910336 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 11089408 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 11361792 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 15719936 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 16264704 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 16266496 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 16268800 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 16784896 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 16849408 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 17250816 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 17300992 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 17302784 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 19481856 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 19754240 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 24112384 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 24657152 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 24658944 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 24661248 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 25177344 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 25241856 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 25643264 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 25693440 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 25695232 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 27874304 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 28146688 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 32504832 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 33049600 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 33051392 | |
| } | |
| ], | |
| "md5sum": "c9250010a90c3ea620977cfeade9c13f" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33020928, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 516096 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 580608 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 982016 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 1032192 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 1033984 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 3213056 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 3485440 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 7843584 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 8388352 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 8390144 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 8392448 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 8908544 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 8973056 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 9374464 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 9424640 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 9426432 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 11605504 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 11877888 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 16236032 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 16780800 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 16782592 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 16784896 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 17300992 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 17365504 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 17766912 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 17817088 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 17818880 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 19997952 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 20270336 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 24628480 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 25173248 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 25175040 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 25177344 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 25693440 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 25757952 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 26159360 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 26209536 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 26211328 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 28390400 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 28662784 | |
| } | |
| ], | |
| "md5sum": "b3ba9b0e565a1fa170e9759b5b30c467" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29211648, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 544768 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 546560 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 548864 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 1064960 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 1129472 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 1530880 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 1581056 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 1582848 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 3761920 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 4034304 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 8392448 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 8937216 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 8939008 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 8941312 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 9457408 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 9521920 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 9923328 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 9973504 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 9975296 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 12154368 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 12426752 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 16784896 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 17329664 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 17331456 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 17333760 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 17849856 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 17914368 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 18315776 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 18365952 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 18367744 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 20546816 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 20819200 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 25177344 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 25722112 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 25723904 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 25726208 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 26242304 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 26306816 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 26708224 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 26758400 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 26760192 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 28939264 | |
| } | |
| ], | |
| "md5sum": "74075a278553c97b31e502e6cdc23411" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 33297408, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 4358144 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 4902912 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 4904704 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 4907008 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 5423104 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 5487616 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 5889024 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 5939200 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 5940992 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 8120064 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 8392448 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 12750592 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 13295360 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 13297152 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 13299456 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 13815552 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 13880064 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 14281472 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 14331648 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 14333440 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 16512512 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 16784896 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 21143040 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 21687808 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 21689600 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 21691904 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 22208000 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 22272512 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 22673920 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 22724096 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 22725888 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 24904960 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 25177344 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 29535488 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 30080256 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 30082048 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 30084352 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 30600448 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 30664960 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 31066368 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 31116544 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 31118336 | |
| } | |
| ], | |
| "md5sum": "6f009ede7c214c407015c185190b2b2a" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 14605824, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 272384 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 4630528 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 5175296 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 5177088 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 5179392 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 5695488 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 5760000 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 6161408 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 6211584 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 608 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2179072, | |
| "byteOffset": 6213376 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 272384, | |
| "byteOffset": 8392448 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 9728, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 4358144, | |
| "byteOffset": 8664832 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 9728, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 544768, | |
| "byteOffset": 13022976 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 13567744 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.bias", | |
| "shape": [ | |
| 1152 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2304, | |
| "byteOffset": 13569536 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_weight", | |
| "shape": [ | |
| 1152, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 516096, | |
| "byteOffset": 13571840 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.q_scale", | |
| "shape": [ | |
| 1152, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 64512, | |
| "byteOffset": 14087936 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 896, | |
| 112 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 401408, | |
| "byteOffset": 14152448 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 896, | |
| 28 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50176, | |
| "byteOffset": 14553856 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 896 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1792, | |
| "byteOffset": 14604032 | |
| } | |
| ], | |
| "md5sum": "86594ef31f053d5784385575bc2ebbd8" | |
| } | |
| ] | |
| } |