diff --git a/config.json b/config.json index e4296a51526b3bb39f71f72e4a7d3408ed0fb498..070a9fe021a20fe07ac877223f79efed7f804971 100644 --- a/config.json +++ b/config.json @@ -27,7 +27,7 @@ "router_jitter_noise": 0.0, "sliding_window": null, "tie_word_embeddings": false, - "torch_dtype": "float32", + "torch_dtype": "bfloat16", "transformers_version": "4.47.0", "use_cache": true, "vocab_size": 99584 diff --git a/model-00001-of-00030.safetensors b/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..149c8cc3a80e174653b788803a4f0fe954b72c03 --- /dev/null +++ b/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab6a4e225468a2fc08240306b3c1c2e7bf0b9222adbcc68cfea065137abf3b3 +size 4978303696 diff --git a/model-00001-of-00060.safetensors b/model-00001-of-00060.safetensors deleted file mode 100644 index 0097261955c8ebeb99a505e17ce5be8ec36b574c..0000000000000000000000000000000000000000 --- a/model-00001-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9354e0eaa47e7a94b3396df7346df835162d429d72a84854e897e8b5944db5e6 -size 4724000480 diff --git a/model-00002-of-00030.safetensors b/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c6d954400d37981cd5819730cc854f549cd64300 --- /dev/null +++ b/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d37a800d1f808443f7595b07a4f98e7619eefef955bbdb7dc5e3865e2d542e1 +size 4881228840 diff --git a/model-00002-of-00060.safetensors b/model-00002-of-00060.safetensors deleted file mode 100644 index e8fb10883c64f6065ecf9297bc701b5a1ce246a0..0000000000000000000000000000000000000000 --- a/model-00002-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0a766aafd5d585f79342cb8027726d62acc387b668f63eeacae0771bf4f1f065 -size 4949486440 diff --git a/model-00003-of-00030.safetensors b/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e350acba3d8611b335dfc2c2dc6a103e938cf16 --- /dev/null +++ b/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f0245a2c4f6d9fe57c227755a3c58dad8a49c98ec1ced34805d195275f703a +size 4881228840 diff --git a/model-00003-of-00060.safetensors b/model-00003-of-00060.safetensors deleted file mode 100644 index 1f92bccc64083e7b78d2120a472bd6a411da49b1..0000000000000000000000000000000000000000 --- a/model-00003-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f07e772d032c83cb5bce1a1f8fd613926b0d29122b64fb980a354754e6a5be9 -size 4812966080 diff --git a/model-00004-of-00030.safetensors b/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ff285b6002918080d8fb85494cd850768ae1688 --- /dev/null +++ b/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a2577b5509a1cd2fd56c805c6d85fc488f7f0f71442ffeeceae97873cd8bb0 +size 4949489368 diff --git a/model-00004-of-00060.safetensors b/model-00004-of-00060.safetensors deleted file mode 100644 index 721ad10cc1c1c5ee63ce9e58af3c81565b4a5bf5..0000000000000000000000000000000000000000 --- a/model-00004-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7920ef3cdaf6d0628824ea6c07ddf9437b96e488974f2593207847d6da269804 -size 4949486440 diff --git a/model-00005-of-00030.safetensors b/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f990988a230846474585d2e5e59bc8163de1695 --- /dev/null +++ b/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c605024e82d8c9b27c2c5fbf5c7331bf1b71b73fe7d80c04a5ac3af16c0719 +size 4881228840 diff --git a/model-00005-of-00060.safetensors b/model-00005-of-00060.safetensors deleted file mode 100644 index 111caa8bf87ab9fc08d1fba20e0491ac1ac66615..0000000000000000000000000000000000000000 --- a/model-00005-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a5f508712d85498c45372c27fb182c1114a17c2369411a284b23f7dfa27dd4a -size 4949486440 diff --git a/model-00006-of-00030.safetensors b/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d33da2c1a493a348d3fa4ad75fcb53b20159586 --- /dev/null +++ b/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831494d3146c8dcba5b86f34c9e7bb3c9881566f5a4244aeffa187b91d4b0fe +size 4881228840 diff --git a/model-00006-of-00060.safetensors b/model-00006-of-00060.safetensors deleted file mode 100644 index ccc0c9f9e2edec0a6167b50a35d4749ea690ea11..0000000000000000000000000000000000000000 --- a/model-00006-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ba25cb7f19f88959a0e3687fefce9ae26d9d6af9091f20d8980ab7cd6518f91 -size 4812966080 diff --git a/model-00007-of-00030.safetensors b/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a249c1d3b0f0a1706726515967c5d08e55333d68 --- /dev/null +++ b/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b92effdbc0db6202bae24759f08099e56fdab37a05277efc30608f8112eb02de +size 4949489368 diff --git a/model-00007-of-00060.safetensors b/model-00007-of-00060.safetensors deleted file mode 100644 index 0d9780f16375589520454b06b6fd00ddae655e7f..0000000000000000000000000000000000000000 --- a/model-00007-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0c2b57006fd79f870b573cd5ac9b57efd547767cc537d59402585dad15627eb8 -size 4949486440 diff --git a/model-00008-of-00030.safetensors b/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36025d6d939fbbfa74ba5897fc0003b149b77940 --- /dev/null +++ b/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:675c25fda3ae654ad5e9e1791a0878209796fac8c0d10abe4b8c13f7a8e67ebb +size 4881228864 diff --git a/model-00008-of-00060.safetensors b/model-00008-of-00060.safetensors deleted file mode 100644 index 791c93f5973e06174b9040988250013fe898fbbb..0000000000000000000000000000000000000000 --- a/model-00008-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de4ccc04618a92cab3ce523fb296bbe342977b9e2696c7bb1d0d8ab62a87e35e -size 4949486440 diff --git a/model-00009-of-00030.safetensors b/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0561fdcd2a273dffc214d20acdb59778d97b5ad --- /dev/null +++ b/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7349d1db7a0068aacadacf7e11e578546572334a16d19e607af413ddaa4827ce +size 4881228880 diff --git a/model-00009-of-00060.safetensors b/model-00009-of-00060.safetensors deleted file mode 100644 index 181f2a1566a082dab894d508f955357a0cfa7f1c..0000000000000000000000000000000000000000 --- a/model-00009-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42393e5925a1ca1ddb6290960d8c9041170de7f4ac3af6d9d3158005e145afa3 -size 4812966080 diff --git a/model-00010-of-00030.safetensors b/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8246600dc8f2bb08beec258413587a644b3ccc8b --- /dev/null +++ b/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9198efcb119c62136fc143515d689d2793a50834d29c3b9fd795f36c711ae942 +size 4949489408 diff --git a/model-00010-of-00060.safetensors b/model-00010-of-00060.safetensors deleted file mode 100644 index 8ed178b08d49e54204c8c23504da46b40ceab97c..0000000000000000000000000000000000000000 --- a/model-00010-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c36ea8590e436d243315dc2dab16d04ba5d35eb8562f6d056ebc556686be394e -size 4949486440 diff --git a/model-00011-of-00030.safetensors b/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a53c9438747758904938523d322d143aa39597d --- /dev/null +++ b/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95fbc4239653105ca5d48ba35a8590a97d11870d4ed84318936c20fd7cba533 +size 4881228880 diff --git a/model-00011-of-00060.safetensors b/model-00011-of-00060.safetensors deleted file mode 100644 index 7dadf31fb986c88564e2aa8380504ec25072ad3e..0000000000000000000000000000000000000000 --- a/model-00011-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5b8d24a74c4bb8f7514249a1728b14ae7864cd1d6d0e527859328327908f737d -size 4949486440 diff --git a/model-00012-of-00030.safetensors b/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..367fa1ee23d54b180fd147946ae56c3058af0358 --- /dev/null +++ b/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04400c018f2c234d8fdc55321d86b136ec1876509a65c35479d1c6211afa5d0c +size 4949489416 diff --git a/model-00012-of-00060.safetensors b/model-00012-of-00060.safetensors deleted file mode 100644 index e913b31e2a9d27e0200750caae93c112b944f5ea..0000000000000000000000000000000000000000 --- a/model-00012-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86a5b3ad86cbc3bad5f9b691e1e54d41b82d8fe320a0ba1924522d60b93ad878 -size 4812966080 diff --git a/model-00013-of-00030.safetensors b/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..280443283140a86cc85ee5b9d9902482027c7320 --- /dev/null +++ b/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39818d216e46443a9a4764167dc368f381397b545de8b91b4fe0399d2976c88d +size 4881228880 diff --git a/model-00013-of-00060.safetensors b/model-00013-of-00060.safetensors deleted file mode 100644 index 66bc3c10ff28b2bcc3ba0fe268d7a5a4114e9020..0000000000000000000000000000000000000000 --- a/model-00013-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f74ba613177ef4194b77a6f97c3b219da59f7fd156b7135194cbfae807a9e8d2 -size 4949486440 diff --git a/model-00014-of-00030.safetensors b/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5442d94fad43279f3b29e614872a406d2af4b827 --- /dev/null +++ b/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a936968cbec0b3df5e10453fbc23ea2a99d0cc88a61dad9893aab76b3d10be4e +size 4881228880 diff --git a/model-00014-of-00060.safetensors b/model-00014-of-00060.safetensors deleted file mode 100644 index d58c0ee0e2c36720dbbf201ac06a23e8d2c87c02..0000000000000000000000000000000000000000 --- a/model-00014-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6563a06406b66451b2f9aa700b83542d27d586a98158ee2a1f89c59333242375 -size 4949486440 diff --git a/model-00015-of-00030.safetensors b/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a858d07991c0927bf6f01bad73cc2a555a36d43 --- /dev/null +++ b/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5519c5b2159cbe71ce74968e6c054b419fc85e93168b1bf0e20a9d4028d1d06 +size 4949489408 diff --git a/model-00015-of-00060.safetensors b/model-00015-of-00060.safetensors deleted file mode 100644 index 6182fe68983f92d2ebf8caacf1ed8531aac6ad01..0000000000000000000000000000000000000000 --- a/model-00015-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ae332a9d220539e4b935e1925ad382e342267b7efc9d753f25761e3723e9e0ef -size 4812966080 diff --git a/model-00016-of-00030.safetensors b/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bee6029b512afe6f1c7cb412ddfa123fc1c3431 --- /dev/null +++ b/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c400034fdd40524cc90a4607b1b273c7deec7ad148b093aac1029bc9d707ac0c +size 4881228880 diff --git a/model-00016-of-00060.safetensors b/model-00016-of-00060.safetensors deleted file mode 100644 index f98805db587f1b3ffb7ee40d9d4c198669d84da5..0000000000000000000000000000000000000000 --- a/model-00016-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d11f9b71cd004bda398b22283e2c70cec41ac4999d4616aa2d3d0d5034d47b21 -size 4949486456 diff --git a/model-00017-of-00030.safetensors b/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bffe1e644b1ad9ac9429e6cb6752d25c6ca89ef8 --- /dev/null +++ b/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6be180cb8f7c3f617fdbe19d4d4da979b1a0abee94a4e9ca57d6ed4af582eacb +size 4881228880 diff --git a/model-00017-of-00060.safetensors b/model-00017-of-00060.safetensors deleted file mode 100644 index cd2444c3075fe5d9834ca6419e6b1440c294841a..0000000000000000000000000000000000000000 --- a/model-00017-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c098171c7271794be19341c81b1a874103f9973ea8cc0e02f4ff408b8a5af98e -size 4949486464 diff --git a/model-00018-of-00030.safetensors b/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cbf4fc5402f51960f0d57772c88d047ee0e9b4d --- /dev/null +++ b/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd7506b3208eeb4d0dba7de02bfac3388515a6a26a8366b57a4ffa752eab446 +size 4949489408 diff --git a/model-00018-of-00060.safetensors b/model-00018-of-00060.safetensors deleted file mode 100644 index a32a4425010fd3023fd099926a97e17e8af21b2a..0000000000000000000000000000000000000000 --- a/model-00018-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed7074146a9b240e07a396cbff896ef55024d56f573a0999fc4badc1f512cbdd -size 4812966096 diff --git a/model-00019-of-00030.safetensors b/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2b65fbe2c52d1da768e92bda327c5ee4a42f135 --- /dev/null +++ b/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f351b536fd992d8cf4f2d8089947842d8d65f3133768cc2a2e424e045f52241 +size 4881228880 diff --git a/model-00019-of-00060.safetensors b/model-00019-of-00060.safetensors deleted file mode 100644 index ae76da578bdff369ea75c56260753fe4ed3639d4..0000000000000000000000000000000000000000 --- a/model-00019-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:39befab0371153dae995b9597bdbe7bf5d8f4e0b58264acda017c1919532f893 -size 4949486456 diff --git a/model-00020-of-00030.safetensors b/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36a0b42210087cd599b02623a34870e8a48f43ae --- /dev/null +++ b/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a63798ee40f2458d7c83539701194b037abb7d9a29ec45233662e7ddf24bcc +size 4881228880 diff --git a/model-00020-of-00060.safetensors b/model-00020-of-00060.safetensors deleted file mode 100644 index cf475d5f5433304860550dfb26af59ece0b4c629..0000000000000000000000000000000000000000 --- a/model-00020-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b01b32b7c25153dc2a2d802042b592beb93fe41a7678ffbe3caa5493fc8415e7 -size 4949486464 diff --git a/model-00021-of-00030.safetensors b/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ef8967ce4431ea1e10b9acaf514b2c44dd710e5 --- /dev/null +++ b/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa411463952e0cb7e7b47e2f726893887323ad289f5b1a8d3ece7e6950c1c18 +size 4949489408 diff --git a/model-00021-of-00060.safetensors b/model-00021-of-00060.safetensors deleted file mode 100644 index c42ba396e64ad511662a32a4510a98e96c899d4d..0000000000000000000000000000000000000000 --- a/model-00021-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99604faf076c23e69d53f60ece376796ed8ef70a3e0d80a49169e475f1e863b2 -size 4812966096 diff --git a/model-00022-of-00030.safetensors b/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf6e82406c793e45d1be3b52c769390f90aa7aef --- /dev/null +++ b/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:157fa3990548cbc9bc85bf0b040b4f69773b8d6f229406e65a40c12db0d566dc +size 4881228880 diff --git a/model-00022-of-00060.safetensors b/model-00022-of-00060.safetensors deleted file mode 100644 index c87394320732a4271bd1b794be57a96de4c75aae..0000000000000000000000000000000000000000 --- a/model-00022-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b234f206234607581fa124b4d7c141536602de089bafcf6157851100a64c6da6 -size 4949486456 diff --git a/model-00023-of-00030.safetensors b/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b39f2e59ad8bcfe229cdaa2b93f26df7c1ba3d00 --- /dev/null +++ b/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:451afd8df31e627842ba482b23460e2c3237ec42f2e562a67fde041773584f35 +size 4986107432 diff --git a/model-00023-of-00060.safetensors b/model-00023-of-00060.safetensors deleted file mode 100644 index 8aaf58b1a552e823a4764d643cc95dd6bf323c3f..0000000000000000000000000000000000000000 --- a/model-00023-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e2cb32644db6c67df7233770f1e9f168a38f8a7b1a77fad16a8a2c9b702a1fdd -size 4949486464 diff --git a/model-00024-of-00030.safetensors b/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..387f4a7fed01bfd3245a6c165dc629c8fa71a5d3 --- /dev/null +++ b/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5efc1bbb874397e3d1cdf32cc486ade589e2b84e38ff407f07340279d747f3b8 +size 4986168760 diff --git a/model-00024-of-00060.safetensors b/model-00024-of-00060.safetensors deleted file mode 100644 index 593b78dfe45da51714365fabcbc0bfd6c09977a9..0000000000000000000000000000000000000000 --- a/model-00024-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c450e5e8aa6ec5d89ec7be9a954803bb7fd7f1bd9d5049d10c4bcba5a20fd549 -size 4917865000 diff --git a/model-00025-of-00030.safetensors b/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a722874e356cfdb7084d17efe8980f70e6399970 --- /dev/null +++ b/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4570598dfc8624f88229dc55fba3a61c76653a6e726c79cae5aa336d1e4f8bc0 +size 4881228880 diff --git a/model-00025-of-00060.safetensors b/model-00025-of-00060.safetensors deleted file mode 100644 index 28f203054073736fb65b9b23e73b5e98f0b80d82..0000000000000000000000000000000000000000 --- a/model-00025-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2a701bd5927ec566deb5c2b89cf9554e5aac56019ed714dbdb296da514462b3d -size 4844587552 diff --git a/model-00026-of-00030.safetensors b/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed82b3f80400e5c102578965af043d4490a48d13 --- /dev/null +++ b/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247a1a5fee8f5a809648abd3aab953f8efb3c1dbffced2e1a9f1037eab3f55b7 +size 4949489408 diff --git a/model-00026-of-00060.safetensors b/model-00026-of-00060.safetensors deleted file mode 100644 index 29958a35df96c9d178b51f5a55a70e7a60ad1b5a..0000000000000000000000000000000000000000 --- a/model-00026-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a890734d609dbff7e3aa5e504c1dccdab043955882b2a9ed24db83a4fe45e614 -size 4949486464 diff --git a/model-00027-of-00030.safetensors b/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..31a1833a52cc6072b53dfb5017d3b68dd26c2ad3 --- /dev/null +++ b/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b6d6698a284ab417de02498e9eb367bed0d52cf868e204cbe4ddea2e9952bc +size 4881228880 diff --git a/model-00027-of-00060.safetensors b/model-00027-of-00060.safetensors deleted file mode 100644 index 33983831079845cc1a44f43745da0c546c325f3f..0000000000000000000000000000000000000000 --- a/model-00027-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6642bd2c6ed3df366ecff3a49125ea4e8c3c1139e67f0b16874b86e9a6a5d40b -size 4949486464 diff --git a/model-00028-of-00030.safetensors b/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ef34d477bed11a61cead437debeafc0fcf8150a --- /dev/null +++ b/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fdb8789cc22ce912c5c54de99593ef77690cb52774e6f7d546b3b7f63ea6591 +size 4881228880 diff --git a/model-00028-of-00060.safetensors b/model-00028-of-00060.safetensors deleted file mode 100644 index 67aa6236102a8332778312d8ea11f8fc4e2e267e..0000000000000000000000000000000000000000 --- a/model-00028-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7a61281ac67053a1173417681de710d402e592bca97875e5469f83ab3772ba98 -size 4812966096 diff --git a/model-00029-of-00030.safetensors b/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07356d2306eaa3024dcfa63097b5b419c1eee42b --- /dev/null +++ b/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1a172c90932409740e4a36f166f9356c80f4519832d4919e345df3ecefd56d0 +size 4949489408 diff --git a/model-00029-of-00060.safetensors b/model-00029-of-00060.safetensors deleted file mode 100644 index 3b1afec3d7d216e08dd834fdc5491c06a2dfca5f..0000000000000000000000000000000000000000 --- a/model-00029-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7625707163a0d46413be839279742a9dab6fa9ec078a14c535fd40b848b490e3 -size 4949486464 diff --git a/model-00030-of-00030.safetensors b/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5745342a3ea6d9a0f1c4bf606942b3f14fccf66 --- /dev/null +++ b/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0362aef838feaa80c7a7bf33bff19fb250b3a17efab6299c61b5417e3452e34f +size 3850929176 diff --git a/model-00030-of-00060.safetensors b/model-00030-of-00060.safetensors deleted file mode 100644 index ffda1207574e2e7202bbaf110ef2f036654b7f7b..0000000000000000000000000000000000000000 --- a/model-00030-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f601b51cf6b526beec1bf68243a5a4f4753dc23aeccb6fd8bdac6cad76eeb360 -size 4949486464 diff --git a/model-00031-of-00060.safetensors b/model-00031-of-00060.safetensors deleted file mode 100644 index e807044274b9596a3a2485b59feab46289479336..0000000000000000000000000000000000000000 --- a/model-00031-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a0064153197ebde3fb5d2a062ce4683d78444181dbfdd26817dd4cb512126200 -size 4812966096 diff --git a/model-00032-of-00060.safetensors b/model-00032-of-00060.safetensors deleted file mode 100644 index 0a0a60ae8b6f79a3bf876d8a4202065c9bc63e28..0000000000000000000000000000000000000000 --- a/model-00032-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e7df95697e3fd35f1c8b60a1878a6ce72b635984bdddebcfdcf8270c0b018d9d -size 4949486464 diff --git a/model-00033-of-00060.safetensors b/model-00033-of-00060.safetensors deleted file mode 100644 index 07364e731efa415ae84248bb81231d1ac8456c66..0000000000000000000000000000000000000000 --- a/model-00033-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b4adf863de7a50a50923ea509fe2ee7752f6151daf190d2818198250d0425a0 -size 4949486464 diff --git a/model-00034-of-00060.safetensors b/model-00034-of-00060.safetensors deleted file mode 100644 index e563d2c38d0b8209f5263e8ed1b79f1ea1b1a98b..0000000000000000000000000000000000000000 --- a/model-00034-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:74678b711365357de1e56aef64c4e68cd29b49874a97e8630cfcd301d1654544 -size 4812966096 diff --git a/model-00035-of-00060.safetensors b/model-00035-of-00060.safetensors deleted file mode 100644 index 2ec647f01a0cfa3ba34499caa1441a6d06755f94..0000000000000000000000000000000000000000 --- a/model-00035-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f229396553d04ae49b7c1526c91ce05a903836df0c3419987c6f19c4b5d8d4e2 -size 4949486464 diff --git a/model-00036-of-00060.safetensors b/model-00036-of-00060.safetensors deleted file mode 100644 index dacbea2b4219158d475d729e3be141047c2c2564..0000000000000000000000000000000000000000 --- a/model-00036-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c395453025d0128286eb8fc3e81e0a787899094e126521074551bc327695b590 -size 4949486464 diff --git a/model-00037-of-00060.safetensors b/model-00037-of-00060.safetensors deleted file mode 100644 index e7ab467e5d8924675b22070d25b9688470484c33..0000000000000000000000000000000000000000 --- a/model-00037-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:07ebf6cea3fc353327f69f3d7b7762b676ae9cf3142c0dc861573671ba9d0660 -size 4812966096 diff --git a/model-00038-of-00060.safetensors b/model-00038-of-00060.safetensors deleted file mode 100644 index a45996b205e8c4437653eb46c91f917a83b42e93..0000000000000000000000000000000000000000 --- a/model-00038-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cc788a1f810be64f41af3758afd6206a744ac83b064517531f3b550a7541d1a1 -size 4949486464 diff --git a/model-00039-of-00060.safetensors b/model-00039-of-00060.safetensors deleted file mode 100644 index 32ddacec3f8811246b39df7096c26e46167080d3..0000000000000000000000000000000000000000 --- a/model-00039-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb8b1528a15938815db41a3d38e347c3ec5a7c3dd4eb4eac9bcbce6e1f934e74 -size 4949486464 diff --git a/model-00040-of-00060.safetensors b/model-00040-of-00060.safetensors deleted file mode 100644 index f028cf39218d9b69884015a277fb332698cc94b3..0000000000000000000000000000000000000000 --- a/model-00040-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af6387224282af6ca8cc238a10a39216bb8265635c1c2682b16e475bb29dd820 -size 4812966096 diff --git a/model-00041-of-00060.safetensors b/model-00041-of-00060.safetensors deleted file mode 100644 index 702e98df67ff5514009379d5bef10b81cd4411d5..0000000000000000000000000000000000000000 --- a/model-00041-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0a9b921bb0edd20b9f7c63fedf90de1206be02f7c9f810528ba0efc703bb50fa -size 4949486456 diff --git a/model-00042-of-00060.safetensors b/model-00042-of-00060.safetensors deleted file mode 100644 index bbd423707db78e0962d7e5a2ab9f7895cc8efd61..0000000000000000000000000000000000000000 --- a/model-00042-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:30624ba72a40b1ef0e53f66341f01632c6578e3d59cc0850728ddf1e00e66017 -size 4949486464 diff --git a/model-00043-of-00060.safetensors b/model-00043-of-00060.safetensors deleted file mode 100644 index 6785f4c929057cb2a9cf3fb8a27efd39bff182c2..0000000000000000000000000000000000000000 --- a/model-00043-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17c8b3aae7dc1bddcc1d904f8ef867e44d6b1bdaea2342a4901337ef34f60971 -size 4812966096 diff --git a/model-00044-of-00060.safetensors b/model-00044-of-00060.safetensors deleted file mode 100644 index 2c49fe5828b0498070b62bccbd21076e5057b02d..0000000000000000000000000000000000000000 --- a/model-00044-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d5fd7c4af2d6e514191ed0ebd90f1cadb2d41f88839a1c93c65281408f313df7 -size 4949486456 diff --git a/model-00045-of-00060.safetensors b/model-00045-of-00060.safetensors deleted file mode 100644 index 9f98a33de225e63ca945bf920eb8bc3abd3ad361..0000000000000000000000000000000000000000 --- a/model-00045-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:afa42f59eefb48573f5a60b837fa6256af422f4ae1ac7f0b8a3db3f30fa19b8e -size 4949486464 diff --git a/model-00046-of-00060.safetensors b/model-00046-of-00060.safetensors deleted file mode 100644 index a02a212442ee58650fe628e058670ff7ff71fb32..0000000000000000000000000000000000000000 --- a/model-00046-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:747f21337baa60b3f99875c70b440c439ade06a4c92359938da75eebf684bfcb -size 4812966096 diff --git a/model-00047-of-00060.safetensors b/model-00047-of-00060.safetensors deleted file mode 100644 index f5c9ca475758d5612c4a91768867b57ccaf41816..0000000000000000000000000000000000000000 --- a/model-00047-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:08e8c8f3f7014a219ee259ae27975cb96d52ba1dc9163e89b719fc677497d8b6 -size 4949486456 diff --git a/model-00048-of-00060.safetensors b/model-00048-of-00060.safetensors deleted file mode 100644 index 683c4c48907bb6bf8687e5ea8fed24521a5e3d0f..0000000000000000000000000000000000000000 --- a/model-00048-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6a54e2f7028348d13926ef7d8506c734606a1f09e65127f12de6d2a975c4dd66 -size 4949486464 diff --git a/model-00049-of-00060.safetensors b/model-00049-of-00060.safetensors deleted file mode 100644 index 6be47cf37f723a868920f200cb28ba789cac9071..0000000000000000000000000000000000000000 --- a/model-00049-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8fe419eaddbb97345f187dc7621ce05ab49c981de125d9c1dc0e73f650fb0982 -size 4917865000 diff --git a/model-00050-of-00060.safetensors b/model-00050-of-00060.safetensors deleted file mode 100644 index dd841a76dea446d3da6c59a5245bc13cae9c9928..0000000000000000000000000000000000000000 --- a/model-00050-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82e530fdd8fbc1ead654d80a3b4b4807746cb57af6db65585d37b9e0d41f7181 -size 4844587552 diff --git a/model-00051-of-00060.safetensors b/model-00051-of-00060.safetensors deleted file mode 100644 index 16dd75156a77cb70f6f5699b8f50a84b388f2d17..0000000000000000000000000000000000000000 --- a/model-00051-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:361e8589b26d57553bac9972b2941c96bf39b626c57dc8ff6dc48503627a2065 -size 4949486464 diff --git a/model-00052-of-00060.safetensors b/model-00052-of-00060.safetensors deleted file mode 100644 index a3dba4aee9ffea39f2bec0c0d838757b31225d1d..0000000000000000000000000000000000000000 --- a/model-00052-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc03cb4b9f1f70c66c727e257380800347aff5e3eeff552914da2afc5f407641 -size 4949486464 diff --git a/model-00053-of-00060.safetensors b/model-00053-of-00060.safetensors deleted file mode 100644 index 7874d782d4c2fec307d581aff2dd180e242df1e2..0000000000000000000000000000000000000000 --- a/model-00053-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:75b2508c595c7202b887b11250da3173e41fe6fd3c6f985a5130af20b9bf7fd5 -size 4812966096 diff --git a/model-00054-of-00060.safetensors b/model-00054-of-00060.safetensors deleted file mode 100644 index 028942beb66a3e9838726058bce416f8414bc403..0000000000000000000000000000000000000000 --- a/model-00054-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:124ed4f033cc7c2eb293fae7edbd4df61dd253a9095c5c76816979653cf11b69 -size 4949486464 diff --git a/model-00055-of-00060.safetensors b/model-00055-of-00060.safetensors deleted file mode 100644 index 5a7e86e622e97427dbe2381993f09c8fa16d3744..0000000000000000000000000000000000000000 --- a/model-00055-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06879bdb6beb8ed9216aae5b0bb9bd222f2054f0032fea5c61e2206a4d0d2ad0 -size 4949486464 diff --git a/model-00056-of-00060.safetensors b/model-00056-of-00060.safetensors deleted file mode 100644 index f5d35661e1f28d0325fb6872732973dcdcee29de..0000000000000000000000000000000000000000 --- a/model-00056-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bf0fdcaeb4972ea7d66b951bcafa7e84f3c7bb45c4261577c373d2d74be1091b -size 4812966096 diff --git a/model-00057-of-00060.safetensors b/model-00057-of-00060.safetensors deleted file mode 100644 index 7c1d69ee60f4d505aec70589f236e9ebccd6e445..0000000000000000000000000000000000000000 --- a/model-00057-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ef17a40394d2342a0045d0c239af41e9b72f7f2b61e3389716047d5efad4fb7a -size 4949486464 diff --git a/model-00058-of-00060.safetensors b/model-00058-of-00060.safetensors deleted file mode 100644 index 0a408cdd253f31706c2daddea0b1205cc916b2c2..0000000000000000000000000000000000000000 --- a/model-00058-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:df6c30170eaa87cdb6e6f72dcfd37268f43701239104cab54a0015d54cf896aa -size 4949486464 diff --git a/model-00059-of-00060.safetensors b/model-00059-of-00060.safetensors deleted file mode 100644 index dca821c6cedcc45ded0beed969b508ca0ee3258b..0000000000000000000000000000000000000000 --- a/model-00059-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:76ad5cb0a9474afe670385e44b15ca42f78125cacb53cafe5ce48b6669805a8a -size 4812966096 diff --git a/model-00060-of-00060.safetensors b/model-00060-of-00060.safetensors deleted file mode 100644 index 9bb7396dbfd2f95ba2337dc8bf6ffb386581c14e..0000000000000000000000000000000000000000 --- a/model-00060-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:22f679d7250f62d0996c837adb75d4a37bb5eccfc2c052f68e3e39cd3027c767 -size 3455120464 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 3cac9a21d8893a7476aa4cc36d4d620e6aa9e3e5..047eb1fe0b6464e579d69e0432cb9f02eaf34c6d 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,1250 +1,1250 @@ { "metadata": { - "total_size": 292655288320 + "total_size": 146327644160 }, "weight_map": { - "lm_head.weight": "model-00060-of-00060.safetensors", - "model.embed_tokens.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00060.safetensors", - "model.layers.0.input_layernorm.weight": "model-00002-of-00060.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00060.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00060.safetensors", - "model.layers.1.input_layernorm.weight": "model-00004-of-00060.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00060.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.gate.weight": "model-00016-of-00060.safetensors", - "model.layers.10.input_layernorm.weight": "model-00017-of-00060.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00017-of-00060.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.11.block_sparse_moe.gate.weight": "model-00017-of-00060.safetensors", - "model.layers.11.input_layernorm.weight": "model-00019-of-00060.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00019-of-00060.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.gate.weight": "model-00019-of-00060.safetensors", - "model.layers.12.input_layernorm.weight": "model-00020-of-00060.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00020-of-00060.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.13.block_sparse_moe.gate.weight": "model-00020-of-00060.safetensors", - "model.layers.13.input_layernorm.weight": "model-00022-of-00060.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00022-of-00060.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.gate.weight": "model-00022-of-00060.safetensors", - "model.layers.14.input_layernorm.weight": "model-00023-of-00060.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00023-of-00060.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.gate.weight": "model-00023-of-00060.safetensors", - "model.layers.15.input_layernorm.weight": "model-00024-of-00060.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00024-of-00060.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.gate.weight": "model-00025-of-00060.safetensors", - "model.layers.16.input_layernorm.weight": "model-00026-of-00060.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00026-of-00060.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00025-of-00060.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00025-of-00060.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00024-of-00060.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00025-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.gate.weight": "model-00026-of-00060.safetensors", - "model.layers.17.input_layernorm.weight": "model-00027-of-00060.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00027-of-00060.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.gate.weight": "model-00027-of-00060.safetensors", - "model.layers.18.input_layernorm.weight": "model-00029-of-00060.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00029-of-00060.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.gate.weight": "model-00029-of-00060.safetensors", - "model.layers.19.input_layernorm.weight": "model-00030-of-00060.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00030-of-00060.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00060.safetensors", - "model.layers.2.input_layernorm.weight": "model-00005-of-00060.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00060.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.gate.weight": "model-00030-of-00060.safetensors", - "model.layers.20.input_layernorm.weight": "model-00032-of-00060.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00032-of-00060.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.gate.weight": "model-00032-of-00060.safetensors", - "model.layers.21.input_layernorm.weight": "model-00033-of-00060.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00033-of-00060.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.gate.weight": "model-00033-of-00060.safetensors", - "model.layers.22.input_layernorm.weight": "model-00035-of-00060.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00035-of-00060.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.gate.weight": "model-00035-of-00060.safetensors", - "model.layers.23.input_layernorm.weight": "model-00036-of-00060.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00036-of-00060.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.gate.weight": "model-00036-of-00060.safetensors", - "model.layers.24.input_layernorm.weight": "model-00038-of-00060.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00038-of-00060.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.gate.weight": "model-00038-of-00060.safetensors", - "model.layers.25.input_layernorm.weight": "model-00039-of-00060.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00039-of-00060.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.26.block_sparse_moe.gate.weight": "model-00039-of-00060.safetensors", - "model.layers.26.input_layernorm.weight": "model-00041-of-00060.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00041-of-00060.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.gate.weight": "model-00041-of-00060.safetensors", - "model.layers.27.input_layernorm.weight": "model-00042-of-00060.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00042-of-00060.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.28.block_sparse_moe.gate.weight": "model-00042-of-00060.safetensors", - "model.layers.28.input_layernorm.weight": "model-00044-of-00060.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00044-of-00060.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.gate.weight": "model-00044-of-00060.safetensors", - "model.layers.29.input_layernorm.weight": "model-00045-of-00060.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00045-of-00060.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00060.safetensors", - "model.layers.3.input_layernorm.weight": "model-00007-of-00060.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00007-of-00060.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.30.block_sparse_moe.gate.weight": "model-00045-of-00060.safetensors", - "model.layers.30.input_layernorm.weight": "model-00047-of-00060.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00047-of-00060.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.gate.weight": "model-00047-of-00060.safetensors", - "model.layers.31.input_layernorm.weight": "model-00048-of-00060.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00048-of-00060.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.gate.weight": "model-00048-of-00060.safetensors", - "model.layers.32.input_layernorm.weight": "model-00049-of-00060.safetensors", - "model.layers.32.post_attention_layernorm.weight": "model-00049-of-00060.safetensors", - "model.layers.32.self_attn.k_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.32.self_attn.o_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.32.self_attn.q_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.32.self_attn.v_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.gate.weight": "model-00050-of-00060.safetensors", - "model.layers.33.input_layernorm.weight": "model-00051-of-00060.safetensors", - "model.layers.33.post_attention_layernorm.weight": "model-00051-of-00060.safetensors", - "model.layers.33.self_attn.k_proj.weight": "model-00050-of-00060.safetensors", - "model.layers.33.self_attn.o_proj.weight": "model-00050-of-00060.safetensors", - "model.layers.33.self_attn.q_proj.weight": "model-00049-of-00060.safetensors", - "model.layers.33.self_attn.v_proj.weight": "model-00050-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.gate.weight": "model-00051-of-00060.safetensors", - "model.layers.34.input_layernorm.weight": "model-00052-of-00060.safetensors", - "model.layers.34.post_attention_layernorm.weight": "model-00052-of-00060.safetensors", - "model.layers.34.self_attn.k_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.34.self_attn.o_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.34.self_attn.q_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.34.self_attn.v_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.gate.weight": "model-00052-of-00060.safetensors", - "model.layers.35.input_layernorm.weight": "model-00054-of-00060.safetensors", - "model.layers.35.post_attention_layernorm.weight": "model-00054-of-00060.safetensors", - "model.layers.35.self_attn.k_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.35.self_attn.o_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.35.self_attn.q_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.35.self_attn.v_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.gate.weight": "model-00054-of-00060.safetensors", - "model.layers.36.input_layernorm.weight": "model-00055-of-00060.safetensors", - "model.layers.36.post_attention_layernorm.weight": "model-00055-of-00060.safetensors", - "model.layers.36.self_attn.k_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.36.self_attn.o_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.36.self_attn.q_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.36.self_attn.v_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.gate.weight": "model-00055-of-00060.safetensors", - "model.layers.37.input_layernorm.weight": "model-00057-of-00060.safetensors", - "model.layers.37.post_attention_layernorm.weight": "model-00057-of-00060.safetensors", - "model.layers.37.self_attn.k_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.37.self_attn.o_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.37.self_attn.q_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.37.self_attn.v_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.gate.weight": "model-00057-of-00060.safetensors", - "model.layers.38.input_layernorm.weight": "model-00058-of-00060.safetensors", - "model.layers.38.post_attention_layernorm.weight": "model-00058-of-00060.safetensors", - "model.layers.38.self_attn.k_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.38.self_attn.o_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.38.self_attn.q_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.38.self_attn.v_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.gate.weight": "model-00058-of-00060.safetensors", - "model.layers.39.input_layernorm.weight": "model-00060-of-00060.safetensors", - "model.layers.39.post_attention_layernorm.weight": "model-00060-of-00060.safetensors", - "model.layers.39.self_attn.k_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.39.self_attn.o_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.39.self_attn.q_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.39.self_attn.v_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00060.safetensors", - "model.layers.4.input_layernorm.weight": "model-00008-of-00060.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00060.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00060.safetensors", - "model.layers.5.input_layernorm.weight": "model-00010-of-00060.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00010-of-00060.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.gate.weight": "model-00010-of-00060.safetensors", - "model.layers.6.input_layernorm.weight": "model-00011-of-00060.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00011-of-00060.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.gate.weight": "model-00011-of-00060.safetensors", - "model.layers.7.input_layernorm.weight": "model-00013-of-00060.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00013-of-00060.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.gate.weight": "model-00013-of-00060.safetensors", - "model.layers.8.input_layernorm.weight": "model-00014-of-00060.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00060.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.9.block_sparse_moe.gate.weight": "model-00014-of-00060.safetensors", - "model.layers.9.input_layernorm.weight": "model-00016-of-00060.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00016-of-00060.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00060.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00060.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00060.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00060.safetensors", - "model.norm.weight": "model-00060-of-00060.safetensors" + "lm_head.weight": "model-00030-of-00030.safetensors", + "model.embed_tokens.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00030.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00030.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00008-of-00030.safetensors", + "model.layers.10.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00009-of-00030.safetensors", + "model.layers.11.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00010-of-00030.safetensors", + "model.layers.12.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00010-of-00030.safetensors", + "model.layers.13.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00011-of-00030.safetensors", + "model.layers.14.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00012-of-00030.safetensors", + "model.layers.15.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00012-of-00030.safetensors", + "model.layers.16.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00013-of-00030.safetensors", + "model.layers.17.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00014-of-00030.safetensors", + "model.layers.18.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00015-of-00030.safetensors", + "model.layers.19.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00002-of-00030.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00015-of-00030.safetensors", + "model.layers.20.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00016-of-00030.safetensors", + "model.layers.21.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00017-of-00030.safetensors", + "model.layers.22.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00018-of-00030.safetensors", + "model.layers.23.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00018-of-00030.safetensors", + "model.layers.24.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00019-of-00030.safetensors", + "model.layers.25.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00020-of-00030.safetensors", + "model.layers.26.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00021-of-00030.safetensors", + "model.layers.27.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00021-of-00030.safetensors", + "model.layers.28.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00022-of-00030.safetensors", + "model.layers.29.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00003-of-00030.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00023-of-00030.safetensors", + "model.layers.30.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00024-of-00030.safetensors", + "model.layers.31.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00024-of-00030.safetensors", + "model.layers.32.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00025-of-00030.safetensors", + "model.layers.33.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00026-of-00030.safetensors", + "model.layers.34.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00026-of-00030.safetensors", + "model.layers.35.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00027-of-00030.safetensors", + "model.layers.36.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00028-of-00030.safetensors", + "model.layers.37.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00029-of-00030.safetensors", + "model.layers.38.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00029-of-00030.safetensors", + "model.layers.39.input_layernorm.weight": "model-00030-of-00030.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00030-of-00030.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00004-of-00030.safetensors", + "model.layers.4.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00004-of-00030.safetensors", + "model.layers.5.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00005-of-00030.safetensors", + "model.layers.6.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00006-of-00030.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00007-of-00030.safetensors", + "model.layers.8.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00007-of-00030.safetensors", + "model.layers.9.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.norm.weight": "model-00030-of-00030.safetensors" } }