diff --git a/.gitattributes b/.gitattributes index dddf6338a57eda48b64064c1d686aea314b5c439..7e6f438db0fe867cd0edf03b1418e0b3a1c1f197 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8047,3 +8047,164 @@ neuronxcc-2.17.194.0+d312836f/MODULE_d1b677efdc213d35a822+7e4da68b/wrapped_neff. neuronxcc-2.17.194.0+d312836f/MODULE_ddb4b83b834889a5553c+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.17.194.0+d312836f/MODULE_fa4a1715cab5dbc60401+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json new file mode 100644 index 0000000000000000000000000000000000000000..5cabd82efb84d5e0fcd591d8276b5615264f5c34 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/513de6b2506332c5b9f1.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ctx_batch_size": 1, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 1, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json new file mode 100644 index 0000000000000000000000000000000000000000..53b9a8e6d4bf97e8891cc092df98161955e8b483 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/94c61502e79bb36d4b48.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ctx_batch_size": 2, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 2, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json new file mode 100644 index 0000000000000000000000000000000000000000..c0ecae3aca95f6d474125537a85d0719e083708f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev4/llama/llamafactory/tiny-random-Llama-3/e6729e799b90f142688d.json @@ -0,0 +1,79 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "ctx_batch_size": 1, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev4", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 1, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json new file mode 100644 index 0000000000000000000000000000000000000000..241210f23e0405ff535bcd7746493a24b3821674 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/granite/ibm-granite/granite-3.1-2b-instruct/b3a1fba358c17db868cd.json @@ -0,0 +1,51 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "ibm-granite/granite-3.1-2b-instruct", + "_task": "text-generation", + "architectures": [ + "GraniteForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.1, + "attention_multiplier": 0.015625, + "embedding_multiplier": 12.0, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "logits_scaling": 8.0, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "granite", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "ibm-granite/granite-3.1-2b-instruct", + "checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 40, + "num_key_value_heads": 8, + "residual_multiplier": 0.22, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 5000000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 49155 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json new file mode 100644 index 0000000000000000000000000000000000000000..688bd6be0f8633f0d6ca1cad9dbbaba652ae68e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/2265c570ad91bd59bb02.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json new file mode 100644 index 0000000000000000000000000000000000000000..6386bcb91a634eb25c024e21f14c1feefbce542b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/eed40aeb6cee419f447f.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json new file mode 100644 index 0000000000000000000000000000000000000000..a3acb9b5b9afe4c1481f59f0077003a8feb76e77 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/deepseek-ai/DeepSeek-R1-Distill-Llama-70B/fccf90563714271da2a8.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "deepseek-ai/DeepSeek-R1-Distill-Llama-70B", + "checkpoint_revision": "b1c0b44b4369b597ad119a196caf79a9c40e141e", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 24, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 24, + "vocab_parallel": false + }, + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json new file mode 100644 index 0000000000000000000000000000000000000000..8b8bdb435ab88695a10bdb0a5135d3f16938eef9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/0c38f1a1c11d2ca23067.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json new file mode 100644 index 0000000000000000000000000000000000000000..054a282284428a4d05af9ae4d6808806a7aab632 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/3181882551441281ffc4.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json new file mode 100644 index 0000000000000000000000000000000000000000..b4d6243e5d4b8e3d9fb0d15be007b78f7087594c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/59c2ac4deb5876ce233e.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json new file mode 100644 index 0000000000000000000000000000000000000000..7fdd07bd374d7f5da5e783c14da2c82c20336a33 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/5cd8dc3fd87fbbb5bee6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 131072, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 131072, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 131072, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json new file mode 100644 index 0000000000000000000000000000000000000000..77831aa0c3a4a6c1269b734f8eedd4e0ffe5cad6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/7fb3de63efc7f50b8c3e.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json new file mode 100644 index 0000000000000000000000000000000000000000..41be769517c9937d61a171d6722e062842b647d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/b4c26214cb4b9de35645.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json new file mode 100644 index 0000000000000000000000000000000000000000..8ddd8456c15777b7d9f078e2585e326e83f5902f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/d970cf435a01c931891b.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json new file mode 100644 index 0000000000000000000000000000000000000000..dab7dde7fb0b6d9f6a796e2c4d9bd0b1449a0138 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e2fa63eb39084b138562.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 131072, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 131072, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 131072, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json new file mode 100644 index 0000000000000000000000000000000000000000..da020dfa113114e31fb18ef15a7f579b9887749e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/e58176371a82a2c42de6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json new file mode 100644 index 0000000000000000000000000000000000000000..617bc7eaf3c72d0cef8a7359eca36759e8a14cde --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/llamafactory/tiny-random-Llama-3/edad95801000b2eb5ff8.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "llamafactory/tiny-random-Llama-3", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 4, + "hidden_act": "silu", + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 64, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 2, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "llamafactory/tiny-random-Llama-3", + "checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 2, + "max_context_length": 100, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 100, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 100, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 4, + "num_hidden_layers": 2, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json new file mode 100644 index 0000000000000000000000000000000000000000..e618799b3dc350ffc5bf05df0005a0f4a824c418 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Llama-3.2-1B-Instruct/a21dff1f796befca42cc.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Llama-3.2-1B-Instruct", + "checkpoint_revision": "9213176726f574b556790deb65791e0c5aa438b6", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json new file mode 100644 index 0000000000000000000000000000000000000000..d74b1f6cad110aadc6f7ba864a4e56a05169fc94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/1c6cc851d88b10f70611.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json new file mode 100644 index 0000000000000000000000000000000000000000..a321dd8afdf5bda01b0e16b81dc8a26ffa956888 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/25288e331f1cf66f02d6.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json new file mode 100644 index 0000000000000000000000000000000000000000..d38172b14ae7a489c79aaccac26fc3f196e86e9d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a0360a2aab05149b5ed.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 64, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 64, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json new file mode 100644 index 0000000000000000000000000000000000000000..9e80009b51724d4cae514746d961ea6203139c38 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/2a8ae18c973b94646af4.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json new file mode 100644 index 0000000000000000000000000000000000000000..966875a25609e1452ad6022778b369cb4183b467 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/48fd484fde912c3c9981.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json new file mode 100644 index 0000000000000000000000000000000000000000..cbb04b68014d09b30996e4cb8edb19c939320090 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/61ec5ee35df13f5203e3.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 1, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "collectives_layout": "HSB", + "continuous_batching": false, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 8 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json new file mode 100644 index 0000000000000000000000000000000000000000..146b222d1439e631a573272f85c4ccd50d18e91b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/690a9eef6000b3a2bbed.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 16, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 16, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json new file mode 100644 index 0000000000000000000000000000000000000000..91fb853613704680bf9fb154cddef54f6100e7c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/738e74927966314ed1c8.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json new file mode 100644 index 0000000000000000000000000000000000000000..5c45d0daf631e4aba99856997a9a7b5c497ee565 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/9b33c62e0648eb870335.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 8, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 8, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json new file mode 100644 index 0000000000000000000000000000000000000000..1a68633a93ac094b5515b50dbf2a64d258e0faa2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/meta-llama/Meta-Llama-3.1-8B/cddf83ce508409c44d25.json @@ -0,0 +1,77 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "meta-llama/Meta-Llama-3.1-8B", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 32, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "meta-llama/Meta-Llama-3.1-8B", + "checkpoint_revision": "d04e592bb4f6aa9cfee91e2e20afa771667e1d4b", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 8, + "logical_nc_config": 1, + "max_batch_size": 32, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 8, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa6594271b8fda59471c29473dd49a2936512e1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/01c51b5f669289b2eb04.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json new file mode 100644 index 0000000000000000000000000000000000000000..25f453a6d16c427e489c7f51db7cd9d31c8a90f9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0bdd17a350c28485d969.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json new file mode 100644 index 0000000000000000000000000000000000000000..009ed8368b9e52576ef542010e5123c472821b29 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0f3fbabe5ed533277bf9.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": null, + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 5, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json new file mode 100644 index 0000000000000000000000000000000000000000..7a0150d747f7111bb7d09073bb9dd6f632b6de5c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/142a929213c01997fffc.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json new file mode 100644 index 0000000000000000000000000000000000000000..24b50cb006fe8c6d27d24e4532d105bf1ee0e27a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/3023619cecc7f9cbaf9a.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json new file mode 100644 index 0000000000000000000000000000000000000000..f6af62b36854f68acb3aeb56cc7103e3526aa930 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/33a6d4289f8b2eba4ff2.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": false, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json new file mode 100644 index 0000000000000000000000000000000000000000..8e03bd917935620fe9c6a218c5f7dbbee980ab25 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/458eb7b3b111db07e053.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json new file mode 100644 index 0000000000000000000000000000000000000000..53fdf98f139b406dff01372eb53c6993b71beefb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/68b1648075a9c57bbbf0.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json new file mode 100644 index 0000000000000000000000000000000000000000..0bb70999a12776cd0fe1eab73c2f267e89a8f9b9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/ab9fe256f5b14c61d847.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "ctx_batch_size": 4, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 4, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json new file mode 100644 index 0000000000000000000000000000000000000000..2c732950a77e807e7548b4da8c2b945462d1c0f1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/aef9dbaa8849e9c96f95.json @@ -0,0 +1,56 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "BSH", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json new file mode 100644 index 0000000000000000000000000000000000000000..748845f910c5022dd9833990c6e1e213606c94c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd751009690e75e22350.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json new file mode 100644 index 0000000000000000000000000000000000000000..1f4732a7b49fb7cf527f73c032c204dbf0d3ef06 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bd96975ba59ea098e5c6.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ctx_batch_size": 1, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 4, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json new file mode 100644 index 0000000000000000000000000000000000000000..289a05142c5150167add32bb376d6738a0fe7d43 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f7b6a3b0f3b1c18b5df8.json @@ -0,0 +1,78 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 1, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": null, + "checkpoint_revision": null, + "continuous_batching": false, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": false, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 1, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "bfloat16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "torch_dtype": "bfloat16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json new file mode 100644 index 0000000000000000000000000000000000000000..bc0980857d8726571921e874572072ab5fa5ce37 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/llama/unsloth/Llama-3.2-1B-Instruct/fdb451e918153518b628.json @@ -0,0 +1,80 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "unsloth/Llama-3.2-1B-Instruct", + "_task": "text-generation", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "async_mode": false, + "attn_kernel_enabled": false, + "batch_size": 4, + "capacity_factor": null, + "cc_pipeline_tiling_factor": 2, + "checkpoint_id": "unsloth/Llama-3.2-1B-Instruct", + "checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c", + "continuous_batching": true, + "ctx_batch_size": 4, + "enable_bucketing": false, + "ep_degree": 1, + "flash_decoding_enabled": false, + "fused_qkv": true, + "glu_mlp": true, + "is_chunked_prefill": false, + "local_ranks_size": 2, + "logical_nc_config": 1, + "max_batch_size": 4, + "max_context_length": 4096, + "max_topk": 256, + "mlp_kernel_enabled": false, + "mlp_kernel_fuse_residual_add": false, + "n_active_tokens": 4096, + "neuronxcc_version": "2.17.194.0+d312836f", + "num_cores_per_group": 1, + "on_device_sampling": true, + "optimum_neuron_version": "0.2.0.dev5", + "output_logits": false, + "padding_side": "right", + "pp_degree": 1, + "qk_layernorm": false, + "qkv_kernel_enabled": false, + "rpl_reduce_dtype": "float16", + "sequence_length": 4096, + "sequence_parallel_enabled": false, + "speculation_length": 0, + "start_rank_id": 0, + "target": null, + "tkg_batch_size": 4, + "torch_dtype": "float16", + "tp_degree": 2, + "vocab_parallel": false + }, + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "use_cache": true, + "vocab_size": 128256 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json new file mode 100644 index 0000000000000000000000000000000000000000..9ef8385aa23fe097c3a97b83ffc8ed33af523eab --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/phi3/microsoft/Phi-3-mini-4k-instruct/d448cc693abaa936183b.json @@ -0,0 +1,55 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "microsoft/Phi-3-mini-4k-instruct", + "_task": "text-generation", + "architectures": [ + "Phi3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "microsoft/Phi-3-mini-4k-instruct--configuration_phi3.Phi3Config", + "AutoModelForCausalLM": "microsoft/Phi-3-mini-4k-instruct--modeling_phi3.Phi3ForCausalLM" + }, + "embd_pdrop": 0.0, + "hidden_act": "silu", + "hidden_size": 3072, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 4096, + "model_type": "phi3", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": false, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "bf16", + "batch_size": 4, + "checkpoint_id": "microsoft/Phi-3-mini-4k-instruct", + "checkpoint_revision": "0a67737cc96d2554230f90338b163bc6380a2a85", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": true, + "group_query_attention": null, + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 32, + "original_max_position_embeddings": 4096, + "partial_rotary_factor": 1.0, + "resid_pdrop": 0.0, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "sliding_window": 2047, + "tie_word_embeddings": false, + "use_cache": true, + "vocab_size": 32064 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json new file mode 100644 index 0000000000000000000000000000000000000000..46cc725cbe57f312fa9af1ccc3ad30f600bc9cdc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/0_REGISTRY/0.2.0.dev5/qwen2/Qwen/Qwen2.5-0.5B/b752b9c4c49cbb36b712.json @@ -0,0 +1,49 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "Qwen/Qwen2.5-0.5B", + "_task": "text-generation", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "neuron": { + "_serialized_key": "HloNeuronConfig", + "all_reduce_dtype": null, + "allow_flash_attention": true, + "attention_layout": "HSB", + "attn_output_transposed": false, + "auto_cast_type": "fp16", + "batch_size": 4, + "checkpoint_id": "Qwen/Qwen2.5-0.5B", + "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987", + "collectives_layout": "HSB", + "continuous_batching": true, + "fuse_qkv": false, + "group_query_attention": "shard-over-heads", + "log_softmax_scores": false, + "neuronxcc_version": "2.17.194.0+d312836f", + "optimum_neuron_version": "0.2.0.dev5", + "output_all_logits": false, + "sequence_length": 4096, + "tp_degree": 2 + }, + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 32768, + "tie_word_embeddings": true, + "use_cache": true, + "use_mrope": false, + "use_sliding_window": false, + "vocab_size": 151936 +} \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4791d130af9f8109b29e6bebf26d5d680a2c5474 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ff37e12281fa8f044284d20485952c2af497d3bb2bfcf50dd83e0572ef21aa4 +size 53031 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c72573bf7eaa7c712e08eec11936737b603fe090 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0449610f6fd4f6f631e1+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca18599ce5eec9b1297728172da2463337ab3ee0bbbb53457c2a8561f9305fe +size 213976064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d5702208316ed7c035c347fd4f87571e4585ab78 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7544aa0228f9807878b3593b3d9365ff45083234a699e7a6bf47b13bafd78d +size 136713 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..dcea471796badc25e42350934288e7df95ebdeef --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_046927b427b44084e1bc+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37775bcf0f850099391bfb1e72da4cd5ceae7bfc2e9165983bf8ffd016057a2 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb6f2723d1f18312aaf8a1944971fe98c41bf53e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918100fe462b8b8d4a67a7edf8bc9148a2c0d891ab575554f4055604f3838f10 +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e76137ecebf61d0022849d22820e9d2f0bb73098 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4196cbad5f93409e7072d3f84c49467f52bf5c8401cdc6044319f4d999e3ee1 +size 2724864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..574f13d205ec58fe72edb9e796c4669649a4b118 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_06542fbce823360bb08a+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc744c22e9c0e5d585da942928d2aed8853154a05d0dd0546c211df5c3b7e74 +size 2862808 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3bad8a326e2a2910cf7cfa0e4598374dbf4d631d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812e7533a893b01d4889fd56aed08a4d04c176a543db1294b6993942845906cc +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..19b74f99d7005f6baf16a7a958fbd178265724f9 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_0672b5f56c1c30461234+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..36cde0271029f7b29a60c9d2d9cb87791e050675 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdda8f0916cbf4ea683a5b82d37da6d44e3d8594ffb075276862d47c9784b5c6 +size 421229 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..34118995a1a3bb90594e68d91c285e37a4bb9806 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_072764f63c9851e5dac5+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6350f78e621de70a884d9bdd3d20101c72514fc6c874594458c2f6d86e12cdd5 +size 30209024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1053224520467247466420abf84d766d540de5dd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbb43375f225e75f29624572006b6531f223eb07afc06f0f084b46ef315750fc +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3cbd0d0dce0fa8ef87d28cd3d3c5630e4f86d58 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_088cee8e523341202b27+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..84b92a2975c1eb782d2da9454e7bc14e4881db04 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c0f4f84c58d9d114a5eaaa32456932c142d3aa585d6196c105de1c1d210d188 +size 136009 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ba883d9e21b72f8230e91bc78e6eaa99150aa521 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_08d454a8ab7b360279f0+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943fc968a87a68bb3041bc5845f1465523a85d1a8d40dc14703de0ca92ece72f +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4f7368c5fc23301824858e79bd10efb84445403e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa4617fa284fce823ef66375cfde6242eebf08b02012e074f5a5f3abbb31599 +size 55270 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3bddd9446cdeb976c3fe5b7e9eec5c26da5b336a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_09f28c99662b24ed3e19+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7087ea495089609f4e18abc94cdec01e15f84a0cc008b5d70d4e23bfe968d715 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f835360a753047b6e1da3944147288e5a1e52db3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f3dd80294c9051a9e8ff1f5e4eb5a646e3b8c6dbbd4c5a6aa8a6d237217298 +size 375531 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0684b0d41ced659f681c7e5452941a169718fa60 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e9e6ca1ebbe07cae334441774e22672f454eb4834a34019ac399084d261cb2 +size 1516544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..13b0a7330d5a371039457f253406976ffe6d23e2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0b8b4fa5620a4855f332+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:348b9649b6b8a7df2a1e571bab067492308d199a24c2ca9e75f24a483e150dba +size 1603514 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..793e479ec0e97d4082c9492e96f85d6e7ad508cc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb9ce39a84c7039460a99091ac81bc3cae059fa3e84d41889117abb0046577ca +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b26675f6ab21b02eed2280291e95e86dc119b771 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c567a566aed45527217+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:982dbdbe9769b168a9badbde0a61aa4248a9d8e90f9cfd3bb49fadc04bd59929 +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..17e465b29500287dfc421c8d743d31fbf97ec159 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d306ef1a91c6392e7b30deb0b89e40d577ce203876c1cd05ebf43e75ea116d00 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..67072b4f8ca1b229d5edc270b6d19e5e1faf6324 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0c721d432da3bc2ea161+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998cf0bdf7be0346d1d879365761c2b59e43e60f7d259d938d7da977733d9953 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7944191e21d64907a94ba6b5d12c4fa10e63e1ac --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ec18053447cb1cb442bcbaf59b49fa97c94742a8a19f2dd46422c0d267e465 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2fb9fdcc9f931f5579ddf232fdad18f8e8c41d82 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_0f7c042dbac93571be00+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0e50b96848979c053ec9ab2a6c2c642b1dfb032a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f3824fa2b74064f4a61d73062b0f353faa3ac1e01342ca30b3854cf84258163 +size 851466 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8063ad511f39870498c7e2bb6ab9f3ca152a2bed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_185f656153cbbaa5e1df+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd57a925819ece1b01115e8151a897c5223e0f9ce66671e4a3662e1646729e5 +size 36301824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c4854e080c49a63ceabc900490895c416012dbea --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f43e3311b9d83a4aa162f61115cf21df7aeb47f9c912a5f4dcf725fe278d5a +size 84594 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fece9fb9c6bf1d0cd82251fc87a66ebbe7d79253 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_194f7386f653405a01e3+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4547ad6f958efc355d8e9f9d51bcf1cef08dc37ade07690de39371eea802d5b0 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4b2e055f4587f249ca92d4e481b64e432e7ef85f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1ab9a9bc8b76147bb93389fa66a068ae4c026c6c315202afb6f1d545d194317 +size 45682 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e7d7011602f07a9edc9bf084536c2fd24a792e47 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a121861eb8eff7e1a738ec932e6c457cabbf469f83f7c5ce0ad03a4523a73670 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cbd9791e4f13766a60b00397f93642f4c27802ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1b15611e7d6b3a533309+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fb365e8e4d3c997bd2477f41da3bfa46d08fd899a9ff115620c9e1a9a88932 +size 162290 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..985e1e6ab81169edc4faa391269ed2be96a85322 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc5f8d4de9d781f0419a893cec6dc3bc62ac2040ce88b930d2931ab638a4e78 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ae8b79dac162be357387dc957de9a18019f4bf6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_1dd0a5da054703b15bff+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80fb0c317021aaef844fef7b99e44c023249d59c7425e85fb92ca347198d7bf +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a4c575fedb2406e8ade30ce340cfb7e9baebde85 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2137971bea207bdb91acb9be1bf33aaac12c4db06939596e029b04398282cf4 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..34604fb026d0abcb664c38e460711a7b33bc54ca Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_210699c2baa09235e112+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2d178778fbebccf6842fb5503485013d618d5590 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b1571bed76d1fcca273b4f1da1762454696fd38aee799df079f2aa9696cc10 +size 850865 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c9b27e3d0bfe7ef18e9bc11889ca3924e1de06c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_253544b6f2aff9508d28+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7bf03400cd6f77525afba5bbc885e5d0c316eca3ac5f44f8713809e32a49337 +size 32277504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7a327a1138aedf7f679c509b1717ad2e471c2e8e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82a4468f10d230f344133e3d413f1e6e21a9fc56f972c9a4ef54659b147d5a69 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..06f7aab2f291bf22dd3117e208b56315b5f83a6a Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_25cb3acfd7931dee158b+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0c6af5f04f59e9b17083d7c10bed6bfc49081ff --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e8148ead06342f4043259e16a8c99b3d5ea22f0a254da9cff35dd9a98a11ed5 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6e37af12b5260fbd338350f7e706806702f22696 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_2b992eb92f20e92de727+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..89cdbad77bd929cd20a67a65b9c0ecc9bc44b2c7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:606694f40496858e48b042dadaf2b67060c2a2f5cc13983ebfaaeda38a7db4de +size 47194 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..550ff5328646eebf7080b6946019d1abcad9bb2f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2f253907beefd0240403+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd661e28ecdfaf1c4b1362d7e3080d5f2dbd12f26b1036af5435f53209c8c7aa +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c71bcbe9639c7b4737b3e74f7801ffeda76a5062 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:295293f8c7852ed834baefc0e78cda5c135b1ccd577368fec9d355c510b44db6 +size 410439 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1d12df5e07e3b6bf7ae8be2549d157db4628938d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_2fc30eedacc5c737de4d+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9544998680efbb4525c277d1109272405d7f48612af16b4763f1894d25f4dd +size 31785984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3010733f7f882761ebd8+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3010733f7f882761ebd8+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3010733f7f882761ebd8+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3010733f7f882761ebd8+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3010733f7f882761ebd8+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..027671a48d9242be7d6e5900f158f50343c2e203 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3010733f7f882761ebd8+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373713a19c37d0213b439380ffb05009fae9869c3a6e00b41dbca565af9b6da5 +size 425388 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14e11b85ff0f8dababf2884ecab66b4a16beed66 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05801cd3263f8135e8ce8fd086b32fb1d934a8d9ae7c1685a8745f7d245f9323 +size 69051 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0937180031ae393d94924f5b4f693ad2ad5ecdd1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34a6b42796c8b4e2f58b+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075eab47089f6b5408189c16efa63c02b2be7813b34cf32f7ac353459842c720 +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0a49e90ce2887698d8f02167aa54f4ea966b086c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5da061dda32ca95842d9626ad2065911721bbc14196b4937d3705edde9b64c +size 774401 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b85d06d77ad46a3483ec224579d3adeaf0843be3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82431da774c7a22311535f345020110bc1bdc5b5bc60a48cc6cffdce209385b0 +size 3789824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0a89d37ed6e2834857a8a55d1b787304b4340341 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_34acc8dc9a4c77982018+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48a951a0fcd160dc01ce3c77887bdcba244ae3e72914001a3c9853835d7d414 +size 3927641 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..39314d95f24aba62f67d739ff4bd0a2273367e4a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dac56a2f8851e6baf8454739874e4cb3a5d674decc93dab98f310fafcfe48de8 +size 445394 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..6c550beacc7509931de7d0cb5be3a3cdb9d3354b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3623ca04e2134aeaae63+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73b60d845131590a01fcad48ce5136b7858378370b5a460570975fd36ea9074 +size 6267904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ccdba1e68b2f4f102baef4d511f5438ee0e0d3fc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a17898121f429cebb326d20cf0cf3bad35bc578e61f0e538cc86f32e226fe7e +size 42844 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b1714398e11ee2141fc4a374f39cf3eb7256963a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_367117cad7f17b720ab0+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6070a18334fd7917ac0f734a57eb2e5faffd2aadfd00f37ce124c4d011545d3c +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1c52faa3b0db6f1d0863e0ca75793455aed154e3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a3dcd2ead65060dc66788a9e49031115ab3ff754df2cb0d10fe04c93f0b53c +size 774385 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..edae4a5480ff3ff2fbded6ea977eb8f6f701896c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1009593c6f634c326a069d9c227a4e5cb9cfd27aa5ee3989d61d858b2573bb12 +size 3103744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1d3831c615d914b06a6f02b71d8b7732ad068c6f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_36767b8c2b2c1a0abc7d+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c354b3cd9828d7f2de90962a714d173d592082f973408d8077689765ab30e56 +size 3241561 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..68ecd7d2b843f41cef63262c50ae56ad5abec956 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea52d380fe4f37859998791e1672cdc8e24a757aadf3c9954d545acd81742d5d +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7b68692ad8b0db4fdda14db6547e510ce4f1d861 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_38e85f0b1222d1fa5d21+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837753189e7d070d26672d57f1a682fed9737ab7d3094ec0fdb251419d09e97d +size 7742464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..910e47f244eee704195cfb5d1588096585bba1f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a02e9ccc29e6da3480938961adc21cdd190bad8cd98fa2bcd5a16fc6e185395 +size 448531 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..105e4c9bd59e1aa99c1a0204ec75c2bc3d4d0b6d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3957da6ebe3dd483b584+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c00dee85928a1f9b7ae0871aedd0ebccb032c8700adbeaafe94d93f3a79ef680 +size 25181184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8fa1fd16b97a00b738414066438e3b279dfd9ab --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd11f9c02b52d7f705aeb808f36b3f9f29a503ad5149d528b8a37c590182676 +size 413964 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e87409e7860463d95346134f216e6aebdfe3a871 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed184f3f77a84fc4dff596704bc40263d775e8c199cfc8eb838654670074f5dc +size 2212864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ada5ead57a487761dbe24c6f352f6ada2f8b2fdc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_395c548dd9a6d3722142+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f7e8c7c4bcf9fbac51e9cb9f08b09a3bb1fe949993e0e221e2926a9b12c0d4f +size 2282608 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..677c422431e1773c81e112a267e1429e11bf6ae5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e9655d393e7edc003fd49e54ad3933f831525d8b4dbc2f8ffd75aee40f1841 +size 43178 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..837be56a9c9c97288ab7310ef68119984d5c3b11 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3a93504c3328b48600ae+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c9399329c2ede8a1c26b8239a26f682070553b835ce000d8308568a1848b52 +size 164864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d0ebee16350c14edc8ccac8a7f7d40bd6e5c8c9b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e075c7397ee8a0808897a32296a37ab9fd3074d51eafcd3c50b9489c1a22b266 +size 82489 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..da22242427ee4d6c26d08dbac653048b423cfd50 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71c077e93530edacfde4757021a076cd10a19fed69acff91b509e1d28170397 +size 461824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a83b93d39ad02a00af95abc93dc39b0c45c2878c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3aaef0995cabf6658420+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e62063b27c5ad62e19fdc90ad60db49316407efb7d7c5db1b20458df13f0eac +size 469663 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ef776a3ae5fbed24538c890545ce1e3577e1828f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6483360d51a8a440933536a3b2de8eb29088522cc6cea6ee66e10ba7fb84eab5 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2cbbb682783522cb65fd5fea9a6670723707c596 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_3dbae589d2769cd2fd66+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..44e2b3dbeb4446bbe262d6061a846c969ee78d0b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5e578af1e7a1de600b45032936e48542b291fdc730887158100826d41680c93 +size 45872 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f1a15282632c7f43ee46075877f863ecc6c1b7b5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae229cf9481028f6d5ad93f458bca08764b5b2f4211672820136cdf785dc308 +size 390144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..83de5e2231e194d082749b744ffe82401fbf1c3f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_3f0b0e6212dc79723cc3+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a1eaede11cf1d7f4c0d27fe54b25cdbc06455fa4a5ac85d414bd134e4c596e +size 397874 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7303d18f46979d2ed04959b739a487b29b127844 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2f25bd3c3845ff49cf885db664160bf05fd476ea4200b73baebd56f479a9fb +size 90431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3d6926a7563b56fbd719501e5530fadf5ace1c78 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_40f25e3af47a87d55eaf+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97453c1d5b9445d40a9b0bae6a69b41962f3eab552123ba9ac8c6414d83b3d0f +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..704d27e04015af11aeefc3f05d4e9ff7b01033a6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b58f20429010c51cfffea333665c28bbc50bcfb4cfbfea524337e48abad5a5c +size 447519 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9e6a181f493b170876ca858eef056cdbb03ae97f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_42eff25e6747e007380a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20f29677c579e42c38a5e0f6c3265139441c8066815c3fdfead7147cf656617d +size 25181184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..587be96c040f716a1dc8765262f417bbe2b03b16 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f30c313c013d0c1bd9bcf0e0a13feebc2af2803ad3f3337b94d7457d7ee4bb0 +size 1914485 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c5b0c95ee4bb307f124f40214201b01918453025 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aec6522cde94b94130728fd1f7121ae2c635703e4d9e57561d6ca0fd7496b4a +size 10333184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9b1bcc6783256ee9f744ef9b86b1aa0dee521a17 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_43df25d3bc7b10bffcdc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca560e921973586bddfbdbdbc5d097e1efd8bc81cc29bdda3650c64e7c35c9f6 +size 10676882 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65e576ffb5bb5749c0c87c3df58021ac1291a78a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f34d084878c04c6615bcf75cc4c59a958888f10e8aef5449f60733509befd2f8 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9a7cefe09a6cc882ee87ae546e1c9ee07089c7fd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_45a0e374789b934cc601+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611dc8ad87618de23b41c21879d25be94cc12f484b4c4240022d63cd91763c79 +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5962bed772570ce908257368e65dc535f00df158 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3ce75858a025549f5c1a8c5b259f833f7a70298977d8bcb31b69fbd7baa354 +size 482022 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..13945bd874cbbdf8aa6f3ce6fe3067ee67e518b2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_46db6cb5e1cdb009ed38+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275604dda3dedb594131871606492a90bf02c0cd403cd9a4191db0cce836e60d +size 6421504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5065c2d6f20b55b53d484536c77c6f45fa26e9ed --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e66ec249ad16c992cbf5a9fe134b429f5ffb31015ad95d48b06163013e87f288 +size 80815 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ae2e95fbfbb4ec5efb3a5158e02a164f026d716 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_47b49c104312abacbb79+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a57fa508135d4d59208e9c9ee82cbb303908e76ad18de2fa2b03fffc37dfc0f2 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3906e441667f3f48d84d2da23e19fe687a66bc79 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da33932abafecde481f769f8c365b4389cf32ef68dd270cfeda7fa2ebe556543 +size 331676 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..237d3b8a005b909fc11e4d2b03649e901cf70b7a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_48c6444adcac1b53fda6+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5255d986fb2240fb123fe23f9b70b6c0119d53197b40256a564eb5114fbf12c +size 2571264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..477830e5869c6dc8f89b1c7ed318cc2a3d6aa809 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1126587d06c2dc5e88990726c9b5b49818ae491f81449e4ec1713e53b7cfee45 +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..53fb78b5efc55b3048c2a8bb3906bcc70b96a691 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_493e78641761e8b5a857+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bdd2e2a0064c634263785913da590c5b76d79a3e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd99ced4ce85b798eb3906f3c537fa8aa0db903aa246441fe303bab63840ed0 +size 69058 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1db20fcb4ad4694e71d573fbbc87c7a50b8fdbc4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4b14ab6f395240df8643+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e15e1c999a6c34eddeae1d2ddff471bd8274c161b0cdf9f10a2397d0a428ff6 +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..58797ad4dbee1adbfe0492d1b2eeebd2a2229b15 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:499b47176f6148cec2112f5f61595728e1e9a63b42efba4209f86d4632f5ba48 +size 2268363 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..36405006d1b6bd0d934a2f74a165230bf409279e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4c28a8da143133817625+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73707863f3e65ce1080a8d8687761f9085a02e472b380471314e782c02184a9b +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..eb03f69b1f55081f7ce2e8a95edb2babafa8bf65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d1ae4fa1f8df057937b891eedca83f96c80145297bebc61014821f220bf154 +size 851450 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01f9162f5136cee8eae9da34c783de5d54735231 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4d11f774d981aa558da1+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02fbe4c78f0035709a032f2add6db202d9fc720419b70c9e261fd7c9871d2e8e +size 18545664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8b8c259dc59696cc7e2b534f8bb8619a53d9cb59 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4416339d0e2e57324d94a492d9e3b4b2fea4c87d7e20b2ed96ff53b359aa427a +size 45682 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..88ff6130b565af752a4a79896bd4f7ce1e6e98f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3c9104da3cf8214de132b364b60322b60bd67b9e4ef346b7885b81c96ec542 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..dacdcdc12219618df51d5e1c0a49fe7e8a0116eb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_4e95c3db255164ea7028+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92e167b02ad686e26c9f6c458e673babe22cc1b2ee10b71a0988f3e53987385c +size 162290 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ca18f79643c17be12440885b24ca13a83a84813e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d0ffae83cd5de1473d83f2e7cc99778b46d17b5294fd6771cb15b924f63e789 +size 47194 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4c260b91f5f5bf51b7516697e869ac35745ce3bb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_5368928916623911b1f2+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a6f31d06a967e0605add0a59526083df8e89d0eaed1f0652c7a5a23d2ce5f5 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..97d71dc73963f060a36dda85859e01f1ac5fa646 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06bb058e8d8c8149275a9adba004933b467e65aa6f7cf60864e61cf4e111fcc8 +size 58120 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..421580a151c5e906d60095d31df0ab47dfa803be --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cad33c7d01fc95e9c4afee863df51b6aff289c1be150552fc3e58aafb0c716a +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2bbed7bce167c32b79d06ae5c765a53833d8b8b6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_578d0c177e156b5acc20+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200387ad9a20e142a4a94b7d4417e2dee103fc729d8e279d9904b3b5de037d01 +size 275039 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f1e8255eec4367c12ada88787f2c3d0af4928cb2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fbf03bdca8871117888a18257ec507a634b0f975a450294af733fa147b17504 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4d534b6db871dc9f98c59c55dc91e750501575a1 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_57cf1c9c1db90790e8c9+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c8a3c488084489d00f1c5a626dec2a0b940bcb10 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62ac07377f0886a6aa4645110793df0deb2aca8574845df4c62f8a214feeb70e +size 777289 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2169e3a09d31d1c6418c3c8b8939bb8a569c868b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c7b0724fcb3a5edb36045d45a11660002fcadfc0f6f7f9a2fcb20d2106de76 +size 5889024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..ae4c8e3b14bc6aa816148ce6d246356d1b3d144f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_58bd8ddaffbc027f1a60+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c0267119b61367457d7c416935d5af7eeb41c287d46b1209c54a3c0096f869 +size 6026968 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d8884eebf0b93781b9340c0bc6f1806029ad85ea --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d057a696da9d4013284fdb8114b3fa6802eb8f8713e34a0d83d61b0e385f004e +size 771744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bb47392270e83d1b59e2d0d5fd384dfaae8c7b94 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3131ae16afa64bd7d04ab96a60864cdd61bcd823768b35b8419a090878281d1c +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9dc3912fdc032211e0816962673b9615f6649f29 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6295dbc32df51248e89e+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96803eada0e7ed5d9324a999ac82172d6936e04feb34af34bbf1383e9de1e293 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..294b3d8acd2a7f62dea63bf67b0bad796cf6a9a2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05dfb434ac3afe697289a9630e20cec8959061e6d0244d2ca7dd42c498ce80e +size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2e3cf8886bbfc1f7af32842437304909b23fe417 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_64e6e60637f6811c5a60+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a17ff48da5d0a6299577ec66a90b74bba08ebf9a679661ab0f5438620793ef5 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7bf847b4483321074e08b33c5422e870dc72c704 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a15276c21dcfefdade76a1d794ee9b8812eb5e26d6ec4bde2c362d602b32564c +size 88456 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5c2729fc54b9247065b3a81b10be7d23da27e56f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_65f7cd12ae54aaef2e5e+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d6da52c6a0a4e3a76be67b9610ab504221e4ff9b20055f86874b2ca6d581049 +size 1178624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a2079d54d518f8ce03954361efbed553a7fe2c35 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d36631128d9369ecd150de3b27d12c13da0e66a956103c213cb0ae6dcae73b2 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..841cd55640845960e3e1aab1dc81f56358d1204c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_660c70dc749c87df8b92+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e152ecd5db53775955f76490b5cce40680b885e39893b97cd562b5661bd0874a +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d675c2f0549a9a33c7e404876fa969723f5b2397 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e53fe4370d8cb8a07547ef0d795003bbf38c154721c556185a87123a090b75 +size 774385 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8c584fc14f0ffe260740ba7dc557fda411ff65fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab02e76b4b49fedefcd3b659e05b8201d8304fc0bc8a7ce1b8abd2b3d1599677 +size 2520064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a789d29ffcc0b195cc2be4a697a05a5e2a2455f2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6932a9bbd858d5b69426+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04221b0238147d09e98e081afb2921832cb737fa7a6e7e0164e5ae782a8ff8df +size 2657881 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f7a29c9139df8121826dfde166febc14ecdf9212 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654d36731a09d98daaa27f743413733cd6a9604dce6f457e2b7081b35cfcb6a1 +size 412354 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f3044734d66de6f014a0412ca23ae057dd145963 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef63ad612d586cb399ba652d43f4cd865197ffcb4dcbb8a4ecf684b0bceb8249 +size 1649664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e935acbc127f1c0c31cb9581d7234615e30629af --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_6ce512870ea1744a36e5+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2abfb34375b67d35a79d5ee74ce44a4e53b76c8107105d651a4df47b71542d +size 1736749 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5c8523c09490110a7a7acf36b59e374d3ec72163 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af2512847fc86a3f6dea3d6fe8bf73ebef7fe77c7d887f66401fd43b31e24441 +size 82299 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d083a2e357f22f7673c25c8e01d77913ad7c40c1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48eeb80f42a8107b570efe66a70149b11a5ecb522045fcc92ebca3bda66c43b9 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a647f4202d47a26c422d9d0dd7451bda07075ff4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_71e1efcb3071f7cb00b6+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61fdfca0c1f8eccc9945c8adc0ec7530d73cc6f9995884bbab1cc6d631593ac +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..551e914156114d254b119588aa04c05f635fa528 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b24ddd83bc533bb0dc6fa01f666b270f6b56c456add04c37bc1e13d125e841ea +size 417838 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b7abfca8f6836379676e9c882b87e6e865206888 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ca8a3d3686d76ee3c29ae6cf65f74f5696181b9989f5e1ab91d989132e40f1 +size 4312064 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..59d56d0398f06d4ef9ea54a1a1428854d51d83b7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_72a72f9ba9aecebc0ec7+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488edd1540dcb498bdb7a9b1d0a3073fd2ef38106f02011bce80d93e89f6869e +size 4381935 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..4d49a653b9f5cdf628ffe2e64d263db031387df6 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25c8cb2b38101e7f69c2485fafcdab296c5befbbf1920020db0cdacc9283a0a2 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e5e5c873b2bd75f2ca4e235e10fe58c3cbcf7455 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_74829cf6741d23dce056+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..974349d60930a207cac52bd2f422f188bf0cd00b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f9610dec7ae63287ed558149e7f54cb819cf7c7ce9876aedc1b0931e3e76ea4 +size 80284 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b881d03cecead8525b5968da4cf55315b86ece27 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b620936c28987328afddcf4640fe4578dda072cb8e1f8b3179f47ade9a923609 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..acdf9ec49499d24ba04c3e59fba8e3c951c5b6a9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_75f61142a89fa888d71d+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038fd23bda7aa7939f823eb0446c52833eefc4dedefdf94d22301cf30c4f7070 +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..2f71afec14c66232afa9acc016373deeb50bb217 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:580bf01a47f9cc1abe14e28d6b87924f1be5f6e3761a0139fc65cb48ad7404b5 +size 69051 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ebb953fc11ae45997f205d00efd9e9f61364e76e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7679b52d5f03b0a3e52a+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14988dfdc69fd92e9d50ee823bd367f24f957b2ab6065b156d83992c506d792d +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..191cbfe3521a6c7b9959c540d58dc085f5afced0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62edf6a6007eb4951bc240d2fe5bab835f0e038bfc124805770cea4efb3189d4 +size 851450 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..68672572ba8bb66d1fb10502e9ef559e59c6426b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_76aeedf1aa921bf04675+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab011d402d7098bf212b1efec89fa95865f372d7c51e6425aaac727280c7dd69 +size 9155584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7eedc45142d6eb0847737001c9e13e57f2a8f442 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:361c93cf23b6c861204f434b6f4b680add2a341dcb521642f95ba99ad51408fe +size 48045 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b880a5479f9900d38ab7b86043b7c904bae68545 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54cfcb9cd9b7550a597e2281ab355a7dd7da3e349151b415422ba5c9fd3e7cd +size 2356224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..670b72cefece981dc55b5e1878f6a2658bf5e376 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_788eb0c6a9b0ca759eca+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c120b74390719a05ce1d398d4507db96cc267dbd826310f08de08324b19611a5 +size 2364014 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..187c29ba92de3a8fa4eb937c06b2883ee2b08a93 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc47b01f0744d6835634949f81c147ac5aa154ba4752356d7edfe49a76647130 +size 232973 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0f47fd619be380d689c74c5d846b5702ab9390a4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a4e910c3dbc7ccf8eb3+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80dd4b636c1c07e1433626d50e1c4c27d1c17ee492c2f785c4736cb7250c630 +size 29072384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8fffc794732adcc826a6c7548e255e1417908e1d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d5e1e45f49b3c84ef59b31e960b488bbe5aeefcc8ebce945aaea361e9680c5 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..086b0c3e90d7e6fe9b9f29f2b0620bb6ab6dfd1c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7a8db19a5139d05e555a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9364f278d2c3346f1eb496a4e7aaccea22d2c8e6ccebb31a21ed45338ce85364 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0aa9bd6a1f2082abcd6501fb56c852fee896b855 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3624a583087c4b1c7d35dba22851f8fb5429263b219e13ce2179d8b11645f2d8 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..7b6761c18373a6d9b5dc37beefa322ef0e3bede2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_7b84dc32834c9f9d06bc+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597093128bdbe3b3e5e50005930ff0cb93f133a5369e88bb02e03c18f253620f +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e495aa83ee058fdcd936d45bd17cce0673306b5b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c325ed92a03a92f9ed903edf1f90308f0d42c263b599462a74ab76b8eb30995e +size 46073 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..03267f8ca9de48553789516f4cbee2dc0a8eff05 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7953347c7b789e29f12296126d63618feedb9936e098aaa92e88ec53908c267 +size 175104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9e7d8725b1e26e10c920c36418a26b9ba599191f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_80077fb6927043106c33+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2665c6252208494148151248fcf5a9dcb49a8b73777664b487b3c0c787efdf +size 182770 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bf0f0d923373d6b31d17a56858c9207b8fbbd011 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d29567b9d379031562b538d6faf0fbdfe4a0b11782f3bf2e1f3274e541b43bcc +size 458016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..28d09995d94bec795c88c8c93f3194a94ca91c75 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_837e23366798c440ccdc+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9c96505aa6bac3f4e2777240679f3ffdb01db4136207b144e8c29c5b4ae32d +size 6431744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d7edf89fbbf263f55698baae67012e9bae86329a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e60ea05887da185382584017c8ab3cb1546ae53cf6e60d0c9397754b7ff724b +size 7113 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..118fee5ccaedc272bc032cd77e13e023c1efac9c Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_8843c20ed56d595d2236+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..57bfbf56a6575adde228f1e1b7089ee13caf2dd3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1899753a61026cc1113a9fd8df0944bb76d26fd38bdcae6a6ccceab7e6ae8e20 +size 376914 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..677f8d909dd5751f548f03f559d98d599a25da77 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbba648c2f71cd2802e87bae0d44a2c5fbcdaee7afb6740036b29b61981e5d9f +size 2151424 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1007b553e7ad60926cfdbf2e28d824cc1f242b52 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8a1b493c9c28be330b53+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bf4145ee5b450e3b15e73416d4f5ca8d109e2daa00bbbc3f8214cae665aea6 +size 2221053 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c1e717a3896a8f060db2484dae21f9a9858e9500 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d2d7e8969a21ea901315fdfd94653831ae04056dc4587bdb68cddbcd8c5386 +size 429280 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e60323974b68a6cd0984c8220268c56026604bff --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8be07b7e6265e9091606+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2980cf4459a042fd7c9ea455bca8c108d0b17c94059bbbfcf1980708e433b0c +size 6431744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..818cfe200b33b363185427f171177827d0934540 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..aba9225a0a83f76097df0d502ad837d8af71e22f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fad8c0722b47a516e23cc7f9c16463e66f908abae39ce7ea2d1e98932ee04110 +size 410718 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ea2b70547ea65aa18aa9e3f376ba18a8d3e3b72a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_8f8f2bb409247485b54f+841d78e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8194fed710fc1236fc22df831a1f8e5c3fef7e747f69b093a4751902741aed28 +size 2683904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6d632c9de2d3670ea609ddec170e0b4613fe185d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f68e6fc2c2247ce7381ea1fc3f522cc57be90a48dc4f55878b41b4159a45c26 +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8a3f2eea28f3842761673eadf6b78273f0d54dea --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6603874735ad499ac9feb7d4f970810cb351e29e36f379192acc35e1a9d1f2f5 +size 4619264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7d0d1397d389a9fa3be37449fdd2cdd276217711 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_921a42cb1de02e9c4230+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2ce5be81baebd204abe8a836723e6baab4b821e50581c118eb58a10542a8b9 +size 4757208 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..324ba9969baf5a425eeb63b029393a709a6aad13 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c04b52a9d7b1c44571496b65eab600bb1b385a3cbc7f8e092e32fad992e307 +size 410647 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0cf4bb7c9108f2548d1d4c116a74c04efbf5a813 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_953b5d662c5580b4b579+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c22af2cfd936fc83ffbb931e7e19b7e96723c70ab550b4ea5385d9f61586e279 +size 25140224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a24c309fb72d4a6c3fc2b48f2e4b330f98945077 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0f2b2fb89aada76f426182e61cc4b3fc5274ecde4665f7745a7b2742418492 +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..82415664f1a7a2426757c8fb8679f74576f5a34c Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_96404ade088398b2f3d5+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8961d64d75806b635263866a7abc0e99b41beef7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5868766c097b546ed0a0fc35364ebea619aa954f1ee08d075c9cf4890b2bdbe +size 90431 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..823381e27df7ff618ab073996d335bf343fc424c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_966cc4bb99f8d4385df3+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7e78937ef2e29cf6d90ad789c71bcf9959bdf78f0251405262a4ec55880ec64 +size 213853184 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dcd1f5ce5fa97a4657f72387c6d00069ee60488f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33d1a0a75f98a7c028617eddb01556fa74f5a8fc02d45fea6183d4ec9040a2c9 +size 69051 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..295096b21d1d9afc12750f5b2d45574e27f686d3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9781cf9209feafdf03db+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39d6cdd765b0a5f8070ec281783972c5132e8aeb5dbf37ff5d70e9719afb9511 +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..f89477c294b7c20142b91f810f45863616c6b69b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5d556198cc20c7c9fe86ff05e1030327c8ae09fc80443c737c0a31dda6fe7a +size 773836 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..486ad09e24ed7967b02699520e9d0b3db107f012 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cfe8598097595eb0bfe746aa5e2f66501c19ad77f6508532862920fc3f278e9 +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..d61617cb220ce1c88e9ff0badf75f67de0d5ab23 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_97b8b043b93dd99b56d6+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10f54d19fa2d213cd731aeabed2625efb780e364c6bda22711fe377ccc62144 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bcd3dae1eb9dd48ef04542ffb80368f16b6ce403 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3559f6e56481bd849489099945950b4cd0e1ea5a63c95778aa69f414e58ca20 +size 55866 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fb93bba511f8aa4062be2e11e20cd494190c7d03 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_99ab9865d3a6c8d97b48+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d2fc7d49ee934c70e8062c621a4f715094507dbb7f7c8da8a9d3477c45b32ec +size 267264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ecfe1c7b9469c3f7f52ff88ff10bad52236713fe --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42df04557626b26ae2cdbabf2b5d7b2bafad04998bb8ffc9f5fa2569acec2ad0 +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e3406aeb93979f49907dc7707daec0b6922d62f3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f131d3d3241dcc0378d9d02fcfb3a380669f1a6d9c4403f1d15bdae6d84cc9 +size 6994944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..8a79b59913e872aecd75777933f4020c0dfa55c3 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9c325f953af689de1d2a+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f249ad14b6fc365498028665a1bd1d50766b17f8859061df2b61875424a829e0 +size 7132888 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..565301a112c43c776865041527cb372dcda49ea4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dceeb7dd49382409a9411da04b550b2b7703c090618d8778959ff6624060325a +size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4897ff3d473797b1583a2125e1ae47aa24224717 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9d48665852815568ded9+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2703a655a98e6c0b80f77c69e9c7f9067c21931cd629b7b70e27dac8270ebf3f +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..7399c3270e3b74a5fd8037a0c2f5bceb4a3bf460 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4226c65b197e328c540f0b8c4e9cd586c349215b6bdf06aaa804c0307784a12e +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..1884c7f52468112220f55709ead16b35edb97dce Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_9ee067d21fa88c1bf1ff+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6e4f50e0ae65fead7e52689d35a5e560c094df66 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45ce241db88d08322e1ee7382ad5db6041682f2d05ef11afdce89cc2ee9d457 +size 454062 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca9e74c694ab1d85320c2b906a4c03a509194e23 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a07d9411638f7ae0af71+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47a9d5ed92abd42fa34af276c282fe5c489a18a2ec332556baf9a3bff0081861 +size 31734784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9e6759a6665238a9df3e444decead547074ff8f9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c2ea4cc7a8e6c80169d0e98c6b723f5850a5b21b0ebfe4b239f171d610487e +size 80284 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b03eb4fe35b7e8334873f82cf26d4496c706a0fa --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e7ee718e24df70b9e6ac412d43a459121ef084dda98a82934adf3f5aa681130 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..76b5ec2312bd6638371afd8e7a4d39c2f44b05ee --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a21bcdc4a770063da8b6+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af63caec1c4d6a3e460d0d31056cddcc76ac1b45d4f985ab56fea5b95e06080 +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..1562dee19782fa6cc616e563f7eabce02015cbfc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8069b733c164fe34861244cc8c0b5d32ad6ef73e4f005634a7830c82912eb0a +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f931a678c9db21198fd32a33ae7c0d0400131469 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c04a2f9cff6d7bf88be21b604d8ef0a264722ac417dbd8562fd022791f7369c1 +size 4619264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..dce7644e9672372dcad74e6d58d81fd59d603e68 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a238d60826915510327b+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f63ee8ae5b3e31007aa28546e3dc20260db96ac926479a04538859b0f5dd8a0 +size 4757208 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8b23fab3d1a654fc977223e4793d4053587f5023 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8706ff9ebc6cbda4cd0bfaeb9badd4c4c2551ddb0e01ded252607c40fa6bf241 +size 429764 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a5dd8fecb73c67bb2be3707cb23d8dd93d689c6b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a458e55a7b863cf8358a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d84624a9deab3d606e7cf166c891110470889dd995128188df6b46d00ac872 +size 24515584 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..06accc9c5ff798f4530f932058f3721e4e3e8f76 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe649ea431b500751c457fd7ab93001853239f58cdb76eb3987c9afd8748827a +size 394018 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..141b1571ae80580c409e02ce44d03697063e17f4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfd11d6ccf0169cb1e1f16b822821d73b27ad954f53fc65f12817f716e6127c7 +size 4332544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..0360456b49c946bf475c132918629171b96b88e2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_a733d51bb4150137ce7b+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36600189fa6b5c8bb488dc96e38237c3aa521b40c7dd71a518975ce0ececb244 +size 4402415 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..00c84661d2295f2d35d07074551254e567b841d1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d1e88cc8264cd8e6174e2301fa2ee0ea8af7faaefc7970bdd517af22d7639d +size 87671 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..095c97de7f54b14299daa4fa15699dcc4ff31a92 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d4c76575a89c7f719ba050d61906543ea5b994b42383a890c674b9fe0d0aeda +size 2325504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c51825e00749d378b5f922ffae6166e80c3acf5e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ac566d62cdaa098c3285+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb72ed753add5d51f166812992c3825f2ed84bf5b6cfa60fe0bbe9eec97a0de3 +size 2333403 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0cc3528823b528b0feed4e4a9bfabfcc83ae382 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac51d0cd862876974b388b745079679da8be654ded686b0c58fa58ad0c0af17 +size 2084464 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9d62a82e18cba5191ca4b28ea9d6a2b4b61ddc81 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ada321cbb84058500c30+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2fefc2ba188f96b31023f7e5feb1e3a6a561902207db53e500075fcd886c10 +size 3349504 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3a073e9aabb9fd85d0ee3bbe84f0943e25c23439 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:158ddd56dae2e72f5249dd5769bba7cb5a55a8d3334dd6849f11507f19d69c42 +size 771744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..20dacd28127c440d9138c4d718d0651428b00c6a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cd3ca2fcc093163dbaf658f2917aa9781a65a15b7714b13ba4ad4cabc69bf82 +size 2171904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..43fc1405d42a0c4491677ab737d567095186fd86 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b04044f28490125b67f0+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac184ef110c27b44f08e34084c5c7944bd17b25bb95474b80e3a111575cc42c8 +size 2310233 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..26d3e1a5214c6d94617656132c37cd098a74cada --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044eb79fac79e9ca16d46f9796ea4e1c0316e56bb8ae6a09b0f857020a54dd90 +size 80474 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5bddc3ec2ba51eef13818de7a36405a3c5037316 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cbe6890a2c9ee83ebf176997878d3d2f1beabe1edf6a38a9e1c8a90125b7f2 +size 461824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4089e756cd80166c33fbe7dbc443740a1fc1275d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b425d3d8062a3c8907bd+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea0c70ac6bfb5a52631a6fcf61c4d2e9b5a0aed55e0367447df9d7bac568db74 +size 469663 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0399c7b4a765eb888224766628cfb3b32c36d528 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e5991e31a754de89899879b4185ef1a22a989d63bbef123e1fb97946023894 +size 416420 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..914626f9df1bdceb6951b6ae289767146403f1b9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b504ee956784d822c227+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8df18746c8169cea1b8c0090d943ceb5be0130737853fa14652474546670cfc0 +size 31642624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a52ad1255674572dabec0179d092e712a1575830 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:138ea54b503db177b6308ad5d590bd33d12244592e1aa218001eaa9d4954363f +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..42d8f0c498cddda38b0624e4b52a56b9c6ca8a65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b605ce9935543a7c4000+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0830700171e0de8043e5528fbdf604686e91111a9b21fef8b125b13904bccc3f +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fb1b28e79b90573deac45c300a6121723d8d3e7c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4df4ddcde0818d80562fddaaec000c6df317e66662ae25e172b1365c890cbd5 +size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..07ee65f87ef68bd120b44a358ca11274f72de6fd --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6551578a2d26cf0e433+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73efc545834d724084154bc14cc48f9ffced7673348a2f87cfafdd3c6ae08a73 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0590bae928b0caf241b1cadc616f92b69133e599 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c31ca5c42669af732c52e7635faec4f9600d0cf781e381c4a2a7f791c10b3d4 +size 445394 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..76fbe9ef1dad7084d0b9dac9bb0ead1c6d921927 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b6cc4e3b2052c9ae8504+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b31e34c8894efaa104e01140f1968514edb44ca9a21a3bccafa8e11c5ce5b78d +size 32072704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..5f3a91f015e8533b9b611f8f73d644ced241bd6d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07822f0ef5f77dcdf7bc1a73632a04b81876feb6485190c3d908a84688569da +size 69058 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..90beab8be1342bb0e686edd522bd78a534e0ff60 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b86697f072e2088c7c65+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48eec55201680072dad719edb1b789358b8028a8e3ad1b13998987aad9385ec5 +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..60caa389725f3928a756eb5a2ed7db001291bafc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd29b9fcad2a945198bae1058538f54be003576d1675a8d9da0832151db4194e +size 378951 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..39d62eb70f79ed172709c72c3f486ac6e1a2ef41 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca0984fe4b17c72150b3f1bb21b08b7e480ba250f6e1084471aa1f5f7a7e5084 +size 4404224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..94b8ac0fd4e5d2148102fe77330c87012b69f107 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b94c33a1002b2de0e6f0+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc5f7c13ca6d344a2adce9d3a54e07eca1e4972c01f35ae91fc1d2f12b6d0380 +size 4473980 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..82f40b7992684a38ecf52a2bd012e4af3f177142 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28bbd441aaf14edb23f4623a1c9b5f6bdc6509852df1544dde2dbe00decaae6e +size 412964 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3198104c978bf8c138af2f529f31e2ff94fe06c9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc0b66c9782944b7a36e7feb7fcd2df82daab6e7a3965afeddb13c614075385 +size 2212864 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..943c582afbf44efcd8aa404d9d0085f0266b461e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_b9b3451de340b8eee93e+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607f2fe98ed224b90067d330ce364e6b6f2a8247cf016cee7b72344c9784aa6d +size 2282608 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..27ced62a8e6fb20fd2f427f997ac582857e43330 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fc25da67cce0cd0213ee3dcc3c868aa3e36fdb4d63183e28b0f7a612d50aea1 +size 80244 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..01e8e81ae66feb277fa1e7947814109d370a663e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_bce95ac1b2d4df9f5b48+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:114adae7470f048fa548c77a0570ddef2d92f6e2ce59e8a4a23a7b00cdae2676 +size 226304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..22ee441afcfb5d7500e2955ad50e823878b0e474 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c28329f6cf4c41002edca947bc9fe238dcf678fa9015e197c84adfd624aefd +size 42844 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..47aff8bbdb096e649586f6ec0d42117bc8954452 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c0b1f40a34b7b2f5e74f+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7713e8e6e6447588def54781bc6347697462ad221892df9ec388a5f800c315 +size 154624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d7530ab5c36cafc0f065752c8cd2b45c725b63bb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029bfcb6a4d3856149b1d65d793f4d8506d68b35c298e6b6f6bfa46bf1bffbf3 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..97bf55b170cc73fb6d7feeea95d4065f04c68d32 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c5e4d228f8144deb6b22+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe4a4a14377a5de8e4c361308770ed05e91a7052ef6dbba8c5403e28c50a9ab +size 32646144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b516371ae4048d87dc14f5ee550f9bea1bb272b7 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7564a41544ec09a90ccdc48a0dbc196b9cd1851314e7a705f0df7e73cc89751a +size 69051 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d10d137970c9802ea8dbd8da45f5675c99b2fe5c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c64dbf51d7751bc4d5a5+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34f983cfd6aa502adcc59f1237194151bf21f7cf3bc7e1bf2b739af73723e63f +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..758f62837d11953b5d6b67769a2e5cb2c835057c --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b706b02fcf5d239ef2b5374d3aac8a84187b8463757f5c6bc13251fd24dacab +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..573c8bec4c67b05a683cc6e8cd42f198d21f5b85 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_c671356c64ab8a366139+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..11e4b0533b82aa7991f0d6429dfadcde71c3145b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e6e65cd1f7c168ba29ff3028d8daaa7722ef33e073fb68b1507831367992eb +size 50060 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ba3722c5881d9753e570c69ca36e9b6fe2c3fe7f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5195d6a0f4fced9a4bc8cc05d2f3ed94e74d1c88c551aab473551a8d63cd20c +size 2356224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9889d7fb2a11925acab5e3b5dddf4a661b51942d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c897718453cb0b8597c2+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a875e627978b9df23c1df56daef5a216a10baa81087514c494ee1e28f067c0fd +size 2364014 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b84a46840ae17f1b9b19296d48cc9ad8b9d243a8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa5cb11090c0db156b73c67514fecc774180253d4980064595632b1d0e7eb7a +size 7099 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5de76fe23744e18849755ddce914b3366b98c5eb Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f13ea78873a40e09cc+431f5505/model.neff differ diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..985c6ace7b8b76db71706c8216d3b49caba9a675 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed317151e832e8e2bbb5f5309d59f834a6136aafe42cf5fe9db157dd7bf3730 +size 136713 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..79bd6b25bd183ff4763eb173382786d077de8129 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c8f8349f83c7717252fb+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75168a4f77d5eb3903f75546aeb4a93637f327fd6cfeae1c8ce8982cbbde91c4 +size 2223104 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8f40e21e55a5a6e0687300e5d0557b16f95200e5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a3e30261fc8244f94a5bc47816e67cd24431171c16aeba568df720c3915674e +size 381006 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3ed8b630fa3795ff4828e85957af569b31504b46 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa47a004d3cc3d071b3f6c90d0279ecac27d4ff9ba7fb40ccdb146bc5cab1b10 +size 4404224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7682cb1521e3c5814bf46ba0b5cf525f4f4dd921 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_c9975b78b6ea5810a58d+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d5b4cbc0f305baa440936428d6b3b8536a0b4fd9cf79456e6095354f531077 +size 4473980 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..49f25c40c5c6d7983ec2898be992a32f924ccfeb --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657edce4bf1718bd0c3ad44e089b50f2d0d8e6a23640e13d968c274692d734dd +size 416660 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a54c38daf5bda2c766f562a13ee487ddcb3efcf0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ca54687af788507da1a6+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033cdf3fe19b017887cd57c270efbed24be0ae3a2a13c21d0ed4310c35ebcb0a +size 6288384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..573db90ee70df1e93fdec9265a1a4a017dfc6166 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ea01efd364ff4226a41ae2f3ae4815733a507c3426563823b419a61913f2c8 +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..abfbcb32837c39b340590ad8ec63956b433ebb96 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c692f31bf47de7add767573b1bf534507a9afe09dd6df129cff00eabd97463a1 +size 2857984 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..2ed236aa80cc8fee7106cc52de5f1d496028b54f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caafb976ef8676af2f7a+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf435b9878a087e4f47b7c0be24afb52cd6d1d242e65c6726f28b7e94f406c2d +size 2995928 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..892fab43157a43dc2f6c0bc74da09f06fb265ea9 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/compile_flags.json @@ -0,0 +1 @@ +"--enable-saturate-infinity --enable-mixed-precision-accumulation --model-type transformer -O1 --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2' --internal-enable-dge-levels vector_dynamic_offsets --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..caa06aa827793434a0f68a44c6c0bac365843693 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_caf5f11d6a1c2af1a2d1+bfc62e4c/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707bb33d702d0f80d0e09777ca3bd20f98aaab37d64b82380b9a96ef85865985 +size 1227398 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6f6df0c403dcca52848a5e3acdfb2c189a8bb133 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b0c9b97612f7bf7cff942c0feb4c43a02a1d453152afe729985d2f86548c40c +size 332710 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..77d6fab8fc714797615b4b08a51ffb6a98f0412e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb7223927d0638d20f81+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde578b178ec4415bdc6c07da2e05ee48e7623dfa22973a71f01b3994b345f5e +size 4557824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..818cfe200b33b363185427f171177827d0934540 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/speculation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..fd4d3fa3b75f864cbffec595245760533d4a59ac --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9c952da62c99e51430860b82801af6f15b0146073435ec0e9f7339632b764ab +size 414499 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..bd48205496afb1890e8835451966fb7858801137 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cb789335a6208a17763d+841d78e1/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d852ec80fafaf631db0377e687f2bb758a34d97f451f22894ae4411105830fe0 +size 4301824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b881cb71abc210fdf66e628d7a054c2607bc2e7d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2282c60b9e7a88a9f3739c0796ba6a76f3dbc7c34d4cbaac49f988d13b890cca +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..df15e49aeaec8fd825fe857539f56a45e14f664b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cbcf0a031370638cd6d3+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00037336a9a76178b7c52a2b646c487b123c4e82dc9360dfcf1ce69cd5f15c5 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..44d85a0c129de58eca961464230623f32fb055d4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c898f1bab83b894dd95995a7b66fc146d70fc69f167264e20c6c88f4d10e9dcc +size 1914485 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..efe2aa91c21a7b8532fe39d9a4bc03b76c7ece14 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429f7d4190e60c6ed8145b27795b1231af0748573b6edfec61e23cf348ff4443 +size 11787264 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..da4d54b695cad4e818b35b0b405848e3e4eb8e70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ce3f6b0c3ecffef892cf+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c96b0f6664c50c8b6a6e93ad18dcdce2a3238a054770993ee2ab5436d07c3cd +size 12130962 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0d573251d32ec16ff35d120043ff40c807e5225e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82617b73b42642502743459932ea40e4eed2cf2ea7e758bc62f1aef72ee5c97 +size 82299 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d82e3fc8f0f5982402b9376acdb447c6760442f4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b82744b2ed277051ad8130521d3371efe39cc491bbd7adf43d8a26d6b6d8265 +size 236544 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5301cc43fc955a7806ab6d0fd8b0bc378692e929 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_cf2d32e118f389e77f17+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcaccebd91e984f1c1cb795e8c454f6d9d9b03264e78538f99fc915a62b0a02c +size 244319 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..93ad1fcba9860c328794a9eb7dd1f32c2324a9c0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76802368657daf4393d770230eaaf9e6df3e9009dbded0eae7a25d131e166d3b +size 378951 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..104d4b860a5f252db6f544ce779027b2bbc01b0d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c17249efabf016c793be7bc8e979f004f0fa0b99a522eab0ce68adcae620ee +size 4404224 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..e3cf45588f4d86dceaafb46d43801d30151ef2f5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d0afd3579a8e6e3e459d+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a0354a2157f1168be5c558397a52203245b6268d574b03d973ad6bdd69ef8c7 +size 4473980 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..758c8b0b14043cf84245cffdc6c93317cf0b86be --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3751ca70154e5ead11b898ec288ee32be94ea5e6d2d1f183e03f97fbcab9de64 +size 777289 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..32b288e1254394c57d8044d65fadc70e5b86eaf1 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b483287f14e8c1be8f78f4bcc07347f1af2b92cbe9f19869d5fba8e10f9b4e2 +size 5889024 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..222cdb2f50f257ca9256902216657fc2ad782ad0 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d219dd8edc7eab1941bc+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803e520e36a5330b8f1dacd14497c050984908e6c7b5b272ad6ccc96cdf97131 +size 6026968 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ceada16784908c90f2737084cd29fb240d785fce --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b7ce902472a6afa072bf875ed4ad3d98d0532276139ac15b41d6c6393881a5 +size 1908818 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8ea3c30765757f868b090a472830180075a8ce10 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd922818927a72335a8525543ec4e32ba21e11aea3f4185f5b2c500626f6b740 +size 7138304 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b805b3785c3fb4498857be3296a52316fa7398ae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_d57d96a2409c0dcbcb1f+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e87175ac7451885066d3461f8fc86372568707274f6f49f83fcd5dfafbdf3227 +size 7481879 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..958ebcfae5870711e645c557d1a16874d474eb65 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f529d177f160037dd711356628769f9cef61a3fbe878909b494d4f4fbb53a5e +size 777673 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..509b64239ad3755c6a8cb701f94ced2c45891bcc --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac534e5b49f3131e7d4fcf81d4325a7d8b894a946090edbfb823571d630dafd2 +size 9247744 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1541695366bf3b6a56239e86c5a4545d9e8592c8 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_da408d8160805ec31d7c+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dea0db8daf215ee0ba41633c8854d6c2060decf85a6f0dacd50c167064e3c147 +size 9385688 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..41b58f296f457ec399b51d242a0a618db47dfd33 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bec9cd09095d18ecce29ecab423fab08e52b20be4f9f6d0e86eaedf82967e12 +size 850865 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..0d946b1841db87ab29409a12fa06ce32e333b537 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_dcb05b106d998132758a+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:788d907015c3ea08b3db2bd9cda362aa515a6aad3184947bdb99ad28fe30b423 +size 2417664 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..633d3d672e62a35b75c2af986ddf9c528bc1cc17 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae48749b4077f43ab15c97d37dd01f4e74d5474c2933604c854f2ddc1414608 +size 82915 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..2ab670769cad9f3c99be21dea57864711750965a --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ae5693fc64da9378b91fcbe5ea8a2c302a1251bf973955f80f144a7dac329c +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..36322d50fa3df2095fbd7304796d704c9bf2abae --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_de0cb04f05e1509e85d4+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e435a403ea581c490682f88d1b14d21514bee867f4cdeb32beda6838d0928e +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..2d97ebfc93bd3ce3b26648c316a0ddb9ebae2f70 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e8b95f304e280cba9bdbffb3de26b9776425a745 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee5d453b3cbb1e35f65df13e217227cf0c6694fcf8019de7e6b576ff3f60c269 +size 920277 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d5479d42c2012f89cc1944369266105ae4e5e081 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e58231c1f98c353133fe+793f1a96/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de12968be13c5c1148c5f87074705682b7f664362fe6f68db162949fa37ef93 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0d1d037151ebd0c087831fe289a6d2f2ced53904 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef740edfa70ce225aae83ca14f9018e30db2e3ee491fecc8351129f60751703 +size 373614 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..225ecfe90be363408c389a06b88c59ae6116ce51 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e5a1c84c46e5affeeb18+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c703e8bf1cc79ace196209ab7d95f7c0067b9a4423dab41cf813bc2e8509980 +size 31693824 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..e1dd82452b8e16fe8d25db04b67b50fefc4d0215 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a0b2b45aeefa1d75b73e952a7cd520223f195a6d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97745b5c52bedcbbfc2a41a1f3dcc13e1cb8c040171d75716e3e00689416828a +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96634a881f040a11c80b562b00753857d0c06c8e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44867e67662d01ae9854d806923b0c6d421da78276c960822dd95540f6e48463 +size 5192704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..405d70045dffcdce56da636669d8625f18e5e748 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e73a11fa3fd3b5c3b97b+165e9558/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e6fc3c6002297043830734eb637426044a076f40c57559422301e9260209a9 +size 5330648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..305107c75f95cd3ed63b75f9d225f1f27a5c2c23 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5032e57364bad1bb17e281e455bfb513a91103b5f420d700d83fd215d4754ec +size 777273 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..074cd0999c6ce0d91ae3db6ba8d096aeae2e7367 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0260d073e94d990b0f721e505189a50a3a44c0064780140cd3c8b122a0ff0c +size 5192704 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..4e1a23824d4999fabd4f95789b3c7d498103d379 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e77e279e6c35b74de67c+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222b8b025c05a0a5555d81b14b7bb3226929d94d04cc9a47071d31e2c1958a90 +size 5330648 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..dcde6c3978ed8b79ae83ebd29a4874956f871e22 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O2 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0aeab94286a0c69a0d434f8bd23cafa7b8f727ca --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce95de02650201b0eb91a5ec6ab62f6c994ccd6226cbc944f3a5de5817a36d07 +size 2268363 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c995689ff072de0071f1d22c6607bee8c0077fee --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_e8078469b2219abbe7b5+bfe5714b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7efb6811b9babd119431288df5f7d1029c3e81e74bd654ef0852c189b0e7f3 +size 3257344 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..d28c05c05087c7731d2d1c0a0e9565e9f5bb8a28 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2683e62e065e4a1cae87648c7d7fb6b2db8880f391acf20fcd7b60e38804a428 +size 136016 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..d4939ca189f95bfb12d9d8ad42c5ec58be137fb5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_ea8065fc67389287d5ae+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5000ef742cddc09845d5b98d6e9b3db9d7884cecaeb726f8b195ce6e1a7880b +size 2202624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..ad722b76e0c92f11139d95322d0e4b8c64f7b933 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b09fb4dd3cdfe0cfd6be83affe37b3d42d7cd5644fb220a51b2145b6e75d8f +size 88449 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..055ceaca43f9356639b71b7a38b71b6117e4cd62 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f101d5f6b79e47ea24cd+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9151cfe4e7b3ee29ae367d4070d739ad94be0ae7278665a51139aee67f3b264 +size 1178624 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..54652711b881ed95d11360a0397e86833329856b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/compile_flags.json @@ -0,0 +1 @@ +"--auto-cast=none --model-type=transformer --tensorizer-options='--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-dge-dma --vectorize-strided-dma ' -O1 --internal-num-neuroncores-per-sengine=1 --logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt --enable-internal-neff-wrapper" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..6a17c714e583544af0f5f16c1807616eb2ead967 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30360da067ec244788b37a03e0074e725241b5704b03412f893d53ecb7f98883 +size 57479 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c09bb72f75411fa288fb22a23309370c1ea4f035 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:385f22ae6aefb0889d0160457c29cefa59ff5f746a3123add0137777c373764c +size 246784 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..9370095f311385316f809ca4c134009ef5e9687d --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f358092e7338b2e4529f+7e4da68b/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fd557962db74d647b1d5362625a12b3248ab4a468132733a4f4e2a51b7b813 +size 254559 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..14142d4be6ba89f79cf9e4c64ef438cde1eac01b --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c114d00c5b63392debe1575b94c9cd5fb37da72b776dda342032b737f9dc67 +size 69044 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b52a3332bce4868684039c4278651a41af10f212 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f47f13ac4c1a1c792c33+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ec02bf5189d2a56eb101d3dc9d8918f10608b22c20c1dfb422650fa83b6f9b +size 1158144 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..dd15899b44b34f45c6fc22c6f2c0884d8a2943a5 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:998625bbddc15ef9e80fd9cdbbba5d30b61d1992bcbe676584c27cbe81be0ddc +size 339944 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..12d3d59702181ef108e1658e871e45c81e7c5741 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f781291bf31bf79730b8+431f5505/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b2f21896a15b36b8a3f35e5898819292b4003b538c614c4f502b4861ee1887 +size 8059904 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..0c655e4bcad17c00d4d9ec479b31fc9e637d4287 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--model-type=transformer", "--auto-cast=none", "--execute-repetition=1"] \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bb398860e50031bfd3e8249d9ace594704114ec2 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc998a517871f9971580382edc47f8e45377a0dab229bff749798ab51c992928 +size 181367 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a3eee59199b4b3663c1e8d1dfe1e80dbd01c9db4 --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_f91cd98a64f373af274d+613edded/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d67e5567b210909beac94e83b6f1552a1216d5fbc5f284b8e1118e5829351ca4 +size 2448384 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/compile_flags.json b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..9932c70d27d759d781be88485b74b17b49b4c04f --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/compile_flags.json @@ -0,0 +1 @@ +"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt" \ No newline at end of file diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.done b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.hlo_module.pb b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..505fa3507914323e2fba09c45c0eeae49294785e --- /dev/null +++ b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38797959349de7e57bbd566b3d27770746f3f23b4cc8ffbfbeaa6afd0d5385de +size 7106 diff --git a/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.neff b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..efc63dac52fb0d56d97c4f975cdaa12135efdec9 Binary files /dev/null and b/neuronxcc-2.17.194.0+d312836f/MODULE_fc6c6610807e794f06a7+431f5505/model.neff differ