diff --git a/.gitattributes b/.gitattributes index 0f90fe2b18b9d19feea68ae1eef879297247015d..d11a4ff0b626e66ba654b7bd684902ce0d112d3d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -41,3 +41,7 @@ checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text checkpoint-750/tokenizer.json filter=lfs diff=lfs merge=lfs -text checkpoint-1245/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1660/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-2075/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-415/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-830/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/adapter_config.json b/adapter_config.json index 58aa2fa26549913ebd0d7c2f263c8fb57708daa1..d9a94c10785ed353013e980625c777fea6b3cae9 100644 --- a/adapter_config.json +++ b/adapter_config.json @@ -25,10 +25,10 @@ "revision": null, "target_modules": [ "k_proj", - "gate_proj", + "o_proj", "up_proj", "down_proj", - "o_proj", + "gate_proj", "q_proj", "v_proj" ], diff --git a/adapter_model.safetensors b/adapter_model.safetensors index 41bc940b7ae36731bce7ad785f94560b840c8878..edbdb3357f63c72105a7eacba6bc0606639890fc 100644 --- a/adapter_model.safetensors +++ b/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26203871f20e282b4fd60fbbb580525e58877b465ef3b6ff3084dbb89634675f +oid sha256:234ca98ac0c07569d0c6482aba6c90a7a279051b76dadd40207eabce4792af00 size 335604696 diff --git a/checkpoint-1245/adapter_config.json b/checkpoint-1245/adapter_config.json index 58aa2fa26549913ebd0d7c2f263c8fb57708daa1..d9a94c10785ed353013e980625c777fea6b3cae9 100644 --- a/checkpoint-1245/adapter_config.json +++ b/checkpoint-1245/adapter_config.json @@ -25,10 +25,10 @@ "revision": null, "target_modules": [ "k_proj", - "gate_proj", + "o_proj", "up_proj", "down_proj", - "o_proj", + "gate_proj", "q_proj", "v_proj" ], diff --git a/checkpoint-1245/adapter_model.safetensors b/checkpoint-1245/adapter_model.safetensors index 41bc940b7ae36731bce7ad785f94560b840c8878..722407726ff6cab8b6483823dca6ab894912a16e 100644 --- a/checkpoint-1245/adapter_model.safetensors +++ b/checkpoint-1245/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:26203871f20e282b4fd60fbbb580525e58877b465ef3b6ff3084dbb89634675f +oid sha256:1a6c61f5186ebe0b34df2a6a9cd7ab13588b078c352dadb9ccb1ac9b025baf3b size 335604696 diff --git a/checkpoint-1245/optimizer.pt b/checkpoint-1245/optimizer.pt index 7e4b3bb7e5377e8faa21d092d430fa9d0ca41306..b322b72da7ee51cbad4effa10f009538067b522b 100644 --- a/checkpoint-1245/optimizer.pt +++ b/checkpoint-1245/optimizer.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff72aaab68ea32830fcf2e7b8942b36ce9a8e024e53aaa3385a5e4d7b5b1e3c7 +oid sha256:6a872a99187dc56b48791de104125dbdf6da6a4c362c93996922df0ad2934d49 size 671365003 diff --git a/checkpoint-1245/rng_state.pth b/checkpoint-1245/rng_state.pth index 9dea58bae855f98eecf08038acf40f96d550341e..aac91d7e631a2f14642b927a9a32e65e1b0b4a4c 100644 --- a/checkpoint-1245/rng_state.pth +++ b/checkpoint-1245/rng_state.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:467b639d7a4353b4f8bb935f709faa458ded2f9ba866d82cda0bae8617def7ce +oid sha256:e33d8d19070a9fe06df3df5bf4195ac2b3685ebef725b2077bab505e723f1929 size 14645 diff --git a/checkpoint-1245/scheduler.pt b/checkpoint-1245/scheduler.pt index dd981a93f942526783563beefed08ce56a409340..4a14cd81dd6aa57472b2151115b3847e82634076 100644 --- a/checkpoint-1245/scheduler.pt +++ b/checkpoint-1245/scheduler.pt @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:82fcd00123c3c69d4e7b09d8e96247aa0926ecb0862a7624726a4095234b5d76 +oid sha256:afcf24a919c10327f09cf2cfb38a92229f10d5eecb3efe6c943205819e281b06 size 1465 diff --git a/checkpoint-1245/trainer_state.json b/checkpoint-1245/trainer_state.json index dfe6e1138caa45baf20cebc47b530c4971251df2..f7f28d6af10be7d7d1f5fe0f7489cd960e666b46 100644 --- a/checkpoint-1245/trainer_state.json +++ b/checkpoint-1245/trainer_state.json @@ -11,451 +11,481 @@ "log_history": [ { "epoch": 0.060350030175015085, - "grad_norm": 0.6292673349380493, - "learning_rate": 0.0001894736842105263, - "loss": 1.7301, - "mean_token_accuracy": 0.6386896324157715, - "num_tokens": 154369.0, + "grad_norm": 0.5709517598152161, + "learning_rate": 0.00011428571428571427, + "loss": 1.8697, + "mean_token_accuracy": 0.6171649008989334, + "num_tokens": 158430.0, "step": 25 }, { "epoch": 0.12070006035003017, - "grad_norm": 0.4741007387638092, - "learning_rate": 0.00029993852448555923, - "loss": 0.753, - "mean_token_accuracy": 0.8149925297498704, - "num_tokens": 280943.0, + "grad_norm": 0.4126085937023163, + "learning_rate": 0.0002333333333333333, + "loss": 0.8694, + "mean_token_accuracy": 0.7941986697912217, + "num_tokens": 285311.0, "step": 50 }, { "epoch": 0.18105009052504525, - "grad_norm": 0.36744022369384766, - "learning_rate": 0.00029934198818572623, - "loss": 0.5634, - "mean_token_accuracy": 0.8515779197216033, - "num_tokens": 436084.0, + "grad_norm": 0.3864216208457947, + "learning_rate": 0.00029997787517981614, + "loss": 0.6122, + "mean_token_accuracy": 0.8420314061641693, + "num_tokens": 442499.0, "step": 75 }, { "epoch": 0.24140012070006034, - "grad_norm": 0.42183440923690796, - "learning_rate": 0.0002981133400718627, - "loss": 0.4375, - "mean_token_accuracy": 0.8794168013334275, - "num_tokens": 562245.0, + "grad_norm": 0.43023112416267395, + "learning_rate": 0.0002997630832860032, + "loss": 0.5051, + "mean_token_accuracy": 0.8632901525497436, + "num_tokens": 567883.0, "step": 100 }, { "epoch": 0.30175015087507545, - "grad_norm": 0.29458317160606384, - "learning_rate": 0.0002962577805768642, - "loss": 0.3206, - "mean_token_accuracy": 0.911292427778244, - "num_tokens": 717151.0, + "grad_norm": 0.32171109318733215, + "learning_rate": 0.0002993201135681549, + "loss": 0.3632, + "mean_token_accuracy": 0.8987007009983062, + "num_tokens": 723609.0, "step": 125 }, { "epoch": 0.3621001810500905, - "grad_norm": 0.3513452112674713, - "learning_rate": 0.00029378316362776546, - "loss": 0.3206, - "mean_token_accuracy": 0.9112378489971161, - "num_tokens": 843341.0, + "grad_norm": 0.398703008890152, + "learning_rate": 0.0002986496409313553, + "loss": 0.3154, + "mean_token_accuracy": 0.911303380727768, + "num_tokens": 850241.0, "step": 150 }, { "epoch": 0.4224502112251056, - "grad_norm": 0.2765076458454132, - "learning_rate": 0.0002906999634028451, - "loss": 0.2131, - "mean_token_accuracy": 0.9411528539657593, - "num_tokens": 998321.0, + "grad_norm": 0.2818653881549835, + "learning_rate": 0.0002977526869022985, + "loss": 0.2394, + "mean_token_accuracy": 0.9328221660852433, + "num_tokens": 1004280.0, "step": 175 }, { "epoch": 0.4828002414001207, - "grad_norm": 0.36249637603759766, - "learning_rate": 0.0002870212299981334, - "loss": 0.194, - "mean_token_accuracy": 0.9448786079883575, - "num_tokens": 1124044.0, + "grad_norm": 0.39628875255584717, + "learning_rate": 0.0002966306180728982, + "loss": 0.2069, + "mean_token_accuracy": 0.9405643939971924, + "num_tokens": 1131356.0, "step": 200 }, { "epoch": 0.5431502715751357, - "grad_norm": 0.23520268499851227, - "learning_rate": 0.00028276253419097193, - "loss": 0.16, - "mean_token_accuracy": 0.954469358921051, - "num_tokens": 1279690.0, + "grad_norm": 0.2750284671783447, + "learning_rate": 0.0002952851440181598, + "loss": 0.1639, + "mean_token_accuracy": 0.9533388954401016, + "num_tokens": 1286496.0, "step": 225 }, { "epoch": 0.6035003017501509, - "grad_norm": 0.3840561807155609, - "learning_rate": 0.00027794190153442033, - "loss": 0.1449, - "mean_token_accuracy": 0.958166065812111, - "num_tokens": 1406252.0, + "grad_norm": 0.4104975759983063, + "learning_rate": 0.0002937183146914856, + "loss": 0.1533, + "mean_token_accuracy": 0.9560526990890503, + "num_tokens": 1413124.0, "step": 250 }, { "epoch": 0.663850331925166, - "grad_norm": 0.2286107838153839, - "learning_rate": 0.00027257973606146575, - "loss": 0.1221, - "mean_token_accuracy": 0.9660421818494797, - "num_tokens": 1563270.0, + "grad_norm": 0.2589656710624695, + "learning_rate": 0.000291932517301382, + "loss": 0.1192, + "mean_token_accuracy": 0.9663791972398758, + "num_tokens": 1568083.0, "step": 275 }, { "epoch": 0.724200362100181, - "grad_norm": 0.3460133969783783, - "learning_rate": 0.0002666987339219681, - "loss": 0.1082, - "mean_token_accuracy": 0.9702390593290329, - "num_tokens": 1689260.0, + "grad_norm": 0.42289865016937256, + "learning_rate": 0.00028993047267432864, + "loss": 0.119, + "mean_token_accuracy": 0.9662033247947693, + "num_tokens": 1693195.0, "step": 300 }, { "epoch": 0.7845503922751962, - "grad_norm": 0.21432659029960632, - "learning_rate": 0.0002603237873178853, - "loss": 0.1048, - "mean_token_accuracy": 0.9709047842025756, - "num_tokens": 1845134.0, + "grad_norm": 0.3269366919994354, + "learning_rate": 0.0002877152311093483, + "loss": 0.1098, + "mean_token_accuracy": 0.9693831586837769, + "num_tokens": 1847137.0, "step": 325 }, { "epoch": 0.8449004224502112, - "grad_norm": 0.3365967869758606, - "learning_rate": 0.0002534818791433866, - "loss": 0.08, - "mean_token_accuracy": 0.9766195774078369, - "num_tokens": 1972693.0, + "grad_norm": 0.29110169410705566, + "learning_rate": 0.00028529016773059656, + "loss": 0.097, + "mean_token_accuracy": 0.9721069341897964, + "num_tokens": 1974433.0, "step": 350 }, { "epoch": 0.9052504526252263, - "grad_norm": 0.2480434626340866, - "learning_rate": 0.00024620196877580576, - "loss": 0.0892, - "mean_token_accuracy": 0.9751642364263534, - "num_tokens": 2130559.0, + "grad_norm": 0.2041333168745041, + "learning_rate": 0.00028265897734504976, + "loss": 0.1078, + "mean_token_accuracy": 0.9707016760110855, + "num_tokens": 2130536.0, "step": 375 }, { "epoch": 0.9656004828002414, - "grad_norm": 0.343098521232605, - "learning_rate": 0.00023851486950083892, - "loss": 0.0665, - "mean_token_accuracy": 0.9801602983474731, - "num_tokens": 2257894.0, + "grad_norm": 0.27978503704071045, + "learning_rate": 0.0002798256688131267, + "loss": 0.0788, + "mean_token_accuracy": 0.9774614202976227, + "num_tokens": 2257778.0, "step": 400 }, + { + "epoch": 1.0, + "eval_loss": 0.08150900155305862, + "eval_mean_token_accuracy": 0.977405073836043, + "eval_num_tokens": 2337343.0, + "eval_runtime": 66.2124, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 415 + }, { "epoch": 1.024140012070006, - "grad_norm": 0.1706114411354065, - "learning_rate": 0.00023045311809080567, - "loss": 0.089, - "mean_token_accuracy": 0.9756335769731974, - "num_tokens": 2405953.0, + "grad_norm": 0.2321971356868744, + "learning_rate": 0.0002767945589408217, + "loss": 0.1016, + "mean_token_accuracy": 0.9726764823972565, + "num_tokens": 2406156.0, "step": 425 }, { "epoch": 1.0844900422450212, - "grad_norm": 0.23476898670196533, - "learning_rate": 0.00022205083708799942, - "loss": 0.051, - "mean_token_accuracy": 0.9842809218168259, - "num_tokens": 2548144.0, + "grad_norm": 0.15238304436206818, + "learning_rate": 0.0002735702659026533, + "loss": 0.0566, + "mean_token_accuracy": 0.9829730612039566, + "num_tokens": 2547712.0, "step": 450 }, { "epoch": 1.1448400724200363, - "grad_norm": 0.1139589250087738, - "learning_rate": 0.0002133435903760353, - "loss": 0.0686, - "mean_token_accuracy": 0.9804027622938156, - "num_tokens": 2689130.0, + "grad_norm": 0.24308571219444275, + "learning_rate": 0.0002701577022054515, + "loss": 0.0787, + "mean_token_accuracy": 0.9782290506362915, + "num_tokens": 2689644.0, "step": 475 }, { "epoch": 1.2051901025950513, - "grad_norm": 0.19008274376392365, - "learning_rate": 0.0002043682326505094, - "loss": 0.0472, - "mean_token_accuracy": 0.9851819169521332, - "num_tokens": 2831241.0, + "grad_norm": 0.10345127433538437, + "learning_rate": 0.0002665620672037014, + "loss": 0.0559, + "mean_token_accuracy": 0.9833064198493957, + "num_tokens": 2830847.0, "step": 500 }, { "epoch": 1.2655401327700664, - "grad_norm": 0.11993619054555893, - "learning_rate": 0.000195162753426108, - "loss": 0.0642, - "mean_token_accuracy": 0.9814973390102386, - "num_tokens": 2972054.0, + "grad_norm": 0.12139427661895752, + "learning_rate": 0.0002627888391778493, + "loss": 0.0734, + "mean_token_accuracy": 0.9794088786840439, + "num_tokens": 2970752.0, "step": 525 }, { "epoch": 1.3258901629450814, - "grad_norm": 0.07476775348186493, - "learning_rate": 0.00018576611624042852, - "loss": 0.0479, - "mean_token_accuracy": 0.98527980864048, - "num_tokens": 3114893.0, + "grad_norm": 0.19484972953796387, + "learning_rate": 0.0002588437669876384, + "loss": 0.05, + "mean_token_accuracy": 0.9848471087217331, + "num_tokens": 3110651.0, "step": 550 }, { "epoch": 1.3862401931200965, - "grad_norm": 0.11915570497512817, - "learning_rate": 0.00017621809373510641, - "loss": 0.0578, - "mean_token_accuracy": 0.9826212483644485, - "num_tokens": 3254969.0, + "grad_norm": 0.15929530560970306, + "learning_rate": 0.00025473286131319283, + "loss": 0.0647, + "mean_token_accuracy": 0.9809465759992599, + "num_tokens": 3250565.0, "step": 575 }, { "epoch": 1.4465902232951118, - "grad_norm": 0.1064341738820076, - "learning_rate": 0.00016655909931229048, - "loss": 0.0435, - "mean_token_accuracy": 0.9862636721134186, - "num_tokens": 3394974.0, + "grad_norm": 0.1795138716697693, + "learning_rate": 0.0002504623854971937, + "loss": 0.051, + "mean_token_accuracy": 0.9846814131736755, + "num_tokens": 3391351.0, "step": 600 }, { "epoch": 1.5069402534701268, - "grad_norm": 0.08885128051042557, - "learning_rate": 0.00015683001607900553, - "loss": 0.0534, - "mean_token_accuracy": 0.984228019118309, - "num_tokens": 3535129.0, + "grad_norm": 0.14619964361190796, + "learning_rate": 0.00024603884600210097, + "loss": 0.0628, + "mean_token_accuracy": 0.9824352604150772, + "num_tokens": 3530305.0, "step": 625 }, { "epoch": 1.567290283645142, - "grad_norm": 0.10837211459875107, - "learning_rate": 0.00014707202380342108, - "loss": 0.0403, - "mean_token_accuracy": 0.9873021399974823, - "num_tokens": 3675282.0, + "grad_norm": 0.11213799566030502, + "learning_rate": 0.00024146898249695974, + "loss": 0.0478, + "mean_token_accuracy": 0.9856422942876816, + "num_tokens": 3672598.0, "step": 650 }, { "epoch": 1.627640313820157, - "grad_norm": 0.09635572135448456, - "learning_rate": 0.00013732642461545747, - "loss": 0.063, - "mean_token_accuracy": 0.9824581468105316, - "num_tokens": 3816603.0, + "grad_norm": 0.126531720161438, + "learning_rate": 0.00023675975758889506, + "loss": 0.059, + "mean_token_accuracy": 0.9828101003170013, + "num_tokens": 3814546.0, "step": 675 }, { "epoch": 1.687990343995172, - "grad_norm": 0.11107978969812393, - "learning_rate": 0.00012763446818947865, - "loss": 0.0412, - "mean_token_accuracy": 0.9869128787517547, - "num_tokens": 3959403.0, + "grad_norm": 0.13861851394176483, + "learning_rate": 0.00023191834621493968, + "loss": 0.0453, + "mean_token_accuracy": 0.9860042548179626, + "num_tokens": 3955590.0, "step": 700 }, { "epoch": 1.748340374170187, - "grad_norm": 0.11180992424488068, - "learning_rate": 0.00011803717714901029, - "loss": 0.0485, - "mean_token_accuracy": 0.9856361907720566, - "num_tokens": 4098655.0, + "grad_norm": 0.057172827422618866, + "learning_rate": 0.00022695212471035816, + "loss": 0.053, + "mean_token_accuracy": 0.984272707104683, + "num_tokens": 4096678.0, "step": 725 }, { "epoch": 1.8086904043452021, - "grad_norm": 0.05261543393135071, - "learning_rate": 0.00010857517343248423, - "loss": 0.0374, - "mean_token_accuracy": 0.9879239189624787, - "num_tokens": 4239623.0, + "grad_norm": 0.13785910606384277, + "learning_rate": 0.0002218686595701219, + "loss": 0.0438, + "mean_token_accuracy": 0.9864085137844085, + "num_tokens": 4238701.0, "step": 750 }, { "epoch": 1.8690404345202172, - "grad_norm": 0.07431406527757645, - "learning_rate": 9.9288506354941e-05, - "loss": 0.0534, - "mean_token_accuracy": 0.9845235311985016, - "num_tokens": 4380214.0, + "grad_norm": 0.09842238575220108, + "learning_rate": 0.0002166756959206587, + "loss": 0.0538, + "mean_token_accuracy": 0.9840266466140747, + "num_tokens": 4378606.0, "step": 775 }, { "epoch": 1.9293904646952322, - "grad_norm": 0.07465440034866333, - "learning_rate": 9.021648309344443e-05, - "loss": 0.0364, - "mean_token_accuracy": 0.9882479470968246, - "num_tokens": 4521129.0, + "grad_norm": 0.09276362508535385, + "learning_rate": 0.00021138114571944054, + "loss": 0.0415, + "mean_token_accuracy": 0.9867525893449783, + "num_tokens": 4521351.0, "step": 800 }, { "epoch": 1.9897404948702473, - "grad_norm": 0.08167650550603867, - "learning_rate": 8.139750231370407e-05, - "loss": 0.0392, - "mean_token_accuracy": 0.9880437237024308, - "num_tokens": 4654469.0, + "grad_norm": 0.07594375312328339, + "learning_rate": 0.000205993075700389, + "loss": 0.0452, + "mean_token_accuracy": 0.9859298402071, + "num_tokens": 4655013.0, "step": 825 }, + { + "epoch": 2.0, + "eval_loss": 0.05184464156627655, + "eval_mean_token_accuracy": 0.9849317776190268, + "eval_num_tokens": 4674686.0, + "eval_runtime": 66.2113, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 830 + }, { "epoch": 2.048280024140012, - "grad_norm": 0.08596479892730713, - "learning_rate": 7.28688916421049e-05, - "loss": 0.0378, - "mean_token_accuracy": 0.9878681424966792, - "num_tokens": 4801163.0, + "grad_norm": 0.06794245541095734, + "learning_rate": 0.00020051969508346498, + "loss": 0.039, + "mean_token_accuracy": 0.9873216932581872, + "num_tokens": 4801249.0, "step": 850 }, { "epoch": 2.1086300543150274, - "grad_norm": 0.07815541326999664, - "learning_rate": 6.466674967106751e-05, - "loss": 0.0268, - "mean_token_accuracy": 0.9908934444189071, - "num_tokens": 4932557.0, + "grad_norm": 0.06261667609214783, + "learning_rate": 0.00019496934306716706, + "loss": 0.0306, + "mean_token_accuracy": 0.9899670380353928, + "num_tokens": 4934594.0, "step": 875 }, { "epoch": 2.1689800844900424, - "grad_norm": 0.05883782356977463, - "learning_rate": 5.682579316647408e-05, - "loss": 0.0359, - "mean_token_accuracy": 0.9879110449552536, - "num_tokens": 5080542.0, + "grad_norm": 0.12159217149019241, + "learning_rate": 0.00018935047612299625, + "loss": 0.0441, + "mean_token_accuracy": 0.9860250049829483, + "num_tokens": 5086726.0, "step": 900 }, { "epoch": 2.2293301146650575, - "grad_norm": 0.04818252846598625, - "learning_rate": 4.937921012387816e-05, - "loss": 0.0256, - "mean_token_accuracy": 0.9911421006917953, - "num_tokens": 5212758.0, + "grad_norm": 0.10762438178062439, + "learning_rate": 0.00018367165511124414, + "loss": 0.0318, + "mean_token_accuracy": 0.9893095499277115, + "num_tokens": 5218004.0, "step": 925 }, { "epoch": 2.2896801448400725, - "grad_norm": 0.06735564768314362, - "learning_rate": 4.235851929545771e-05, - "loss": 0.0364, - "mean_token_accuracy": 0.9879087799787521, - "num_tokens": 5364069.0, + "grad_norm": 0.08475406467914581, + "learning_rate": 0.00017794153223773558, + "loss": 0.0431, + "mean_token_accuracy": 0.9860376048088074, + "num_tokens": 5368044.0, "step": 950 }, { "epoch": 2.3500301750150876, - "grad_norm": 0.06283754855394363, - "learning_rate": 3.579343678228525e-05, - "loss": 0.0257, - "mean_token_accuracy": 0.9907898318767547, - "num_tokens": 5496173.0, + "grad_norm": 0.07939684391021729, + "learning_rate": 0.00017216883787139772, + "loss": 0.0301, + "mean_token_accuracy": 0.9899585354328155, + "num_tokens": 5500546.0, "step": 975 }, { "epoch": 2.4103802051901027, - "grad_norm": 0.06447657197713852, - "learning_rate": 2.9711750256582538e-05, - "loss": 0.0389, - "mean_token_accuracy": 0.9874769997596741, - "num_tokens": 5646044.0, + "grad_norm": 0.07427559047937393, + "learning_rate": 0.00016636236724274, + "loss": 0.0422, + "mean_token_accuracy": 0.9869222700595855, + "num_tokens": 5651095.0, "step": 1000 }, { "epoch": 2.4707302353651177, - "grad_norm": 0.04464152455329895, - "learning_rate": 2.413920134633272e-05, - "loss": 0.0249, - "mean_token_accuracy": 0.9911341750621796, - "num_tokens": 5778793.0, + "grad_norm": 0.08105447888374329, + "learning_rate": 0.00016053096704351255, + "loss": 0.0275, + "mean_token_accuracy": 0.9905411404371262, + "num_tokens": 5784344.0, "step": 1025 }, { "epoch": 2.5310802655401328, - "grad_norm": 0.06707244366407394, - "learning_rate": 1.909937668007352e-05, - "loss": 0.0395, - "mean_token_accuracy": 0.9876207703351975, - "num_tokens": 5930094.0, + "grad_norm": 0.05421376973390579, + "learning_rate": 0.00015468352194795791, + "loss": 0.0431, + "mean_token_accuracy": 0.986332500576973, + "num_tokens": 5935006.0, "step": 1050 }, { "epoch": 2.591430295715148, - "grad_norm": 0.09106618911027908, - "learning_rate": 1.461360805304146e-05, - "loss": 0.026, - "mean_token_accuracy": 0.9912489461898804, - "num_tokens": 6062403.0, + "grad_norm": 0.07994894683361053, + "learning_rate": 0.00014882894107619277, + "loss": 0.0281, + "mean_token_accuracy": 0.9903588914871215, + "num_tokens": 6066956.0, "step": 1075 }, { "epoch": 2.651780325890163, - "grad_norm": 0.08772443979978561, - "learning_rate": 1.0700882137227434e-05, - "loss": 0.0329, - "mean_token_accuracy": 0.9891680490970611, - "num_tokens": 6211836.0, + "grad_norm": 0.06251101195812225, + "learning_rate": 0.00014297614442034518, + "loss": 0.0377, + "mean_token_accuracy": 0.9876210725307465, + "num_tokens": 6213232.0, "step": 1100 }, { "epoch": 2.712130356065178, - "grad_norm": 0.06003103032708168, - "learning_rate": 7.377760117509834e-06, - "loss": 0.0262, - "mean_token_accuracy": 0.9908367162942886, - "num_tokens": 6345240.0, + "grad_norm": 0.040336403995752335, + "learning_rate": 0.000137134049254126, + "loss": 0.0284, + "mean_token_accuracy": 0.9903542894124985, + "num_tokens": 6343819.0, "step": 1125 }, { "epoch": 2.772480386240193, - "grad_norm": 0.09687343239784241, - "learning_rate": 4.65830759401658e-06, - "loss": 0.0361, - "mean_token_accuracy": 0.9885525786876679, - "num_tokens": 6494440.0, + "grad_norm": 0.0834837481379509, + "learning_rate": 0.000131311556546543, + "loss": 0.0396, + "mean_token_accuracy": 0.9873842394351959, + "num_tokens": 6494517.0, "step": 1150 }, { "epoch": 2.832830416415208, - "grad_norm": 0.06425528228282928, - "learning_rate": 2.554035047414732e-06, - "loss": 0.0271, - "mean_token_accuracy": 0.9905071079730987, - "num_tokens": 6626211.0, + "grad_norm": 0.13212957978248596, + "learning_rate": 0.0001255175374004563, + "loss": 0.0288, + "mean_token_accuracy": 0.9902939075231552, + "num_tokens": 6627652.0, "step": 1175 }, { "epoch": 2.8931804465902236, - "grad_norm": 0.07502556592226028, - "learning_rate": 1.0738491191171372e-06, - "loss": 0.0314, - "mean_token_accuracy": 0.9892856758832932, - "num_tokens": 6776185.0, + "grad_norm": 0.06576525419950485, + "learning_rate": 0.0001197608195366377, + "loss": 0.0382, + "mean_token_accuracy": 0.9877642679214478, + "num_tokens": 6777206.0, "step": 1200 }, { "epoch": 2.9535304767652386, - "grad_norm": 0.09318600594997406, - "learning_rate": 2.2401491261947456e-07, - "loss": 0.0247, - "mean_token_accuracy": 0.9915217423439026, - "num_tokens": 6908155.0, + "grad_norm": 0.05414114519953728, + "learning_rate": 0.00011405017384392655, + "loss": 0.0273, + "mean_token_accuracy": 0.9905436062812805, + "num_tokens": 6909991.0, "step": 1225 + }, + { + "epoch": 3.0, + "eval_loss": 0.043681155890226364, + "eval_mean_token_accuracy": 0.9871059862343041, + "eval_num_tokens": 7012029.0, + "eval_runtime": 66.1941, + "eval_samples_per_second": 5.575, + "eval_steps_per_second": 2.795, + "step": 1245 } ], "logging_steps": 25, - "max_steps": 1245, + "max_steps": 2075, "num_input_tokens_seen": 0, - "num_train_epochs": 3, - "save_steps": 750, + "num_train_epochs": 5, + "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { @@ -463,12 +493,12 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": true + "should_training_stop": false }, "attributes": {} } }, - "total_flos": 3.1964922351990374e+17, + "total_flos": 3.1964539875928474e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null diff --git a/checkpoint-1245/training_args.bin b/checkpoint-1245/training_args.bin index 6d3ecf13b8be6e134c24e75b19034d7ffafca147..184dacc2ae7ef0096c0f2c3ef91b16c840ae960c 100644 --- a/checkpoint-1245/training_args.bin +++ b/checkpoint-1245/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d6208b115a1cf87544c66b6796b905dcb5871c4ad0a2a9a5ff55ee26e68a1bce +oid sha256:4370f125a1b8cc4780ee7319c46b5e76e386e5bd5061ee6e638e993ff73b5e6f size 6033 diff --git a/checkpoint-1660/README.md b/checkpoint-1660/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0d31128190920e45b61115944d16e773c2ec94c3 --- /dev/null +++ b/checkpoint-1660/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoint-1660/adapter_config.json b/checkpoint-1660/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9a94c10785ed353013e980625c777fea6b3cae9 --- /dev/null +++ b/checkpoint-1660/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1660/adapter_model.safetensors b/checkpoint-1660/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb373f0bd832c5d007938f1d8f9c5b1950a68d46 --- /dev/null +++ b/checkpoint-1660/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0580ae7096c27ebd4741d0aa1071c4641d3ac7097dfb2fff98f61358a0ccfa4c +size 335604696 diff --git a/checkpoint-1660/chat_template.jinja b/checkpoint-1660/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-1660/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-1660/optimizer.pt b/checkpoint-1660/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c08ce1a4fc592fcfb07f097927044b8e9513a467 --- /dev/null +++ b/checkpoint-1660/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eba25f5d9f894daf153eb84b000950dd4e4c00392789a8bc54b63c6976f92d0 +size 671365003 diff --git a/checkpoint-1660/rng_state.pth b/checkpoint-1660/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fca3561c953ceefe387b919e618ebcb617d239a --- /dev/null +++ b/checkpoint-1660/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05bdcb140dcbe6eeb7e9713b846e9b7e707fe9b7f87a040dcdfe24fe857efd5d +size 14645 diff --git a/checkpoint-1660/scheduler.pt b/checkpoint-1660/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ca19dd2dbeb35ca3ac7b3fc48b8483ce576d481 --- /dev/null +++ b/checkpoint-1660/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db489836d67eb6a2f9cf352ac12b33db8531786c612f502c376b4d1f7cd034a4 +size 1465 diff --git a/checkpoint-1660/special_tokens_map.json b/checkpoint-1660/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-1660/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-1660/tokenizer.json b/checkpoint-1660/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..9645e8546890ad31273c3a00dd4cc1bd4f7aafbb --- /dev/null +++ b/checkpoint-1660/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7389e002692fc9e3a3d9f9884a9e9d5657e2a4406bf9759a3703abe8a3485402 +size 17210085 diff --git a/checkpoint-1660/tokenizer_config.json b/checkpoint-1660/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..563042ba0748ab38bd6000afa193a5a9634d31cc --- /dev/null +++ b/checkpoint-1660/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-1660/trainer_state.json b/checkpoint-1660/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..670155dc12e60f3f817503f3c6a2fc0d5a8f91c5 --- /dev/null +++ b/checkpoint-1660/trainer_state.json @@ -0,0 +1,668 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 1660, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.060350030175015085, + "grad_norm": 0.5709517598152161, + "learning_rate": 0.00011428571428571427, + "loss": 1.8697, + "mean_token_accuracy": 0.6171649008989334, + "num_tokens": 158430.0, + "step": 25 + }, + { + "epoch": 0.12070006035003017, + "grad_norm": 0.4126085937023163, + "learning_rate": 0.0002333333333333333, + "loss": 0.8694, + "mean_token_accuracy": 0.7941986697912217, + "num_tokens": 285311.0, + "step": 50 + }, + { + "epoch": 0.18105009052504525, + "grad_norm": 0.3864216208457947, + "learning_rate": 0.00029997787517981614, + "loss": 0.6122, + "mean_token_accuracy": 0.8420314061641693, + "num_tokens": 442499.0, + "step": 75 + }, + { + "epoch": 0.24140012070006034, + "grad_norm": 0.43023112416267395, + "learning_rate": 0.0002997630832860032, + "loss": 0.5051, + "mean_token_accuracy": 0.8632901525497436, + "num_tokens": 567883.0, + "step": 100 + }, + { + "epoch": 0.30175015087507545, + "grad_norm": 0.32171109318733215, + "learning_rate": 0.0002993201135681549, + "loss": 0.3632, + "mean_token_accuracy": 0.8987007009983062, + "num_tokens": 723609.0, + "step": 125 + }, + { + "epoch": 0.3621001810500905, + "grad_norm": 0.398703008890152, + "learning_rate": 0.0002986496409313553, + "loss": 0.3154, + "mean_token_accuracy": 0.911303380727768, + "num_tokens": 850241.0, + "step": 150 + }, + { + "epoch": 0.4224502112251056, + "grad_norm": 0.2818653881549835, + "learning_rate": 0.0002977526869022985, + "loss": 0.2394, + "mean_token_accuracy": 0.9328221660852433, + "num_tokens": 1004280.0, + "step": 175 + }, + { + "epoch": 0.4828002414001207, + "grad_norm": 0.39628875255584717, + "learning_rate": 0.0002966306180728982, + "loss": 0.2069, + "mean_token_accuracy": 0.9405643939971924, + "num_tokens": 1131356.0, + "step": 200 + }, + { + "epoch": 0.5431502715751357, + "grad_norm": 0.2750284671783447, + "learning_rate": 0.0002952851440181598, + "loss": 0.1639, + "mean_token_accuracy": 0.9533388954401016, + "num_tokens": 1286496.0, + "step": 225 + }, + { + "epoch": 0.6035003017501509, + "grad_norm": 0.4104975759983063, + "learning_rate": 0.0002937183146914856, + "loss": 0.1533, + "mean_token_accuracy": 0.9560526990890503, + "num_tokens": 1413124.0, + "step": 250 + }, + { + "epoch": 0.663850331925166, + "grad_norm": 0.2589656710624695, + "learning_rate": 0.000291932517301382, + "loss": 0.1192, + "mean_token_accuracy": 0.9663791972398758, + "num_tokens": 1568083.0, + "step": 275 + }, + { + "epoch": 0.724200362100181, + "grad_norm": 0.42289865016937256, + "learning_rate": 0.00028993047267432864, + "loss": 0.119, + "mean_token_accuracy": 0.9662033247947693, + "num_tokens": 1693195.0, + "step": 300 + }, + { + "epoch": 0.7845503922751962, + "grad_norm": 0.3269366919994354, + "learning_rate": 0.0002877152311093483, + "loss": 0.1098, + "mean_token_accuracy": 0.9693831586837769, + "num_tokens": 1847137.0, + "step": 325 + }, + { + "epoch": 0.8449004224502112, + "grad_norm": 0.29110169410705566, + "learning_rate": 0.00028529016773059656, + "loss": 0.097, + "mean_token_accuracy": 0.9721069341897964, + "num_tokens": 1974433.0, + "step": 350 + }, + { + "epoch": 0.9052504526252263, + "grad_norm": 0.2041333168745041, + "learning_rate": 0.00028265897734504976, + "loss": 0.1078, + "mean_token_accuracy": 0.9707016760110855, + "num_tokens": 2130536.0, + "step": 375 + }, + { + "epoch": 0.9656004828002414, + "grad_norm": 0.27978503704071045, + "learning_rate": 0.0002798256688131267, + "loss": 0.0788, + "mean_token_accuracy": 0.9774614202976227, + "num_tokens": 2257778.0, + "step": 400 + }, + { + "epoch": 1.0, + "eval_loss": 0.08150900155305862, + "eval_mean_token_accuracy": 0.977405073836043, + "eval_num_tokens": 2337343.0, + "eval_runtime": 66.2124, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 415 + }, + { + "epoch": 1.024140012070006, + "grad_norm": 0.2321971356868744, + "learning_rate": 0.0002767945589408217, + "loss": 0.1016, + "mean_token_accuracy": 0.9726764823972565, + "num_tokens": 2406156.0, + "step": 425 + }, + { + "epoch": 1.0844900422450212, + "grad_norm": 0.15238304436206818, + "learning_rate": 0.0002735702659026533, + "loss": 0.0566, + "mean_token_accuracy": 0.9829730612039566, + "num_tokens": 2547712.0, + "step": 450 + }, + { + "epoch": 1.1448400724200363, + "grad_norm": 0.24308571219444275, + "learning_rate": 0.0002701577022054515, + "loss": 0.0787, + "mean_token_accuracy": 0.9782290506362915, + "num_tokens": 2689644.0, + "step": 475 + }, + { + "epoch": 1.2051901025950513, + "grad_norm": 0.10345127433538437, + "learning_rate": 0.0002665620672037014, + "loss": 0.0559, + "mean_token_accuracy": 0.9833064198493957, + "num_tokens": 2830847.0, + "step": 500 + }, + { + "epoch": 1.2655401327700664, + "grad_norm": 0.12139427661895752, + "learning_rate": 0.0002627888391778493, + "loss": 0.0734, + "mean_token_accuracy": 0.9794088786840439, + "num_tokens": 2970752.0, + "step": 525 + }, + { + "epoch": 1.3258901629450814, + "grad_norm": 0.19484972953796387, + "learning_rate": 0.0002588437669876384, + "loss": 0.05, + "mean_token_accuracy": 0.9848471087217331, + "num_tokens": 3110651.0, + "step": 550 + }, + { + "epoch": 1.3862401931200965, + "grad_norm": 0.15929530560970306, + "learning_rate": 0.00025473286131319283, + "loss": 0.0647, + "mean_token_accuracy": 0.9809465759992599, + "num_tokens": 3250565.0, + "step": 575 + }, + { + "epoch": 1.4465902232951118, + "grad_norm": 0.1795138716697693, + "learning_rate": 0.0002504623854971937, + "loss": 0.051, + "mean_token_accuracy": 0.9846814131736755, + "num_tokens": 3391351.0, + "step": 600 + }, + { + "epoch": 1.5069402534701268, + "grad_norm": 0.14619964361190796, + "learning_rate": 0.00024603884600210097, + "loss": 0.0628, + "mean_token_accuracy": 0.9824352604150772, + "num_tokens": 3530305.0, + "step": 625 + }, + { + "epoch": 1.567290283645142, + "grad_norm": 0.11213799566030502, + "learning_rate": 0.00024146898249695974, + "loss": 0.0478, + "mean_token_accuracy": 0.9856422942876816, + "num_tokens": 3672598.0, + "step": 650 + }, + { + "epoch": 1.627640313820157, + "grad_norm": 0.126531720161438, + "learning_rate": 0.00023675975758889506, + "loss": 0.059, + "mean_token_accuracy": 0.9828101003170013, + "num_tokens": 3814546.0, + "step": 675 + }, + { + "epoch": 1.687990343995172, + "grad_norm": 0.13861851394176483, + "learning_rate": 0.00023191834621493968, + "loss": 0.0453, + "mean_token_accuracy": 0.9860042548179626, + "num_tokens": 3955590.0, + "step": 700 + }, + { + "epoch": 1.748340374170187, + "grad_norm": 0.057172827422618866, + "learning_rate": 0.00022695212471035816, + "loss": 0.053, + "mean_token_accuracy": 0.984272707104683, + "num_tokens": 4096678.0, + "step": 725 + }, + { + "epoch": 1.8086904043452021, + "grad_norm": 0.13785910606384277, + "learning_rate": 0.0002218686595701219, + "loss": 0.0438, + "mean_token_accuracy": 0.9864085137844085, + "num_tokens": 4238701.0, + "step": 750 + }, + { + "epoch": 1.8690404345202172, + "grad_norm": 0.09842238575220108, + "learning_rate": 0.0002166756959206587, + "loss": 0.0538, + "mean_token_accuracy": 0.9840266466140747, + "num_tokens": 4378606.0, + "step": 775 + }, + { + "epoch": 1.9293904646952322, + "grad_norm": 0.09276362508535385, + "learning_rate": 0.00021138114571944054, + "loss": 0.0415, + "mean_token_accuracy": 0.9867525893449783, + "num_tokens": 4521351.0, + "step": 800 + }, + { + "epoch": 1.9897404948702473, + "grad_norm": 0.07594375312328339, + "learning_rate": 0.000205993075700389, + "loss": 0.0452, + "mean_token_accuracy": 0.9859298402071, + "num_tokens": 4655013.0, + "step": 825 + }, + { + "epoch": 2.0, + "eval_loss": 0.05184464156627655, + "eval_mean_token_accuracy": 0.9849317776190268, + "eval_num_tokens": 4674686.0, + "eval_runtime": 66.2113, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 830 + }, + { + "epoch": 2.048280024140012, + "grad_norm": 0.06794245541095734, + "learning_rate": 0.00020051969508346498, + "loss": 0.039, + "mean_token_accuracy": 0.9873216932581872, + "num_tokens": 4801249.0, + "step": 850 + }, + { + "epoch": 2.1086300543150274, + "grad_norm": 0.06261667609214783, + "learning_rate": 0.00019496934306716706, + "loss": 0.0306, + "mean_token_accuracy": 0.9899670380353928, + "num_tokens": 4934594.0, + "step": 875 + }, + { + "epoch": 2.1689800844900424, + "grad_norm": 0.12159217149019241, + "learning_rate": 0.00018935047612299625, + "loss": 0.0441, + "mean_token_accuracy": 0.9860250049829483, + "num_tokens": 5086726.0, + "step": 900 + }, + { + "epoch": 2.2293301146650575, + "grad_norm": 0.10762438178062439, + "learning_rate": 0.00018367165511124414, + "loss": 0.0318, + "mean_token_accuracy": 0.9893095499277115, + "num_tokens": 5218004.0, + "step": 925 + }, + { + "epoch": 2.2896801448400725, + "grad_norm": 0.08475406467914581, + "learning_rate": 0.00017794153223773558, + "loss": 0.0431, + "mean_token_accuracy": 0.9860376048088074, + "num_tokens": 5368044.0, + "step": 950 + }, + { + "epoch": 2.3500301750150876, + "grad_norm": 0.07939684391021729, + "learning_rate": 0.00017216883787139772, + "loss": 0.0301, + "mean_token_accuracy": 0.9899585354328155, + "num_tokens": 5500546.0, + "step": 975 + }, + { + "epoch": 2.4103802051901027, + "grad_norm": 0.07427559047937393, + "learning_rate": 0.00016636236724274, + "loss": 0.0422, + "mean_token_accuracy": 0.9869222700595855, + "num_tokens": 5651095.0, + "step": 1000 + }, + { + "epoch": 2.4707302353651177, + "grad_norm": 0.08105447888374329, + "learning_rate": 0.00016053096704351255, + "loss": 0.0275, + "mean_token_accuracy": 0.9905411404371262, + "num_tokens": 5784344.0, + "step": 1025 + }, + { + "epoch": 2.5310802655401328, + "grad_norm": 0.05421376973390579, + "learning_rate": 0.00015468352194795791, + "loss": 0.0431, + "mean_token_accuracy": 0.986332500576973, + "num_tokens": 5935006.0, + "step": 1050 + }, + { + "epoch": 2.591430295715148, + "grad_norm": 0.07994894683361053, + "learning_rate": 0.00014882894107619277, + "loss": 0.0281, + "mean_token_accuracy": 0.9903588914871215, + "num_tokens": 6066956.0, + "step": 1075 + }, + { + "epoch": 2.651780325890163, + "grad_norm": 0.06251101195812225, + "learning_rate": 0.00014297614442034518, + "loss": 0.0377, + "mean_token_accuracy": 0.9876210725307465, + "num_tokens": 6213232.0, + "step": 1100 + }, + { + "epoch": 2.712130356065178, + "grad_norm": 0.040336403995752335, + "learning_rate": 0.000137134049254126, + "loss": 0.0284, + "mean_token_accuracy": 0.9903542894124985, + "num_tokens": 6343819.0, + "step": 1125 + }, + { + "epoch": 2.772480386240193, + "grad_norm": 0.0834837481379509, + "learning_rate": 0.000131311556546543, + "loss": 0.0396, + "mean_token_accuracy": 0.9873842394351959, + "num_tokens": 6494517.0, + "step": 1150 + }, + { + "epoch": 2.832830416415208, + "grad_norm": 0.13212957978248596, + "learning_rate": 0.0001255175374004563, + "loss": 0.0288, + "mean_token_accuracy": 0.9902939075231552, + "num_tokens": 6627652.0, + "step": 1175 + }, + { + "epoch": 2.8931804465902236, + "grad_norm": 0.06576525419950485, + "learning_rate": 0.0001197608195366377, + "loss": 0.0382, + "mean_token_accuracy": 0.9877642679214478, + "num_tokens": 6777206.0, + "step": 1200 + }, + { + "epoch": 2.9535304767652386, + "grad_norm": 0.05414114519953728, + "learning_rate": 0.00011405017384392655, + "loss": 0.0273, + "mean_token_accuracy": 0.9905436062812805, + "num_tokens": 6909991.0, + "step": 1225 + }, + { + "epoch": 3.0, + "eval_loss": 0.043681155890226364, + "eval_mean_token_accuracy": 0.9871059862343041, + "eval_num_tokens": 7012029.0, + "eval_runtime": 66.1941, + "eval_samples_per_second": 5.575, + "eval_steps_per_second": 2.795, + "step": 1245 + }, + { + "epoch": 3.012070006035003, + "grad_norm": 0.051925178617239, + "learning_rate": 0.00010839430101597464, + "loss": 0.0378, + "mean_token_accuracy": 0.9880035630206472, + "num_tokens": 7048246.0, + "step": 1250 + }, + { + "epoch": 3.0724200362100182, + "grad_norm": 0.049059975892305374, + "learning_rate": 0.00010280181829493925, + "loss": 0.025, + "mean_token_accuracy": 0.9913980680704116, + "num_tokens": 7195485.0, + "step": 1275 + }, + { + "epoch": 3.1327700663850333, + "grad_norm": 0.10053006559610367, + "learning_rate": 9.728124634232282e-05, + "loss": 0.0267, + "mean_token_accuracy": 0.9907277238368988, + "num_tokens": 7332046.0, + "step": 1300 + }, + { + "epoch": 3.1931200965600484, + "grad_norm": 0.05228966102004051, + "learning_rate": 9.184099625696183e-05, + "loss": 0.0243, + "mean_token_accuracy": 0.9914121353626251, + "num_tokens": 7480272.0, + "step": 1325 + }, + { + "epoch": 3.2534701267350634, + "grad_norm": 0.07380355894565582, + "learning_rate": 8.648935675994459e-05, + "loss": 0.0278, + "mean_token_accuracy": 0.9903601062297821, + "num_tokens": 7617159.0, + "step": 1350 + }, + { + "epoch": 3.3138201569100785, + "grad_norm": 0.049229696393013, + "learning_rate": 8.123448156598283e-05, + "loss": 0.0233, + "mean_token_accuracy": 0.9916152226924896, + "num_tokens": 7765702.0, + "step": 1375 + }, + { + "epoch": 3.3741701870850935, + "grad_norm": 0.1148751899600029, + "learning_rate": 7.608437696047756e-05, + "loss": 0.028, + "mean_token_accuracy": 0.9899373424053192, + "num_tokens": 7900194.0, + "step": 1400 + }, + { + "epoch": 3.4345202172601086, + "grad_norm": 0.07034936547279358, + "learning_rate": 7.104688960120769e-05, + "loss": 0.0243, + "mean_token_accuracy": 0.9914365601539612, + "num_tokens": 8046353.0, + "step": 1425 + }, + { + "epoch": 3.4948702474351236, + "grad_norm": 0.07448563724756241, + "learning_rate": 6.612969456322507e-05, + "loss": 0.0261, + "mean_token_accuracy": 0.9907774782180786, + "num_tokens": 8181174.0, + "step": 1450 + }, + { + "epoch": 3.5552202776101387, + "grad_norm": 0.04661368206143379, + "learning_rate": 6.134028364517273e-05, + "loss": 0.0225, + "mean_token_accuracy": 0.9918044358491898, + "num_tokens": 8327696.0, + "step": 1475 + }, + { + "epoch": 3.6155703077851538, + "grad_norm": 0.08300278335809708, + "learning_rate": 5.6685953954840553e-05, + "loss": 0.0295, + "mean_token_accuracy": 0.9901704251766205, + "num_tokens": 8463109.0, + "step": 1500 + }, + { + "epoch": 3.675920337960169, + "grad_norm": 0.0406249575316906, + "learning_rate": 5.2173796791351116e-05, + "loss": 0.0231, + "mean_token_accuracy": 0.9916487127542496, + "num_tokens": 8609741.0, + "step": 1525 + }, + { + "epoch": 3.736270368135184, + "grad_norm": 0.11077206581830978, + "learning_rate": 4.781068684091327e-05, + "loss": 0.025, + "mean_token_accuracy": 0.9910765969753266, + "num_tokens": 8746681.0, + "step": 1550 + }, + { + "epoch": 3.796620398310199, + "grad_norm": 0.05278284102678299, + "learning_rate": 4.360327170260604e-05, + "loss": 0.0223, + "mean_token_accuracy": 0.9916610676050186, + "num_tokens": 8893017.0, + "step": 1575 + }, + { + "epoch": 3.856970428485214, + "grad_norm": 0.08629264682531357, + "learning_rate": 3.955796176015015e-05, + "loss": 0.0243, + "mean_token_accuracy": 0.9913058090209961, + "num_tokens": 9026413.0, + "step": 1600 + }, + { + "epoch": 3.9173204586602295, + "grad_norm": 0.05037279054522514, + "learning_rate": 3.5680920415099366e-05, + "loss": 0.0222, + "mean_token_accuracy": 0.9920467078685761, + "num_tokens": 9172881.0, + "step": 1625 + }, + { + "epoch": 3.9776704888352445, + "grad_norm": 0.0597236230969429, + "learning_rate": 3.197805469633152e-05, + "loss": 0.0228, + "mean_token_accuracy": 0.9919342297315598, + "num_tokens": 9303196.0, + "step": 1650 + }, + { + "epoch": 4.0, + "eval_loss": 0.042119670659303665, + "eval_mean_token_accuracy": 0.9881797494115057, + "eval_num_tokens": 9349372.0, + "eval_runtime": 66.2898, + "eval_samples_per_second": 5.566, + "eval_steps_per_second": 2.791, + "step": 1660 + } + ], + "logging_steps": 25, + "max_steps": 2075, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.261978111939707e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1660/training_args.bin b/checkpoint-1660/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..184dacc2ae7ef0096c0f2c3ef91b16c840ae960c --- /dev/null +++ b/checkpoint-1660/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4370f125a1b8cc4780ee7319c46b5e76e386e5bd5061ee6e638e993ff73b5e6f +size 6033 diff --git a/checkpoint-2075/README.md b/checkpoint-2075/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0d31128190920e45b61115944d16e773c2ec94c3 --- /dev/null +++ b/checkpoint-2075/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoint-2075/adapter_config.json b/checkpoint-2075/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9a94c10785ed353013e980625c777fea6b3cae9 --- /dev/null +++ b/checkpoint-2075/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-2075/adapter_model.safetensors b/checkpoint-2075/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..edbdb3357f63c72105a7eacba6bc0606639890fc --- /dev/null +++ b/checkpoint-2075/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:234ca98ac0c07569d0c6482aba6c90a7a279051b76dadd40207eabce4792af00 +size 335604696 diff --git a/checkpoint-2075/chat_template.jinja b/checkpoint-2075/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-2075/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-2075/optimizer.pt b/checkpoint-2075/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..041311ef74e348b67462352e2097c12f668d5bcb --- /dev/null +++ b/checkpoint-2075/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de0a724643700d6db1c3a817f7f2c9602ca6d66879e4e775b3bad1d1fff97f7 +size 671365003 diff --git a/checkpoint-2075/rng_state.pth b/checkpoint-2075/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..173d453625cfc1ee9b279603ce4bf5f59ff6d584 --- /dev/null +++ b/checkpoint-2075/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d810af01c33b10a845278d70e8a76e55b4cea7ac522cc6f9e139d6100adbcfe +size 14645 diff --git a/checkpoint-2075/scheduler.pt b/checkpoint-2075/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f66aaa7e5467e82eb75f9de2c842429b61ad8b49 --- /dev/null +++ b/checkpoint-2075/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9800b41968ee3ebe37d1b494ee7ae782a6ac9a2a1c60981b79b83606f190f395 +size 1465 diff --git a/checkpoint-2075/special_tokens_map.json b/checkpoint-2075/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-2075/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-2075/tokenizer.json b/checkpoint-2075/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..9645e8546890ad31273c3a00dd4cc1bd4f7aafbb --- /dev/null +++ b/checkpoint-2075/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7389e002692fc9e3a3d9f9884a9e9d5657e2a4406bf9759a3703abe8a3485402 +size 17210085 diff --git a/checkpoint-2075/tokenizer_config.json b/checkpoint-2075/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..563042ba0748ab38bd6000afa193a5a9634d31cc --- /dev/null +++ b/checkpoint-2075/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-2075/trainer_state.json b/checkpoint-2075/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b57fe76bdc5b30910dabaa99ef81343ac57fe716 --- /dev/null +++ b/checkpoint-2075/trainer_state.json @@ -0,0 +1,831 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 2075, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.060350030175015085, + "grad_norm": 0.5709517598152161, + "learning_rate": 0.00011428571428571427, + "loss": 1.8697, + "mean_token_accuracy": 0.6171649008989334, + "num_tokens": 158430.0, + "step": 25 + }, + { + "epoch": 0.12070006035003017, + "grad_norm": 0.4126085937023163, + "learning_rate": 0.0002333333333333333, + "loss": 0.8694, + "mean_token_accuracy": 0.7941986697912217, + "num_tokens": 285311.0, + "step": 50 + }, + { + "epoch": 0.18105009052504525, + "grad_norm": 0.3864216208457947, + "learning_rate": 0.00029997787517981614, + "loss": 0.6122, + "mean_token_accuracy": 0.8420314061641693, + "num_tokens": 442499.0, + "step": 75 + }, + { + "epoch": 0.24140012070006034, + "grad_norm": 0.43023112416267395, + "learning_rate": 0.0002997630832860032, + "loss": 0.5051, + "mean_token_accuracy": 0.8632901525497436, + "num_tokens": 567883.0, + "step": 100 + }, + { + "epoch": 0.30175015087507545, + "grad_norm": 0.32171109318733215, + "learning_rate": 0.0002993201135681549, + "loss": 0.3632, + "mean_token_accuracy": 0.8987007009983062, + "num_tokens": 723609.0, + "step": 125 + }, + { + "epoch": 0.3621001810500905, + "grad_norm": 0.398703008890152, + "learning_rate": 0.0002986496409313553, + "loss": 0.3154, + "mean_token_accuracy": 0.911303380727768, + "num_tokens": 850241.0, + "step": 150 + }, + { + "epoch": 0.4224502112251056, + "grad_norm": 0.2818653881549835, + "learning_rate": 0.0002977526869022985, + "loss": 0.2394, + "mean_token_accuracy": 0.9328221660852433, + "num_tokens": 1004280.0, + "step": 175 + }, + { + "epoch": 0.4828002414001207, + "grad_norm": 0.39628875255584717, + "learning_rate": 0.0002966306180728982, + "loss": 0.2069, + "mean_token_accuracy": 0.9405643939971924, + "num_tokens": 1131356.0, + "step": 200 + }, + { + "epoch": 0.5431502715751357, + "grad_norm": 0.2750284671783447, + "learning_rate": 0.0002952851440181598, + "loss": 0.1639, + "mean_token_accuracy": 0.9533388954401016, + "num_tokens": 1286496.0, + "step": 225 + }, + { + "epoch": 0.6035003017501509, + "grad_norm": 0.4104975759983063, + "learning_rate": 0.0002937183146914856, + "loss": 0.1533, + "mean_token_accuracy": 0.9560526990890503, + "num_tokens": 1413124.0, + "step": 250 + }, + { + "epoch": 0.663850331925166, + "grad_norm": 0.2589656710624695, + "learning_rate": 0.000291932517301382, + "loss": 0.1192, + "mean_token_accuracy": 0.9663791972398758, + "num_tokens": 1568083.0, + "step": 275 + }, + { + "epoch": 0.724200362100181, + "grad_norm": 0.42289865016937256, + "learning_rate": 0.00028993047267432864, + "loss": 0.119, + "mean_token_accuracy": 0.9662033247947693, + "num_tokens": 1693195.0, + "step": 300 + }, + { + "epoch": 0.7845503922751962, + "grad_norm": 0.3269366919994354, + "learning_rate": 0.0002877152311093483, + "loss": 0.1098, + "mean_token_accuracy": 0.9693831586837769, + "num_tokens": 1847137.0, + "step": 325 + }, + { + "epoch": 0.8449004224502112, + "grad_norm": 0.29110169410705566, + "learning_rate": 0.00028529016773059656, + "loss": 0.097, + "mean_token_accuracy": 0.9721069341897964, + "num_tokens": 1974433.0, + "step": 350 + }, + { + "epoch": 0.9052504526252263, + "grad_norm": 0.2041333168745041, + "learning_rate": 0.00028265897734504976, + "loss": 0.1078, + "mean_token_accuracy": 0.9707016760110855, + "num_tokens": 2130536.0, + "step": 375 + }, + { + "epoch": 0.9656004828002414, + "grad_norm": 0.27978503704071045, + "learning_rate": 0.0002798256688131267, + "loss": 0.0788, + "mean_token_accuracy": 0.9774614202976227, + "num_tokens": 2257778.0, + "step": 400 + }, + { + "epoch": 1.0, + "eval_loss": 0.08150900155305862, + "eval_mean_token_accuracy": 0.977405073836043, + "eval_num_tokens": 2337343.0, + "eval_runtime": 66.2124, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 415 + }, + { + "epoch": 1.024140012070006, + "grad_norm": 0.2321971356868744, + "learning_rate": 0.0002767945589408217, + "loss": 0.1016, + "mean_token_accuracy": 0.9726764823972565, + "num_tokens": 2406156.0, + "step": 425 + }, + { + "epoch": 1.0844900422450212, + "grad_norm": 0.15238304436206818, + "learning_rate": 0.0002735702659026533, + "loss": 0.0566, + "mean_token_accuracy": 0.9829730612039566, + "num_tokens": 2547712.0, + "step": 450 + }, + { + "epoch": 1.1448400724200363, + "grad_norm": 0.24308571219444275, + "learning_rate": 0.0002701577022054515, + "loss": 0.0787, + "mean_token_accuracy": 0.9782290506362915, + "num_tokens": 2689644.0, + "step": 475 + }, + { + "epoch": 1.2051901025950513, + "grad_norm": 0.10345127433538437, + "learning_rate": 0.0002665620672037014, + "loss": 0.0559, + "mean_token_accuracy": 0.9833064198493957, + "num_tokens": 2830847.0, + "step": 500 + }, + { + "epoch": 1.2655401327700664, + "grad_norm": 0.12139427661895752, + "learning_rate": 0.0002627888391778493, + "loss": 0.0734, + "mean_token_accuracy": 0.9794088786840439, + "num_tokens": 2970752.0, + "step": 525 + }, + { + "epoch": 1.3258901629450814, + "grad_norm": 0.19484972953796387, + "learning_rate": 0.0002588437669876384, + "loss": 0.05, + "mean_token_accuracy": 0.9848471087217331, + "num_tokens": 3110651.0, + "step": 550 + }, + { + "epoch": 1.3862401931200965, + "grad_norm": 0.15929530560970306, + "learning_rate": 0.00025473286131319283, + "loss": 0.0647, + "mean_token_accuracy": 0.9809465759992599, + "num_tokens": 3250565.0, + "step": 575 + }, + { + "epoch": 1.4465902232951118, + "grad_norm": 0.1795138716697693, + "learning_rate": 0.0002504623854971937, + "loss": 0.051, + "mean_token_accuracy": 0.9846814131736755, + "num_tokens": 3391351.0, + "step": 600 + }, + { + "epoch": 1.5069402534701268, + "grad_norm": 0.14619964361190796, + "learning_rate": 0.00024603884600210097, + "loss": 0.0628, + "mean_token_accuracy": 0.9824352604150772, + "num_tokens": 3530305.0, + "step": 625 + }, + { + "epoch": 1.567290283645142, + "grad_norm": 0.11213799566030502, + "learning_rate": 0.00024146898249695974, + "loss": 0.0478, + "mean_token_accuracy": 0.9856422942876816, + "num_tokens": 3672598.0, + "step": 650 + }, + { + "epoch": 1.627640313820157, + "grad_norm": 0.126531720161438, + "learning_rate": 0.00023675975758889506, + "loss": 0.059, + "mean_token_accuracy": 0.9828101003170013, + "num_tokens": 3814546.0, + "step": 675 + }, + { + "epoch": 1.687990343995172, + "grad_norm": 0.13861851394176483, + "learning_rate": 0.00023191834621493968, + "loss": 0.0453, + "mean_token_accuracy": 0.9860042548179626, + "num_tokens": 3955590.0, + "step": 700 + }, + { + "epoch": 1.748340374170187, + "grad_norm": 0.057172827422618866, + "learning_rate": 0.00022695212471035816, + "loss": 0.053, + "mean_token_accuracy": 0.984272707104683, + "num_tokens": 4096678.0, + "step": 725 + }, + { + "epoch": 1.8086904043452021, + "grad_norm": 0.13785910606384277, + "learning_rate": 0.0002218686595701219, + "loss": 0.0438, + "mean_token_accuracy": 0.9864085137844085, + "num_tokens": 4238701.0, + "step": 750 + }, + { + "epoch": 1.8690404345202172, + "grad_norm": 0.09842238575220108, + "learning_rate": 0.0002166756959206587, + "loss": 0.0538, + "mean_token_accuracy": 0.9840266466140747, + "num_tokens": 4378606.0, + "step": 775 + }, + { + "epoch": 1.9293904646952322, + "grad_norm": 0.09276362508535385, + "learning_rate": 0.00021138114571944054, + "loss": 0.0415, + "mean_token_accuracy": 0.9867525893449783, + "num_tokens": 4521351.0, + "step": 800 + }, + { + "epoch": 1.9897404948702473, + "grad_norm": 0.07594375312328339, + "learning_rate": 0.000205993075700389, + "loss": 0.0452, + "mean_token_accuracy": 0.9859298402071, + "num_tokens": 4655013.0, + "step": 825 + }, + { + "epoch": 2.0, + "eval_loss": 0.05184464156627655, + "eval_mean_token_accuracy": 0.9849317776190268, + "eval_num_tokens": 4674686.0, + "eval_runtime": 66.2113, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 830 + }, + { + "epoch": 2.048280024140012, + "grad_norm": 0.06794245541095734, + "learning_rate": 0.00020051969508346498, + "loss": 0.039, + "mean_token_accuracy": 0.9873216932581872, + "num_tokens": 4801249.0, + "step": 850 + }, + { + "epoch": 2.1086300543150274, + "grad_norm": 0.06261667609214783, + "learning_rate": 0.00019496934306716706, + "loss": 0.0306, + "mean_token_accuracy": 0.9899670380353928, + "num_tokens": 4934594.0, + "step": 875 + }, + { + "epoch": 2.1689800844900424, + "grad_norm": 0.12159217149019241, + "learning_rate": 0.00018935047612299625, + "loss": 0.0441, + "mean_token_accuracy": 0.9860250049829483, + "num_tokens": 5086726.0, + "step": 900 + }, + { + "epoch": 2.2293301146650575, + "grad_norm": 0.10762438178062439, + "learning_rate": 0.00018367165511124414, + "loss": 0.0318, + "mean_token_accuracy": 0.9893095499277115, + "num_tokens": 5218004.0, + "step": 925 + }, + { + "epoch": 2.2896801448400725, + "grad_norm": 0.08475406467914581, + "learning_rate": 0.00017794153223773558, + "loss": 0.0431, + "mean_token_accuracy": 0.9860376048088074, + "num_tokens": 5368044.0, + "step": 950 + }, + { + "epoch": 2.3500301750150876, + "grad_norm": 0.07939684391021729, + "learning_rate": 0.00017216883787139772, + "loss": 0.0301, + "mean_token_accuracy": 0.9899585354328155, + "num_tokens": 5500546.0, + "step": 975 + }, + { + "epoch": 2.4103802051901027, + "grad_norm": 0.07427559047937393, + "learning_rate": 0.00016636236724274, + "loss": 0.0422, + "mean_token_accuracy": 0.9869222700595855, + "num_tokens": 5651095.0, + "step": 1000 + }, + { + "epoch": 2.4707302353651177, + "grad_norm": 0.08105447888374329, + "learning_rate": 0.00016053096704351255, + "loss": 0.0275, + "mean_token_accuracy": 0.9905411404371262, + "num_tokens": 5784344.0, + "step": 1025 + }, + { + "epoch": 2.5310802655401328, + "grad_norm": 0.05421376973390579, + "learning_rate": 0.00015468352194795791, + "loss": 0.0431, + "mean_token_accuracy": 0.986332500576973, + "num_tokens": 5935006.0, + "step": 1050 + }, + { + "epoch": 2.591430295715148, + "grad_norm": 0.07994894683361053, + "learning_rate": 0.00014882894107619277, + "loss": 0.0281, + "mean_token_accuracy": 0.9903588914871215, + "num_tokens": 6066956.0, + "step": 1075 + }, + { + "epoch": 2.651780325890163, + "grad_norm": 0.06251101195812225, + "learning_rate": 0.00014297614442034518, + "loss": 0.0377, + "mean_token_accuracy": 0.9876210725307465, + "num_tokens": 6213232.0, + "step": 1100 + }, + { + "epoch": 2.712130356065178, + "grad_norm": 0.040336403995752335, + "learning_rate": 0.000137134049254126, + "loss": 0.0284, + "mean_token_accuracy": 0.9903542894124985, + "num_tokens": 6343819.0, + "step": 1125 + }, + { + "epoch": 2.772480386240193, + "grad_norm": 0.0834837481379509, + "learning_rate": 0.000131311556546543, + "loss": 0.0396, + "mean_token_accuracy": 0.9873842394351959, + "num_tokens": 6494517.0, + "step": 1150 + }, + { + "epoch": 2.832830416415208, + "grad_norm": 0.13212957978248596, + "learning_rate": 0.0001255175374004563, + "loss": 0.0288, + "mean_token_accuracy": 0.9902939075231552, + "num_tokens": 6627652.0, + "step": 1175 + }, + { + "epoch": 2.8931804465902236, + "grad_norm": 0.06576525419950485, + "learning_rate": 0.0001197608195366377, + "loss": 0.0382, + "mean_token_accuracy": 0.9877642679214478, + "num_tokens": 6777206.0, + "step": 1200 + }, + { + "epoch": 2.9535304767652386, + "grad_norm": 0.05414114519953728, + "learning_rate": 0.00011405017384392655, + "loss": 0.0273, + "mean_token_accuracy": 0.9905436062812805, + "num_tokens": 6909991.0, + "step": 1225 + }, + { + "epoch": 3.0, + "eval_loss": 0.043681155890226364, + "eval_mean_token_accuracy": 0.9871059862343041, + "eval_num_tokens": 7012029.0, + "eval_runtime": 66.1941, + "eval_samples_per_second": 5.575, + "eval_steps_per_second": 2.795, + "step": 1245 + }, + { + "epoch": 3.012070006035003, + "grad_norm": 0.051925178617239, + "learning_rate": 0.00010839430101597464, + "loss": 0.0378, + "mean_token_accuracy": 0.9880035630206472, + "num_tokens": 7048246.0, + "step": 1250 + }, + { + "epoch": 3.0724200362100182, + "grad_norm": 0.049059975892305374, + "learning_rate": 0.00010280181829493925, + "loss": 0.025, + "mean_token_accuracy": 0.9913980680704116, + "num_tokens": 7195485.0, + "step": 1275 + }, + { + "epoch": 3.1327700663850333, + "grad_norm": 0.10053006559610367, + "learning_rate": 9.728124634232282e-05, + "loss": 0.0267, + "mean_token_accuracy": 0.9907277238368988, + "num_tokens": 7332046.0, + "step": 1300 + }, + { + "epoch": 3.1931200965600484, + "grad_norm": 0.05228966102004051, + "learning_rate": 9.184099625696183e-05, + "loss": 0.0243, + "mean_token_accuracy": 0.9914121353626251, + "num_tokens": 7480272.0, + "step": 1325 + }, + { + "epoch": 3.2534701267350634, + "grad_norm": 0.07380355894565582, + "learning_rate": 8.648935675994459e-05, + "loss": 0.0278, + "mean_token_accuracy": 0.9903601062297821, + "num_tokens": 7617159.0, + "step": 1350 + }, + { + "epoch": 3.3138201569100785, + "grad_norm": 0.049229696393013, + "learning_rate": 8.123448156598283e-05, + "loss": 0.0233, + "mean_token_accuracy": 0.9916152226924896, + "num_tokens": 7765702.0, + "step": 1375 + }, + { + "epoch": 3.3741701870850935, + "grad_norm": 0.1148751899600029, + "learning_rate": 7.608437696047756e-05, + "loss": 0.028, + "mean_token_accuracy": 0.9899373424053192, + "num_tokens": 7900194.0, + "step": 1400 + }, + { + "epoch": 3.4345202172601086, + "grad_norm": 0.07034936547279358, + "learning_rate": 7.104688960120769e-05, + "loss": 0.0243, + "mean_token_accuracy": 0.9914365601539612, + "num_tokens": 8046353.0, + "step": 1425 + }, + { + "epoch": 3.4948702474351236, + "grad_norm": 0.07448563724756241, + "learning_rate": 6.612969456322507e-05, + "loss": 0.0261, + "mean_token_accuracy": 0.9907774782180786, + "num_tokens": 8181174.0, + "step": 1450 + }, + { + "epoch": 3.5552202776101387, + "grad_norm": 0.04661368206143379, + "learning_rate": 6.134028364517273e-05, + "loss": 0.0225, + "mean_token_accuracy": 0.9918044358491898, + "num_tokens": 8327696.0, + "step": 1475 + }, + { + "epoch": 3.6155703077851538, + "grad_norm": 0.08300278335809708, + "learning_rate": 5.6685953954840553e-05, + "loss": 0.0295, + "mean_token_accuracy": 0.9901704251766205, + "num_tokens": 8463109.0, + "step": 1500 + }, + { + "epoch": 3.675920337960169, + "grad_norm": 0.0406249575316906, + "learning_rate": 5.2173796791351116e-05, + "loss": 0.0231, + "mean_token_accuracy": 0.9916487127542496, + "num_tokens": 8609741.0, + "step": 1525 + }, + { + "epoch": 3.736270368135184, + "grad_norm": 0.11077206581830978, + "learning_rate": 4.781068684091327e-05, + "loss": 0.025, + "mean_token_accuracy": 0.9910765969753266, + "num_tokens": 8746681.0, + "step": 1550 + }, + { + "epoch": 3.796620398310199, + "grad_norm": 0.05278284102678299, + "learning_rate": 4.360327170260604e-05, + "loss": 0.0223, + "mean_token_accuracy": 0.9916610676050186, + "num_tokens": 8893017.0, + "step": 1575 + }, + { + "epoch": 3.856970428485214, + "grad_norm": 0.08629264682531357, + "learning_rate": 3.955796176015015e-05, + "loss": 0.0243, + "mean_token_accuracy": 0.9913058090209961, + "num_tokens": 9026413.0, + "step": 1600 + }, + { + "epoch": 3.9173204586602295, + "grad_norm": 0.05037279054522514, + "learning_rate": 3.5680920415099366e-05, + "loss": 0.0222, + "mean_token_accuracy": 0.9920467078685761, + "num_tokens": 9172881.0, + "step": 1625 + }, + { + "epoch": 3.9776704888352445, + "grad_norm": 0.0597236230969429, + "learning_rate": 3.197805469633152e-05, + "loss": 0.0228, + "mean_token_accuracy": 0.9919342297315598, + "num_tokens": 9303196.0, + "step": 1650 + }, + { + "epoch": 4.0, + "eval_loss": 0.042119670659303665, + "eval_mean_token_accuracy": 0.9881797494115057, + "eval_num_tokens": 9349372.0, + "eval_runtime": 66.2898, + "eval_samples_per_second": 5.566, + "eval_steps_per_second": 2.791, + "step": 1660 + }, + { + "epoch": 4.036210018105009, + "grad_norm": 0.05765984207391739, + "learning_rate": 2.8455006260147228e-05, + "loss": 0.0238, + "mean_token_accuracy": 0.9917030002652985, + "num_tokens": 9449530.0, + "step": 1675 + }, + { + "epoch": 4.096560048280024, + "grad_norm": 0.03671165555715561, + "learning_rate": 2.5117142794687618e-05, + "loss": 0.0192, + "mean_token_accuracy": 0.9927842736244201, + "num_tokens": 9587797.0, + "step": 1700 + }, + { + "epoch": 4.15691007845504, + "grad_norm": 0.043139275163412094, + "learning_rate": 2.1969549841768168e-05, + "loss": 0.0204, + "mean_token_accuracy": 0.992429872751236, + "num_tokens": 9731352.0, + "step": 1725 + }, + { + "epoch": 4.217260108630055, + "grad_norm": 0.03763241320848465, + "learning_rate": 1.901702304858842e-05, + "loss": 0.019, + "mean_token_accuracy": 0.9929107385873794, + "num_tokens": 9867983.0, + "step": 1750 + }, + { + "epoch": 4.27761013880507, + "grad_norm": 0.041600313037633896, + "learning_rate": 1.6264060861122442e-05, + "loss": 0.0204, + "mean_token_accuracy": 0.9922807848453522, + "num_tokens": 10012504.0, + "step": 1775 + }, + { + "epoch": 4.337960168980085, + "grad_norm": 0.04624481871724129, + "learning_rate": 1.3714857670322927e-05, + "loss": 0.0179, + "mean_token_accuracy": 0.9930372297763824, + "num_tokens": 10148132.0, + "step": 1800 + }, + { + "epoch": 4.3983101991551, + "grad_norm": 0.060075584799051285, + "learning_rate": 1.1373297421581129e-05, + "loss": 0.0204, + "mean_token_accuracy": 0.9924710804224014, + "num_tokens": 10293830.0, + "step": 1825 + }, + { + "epoch": 4.458660229330115, + "grad_norm": 0.041872959583997726, + "learning_rate": 9.242947697178927e-06, + "loss": 0.019, + "mean_token_accuracy": 0.9929444575309754, + "num_tokens": 10430586.0, + "step": 1850 + }, + { + "epoch": 4.51901025950513, + "grad_norm": 0.04955296963453293, + "learning_rate": 7.3270542807491675e-06, + "loss": 0.0209, + "mean_token_accuracy": 0.9922113794088364, + "num_tokens": 10576940.0, + "step": 1875 + }, + { + "epoch": 4.579360289680145, + "grad_norm": 0.058738283812999725, + "learning_rate": 5.628536212026197e-06, + "loss": 0.0185, + "mean_token_accuracy": 0.9930185693502426, + "num_tokens": 10713805.0, + "step": 1900 + }, + { + "epoch": 4.63971031985516, + "grad_norm": 0.04668419435620308, + "learning_rate": 4.149981339420344e-06, + "loss": 0.0214, + "mean_token_accuracy": 0.9923474651575088, + "num_tokens": 10859042.0, + "step": 1925 + }, + { + "epoch": 4.700060350030175, + "grad_norm": 0.04878915846347809, + "learning_rate": 2.8936423771929897e-06, + "loss": 0.0187, + "mean_token_accuracy": 0.9928801411390304, + "num_tokens": 10994902.0, + "step": 1950 + }, + { + "epoch": 4.76041038020519, + "grad_norm": 0.05259896069765091, + "learning_rate": 1.8614334732393544e-06, + "loss": 0.0214, + "mean_token_accuracy": 0.9922178369760514, + "num_tokens": 11140272.0, + "step": 1975 + }, + { + "epoch": 4.820760410380205, + "grad_norm": 0.05369729548692703, + "learning_rate": 1.0549272927081964e-06, + "loss": 0.0181, + "mean_token_accuracy": 0.9932708668708802, + "num_tokens": 11277983.0, + "step": 2000 + }, + { + "epoch": 4.88111044055522, + "grad_norm": 0.06313612312078476, + "learning_rate": 4.753526219018755e-07, + "loss": 0.0214, + "mean_token_accuracy": 0.9922978645563125, + "num_tokens": 11423268.0, + "step": 2025 + }, + { + "epoch": 4.941460470730235, + "grad_norm": 0.07955753803253174, + "learning_rate": 1.235924961075496e-07, + "loss": 0.0183, + "mean_token_accuracy": 0.9928239184617996, + "num_tokens": 11560873.0, + "step": 2050 + }, + { + "epoch": 5.0, + "grad_norm": 0.13425925374031067, + "learning_rate": 1.8285421163888313e-10, + "loss": 0.0201, + "mean_token_accuracy": 0.9927458523475018, + "num_tokens": 11686715.0, + "step": 2075 + }, + { + "epoch": 5.0, + "eval_loss": 0.043636951595544815, + "eval_mean_token_accuracy": 0.9883439669737945, + "eval_num_tokens": 11686715.0, + "eval_runtime": 66.1362, + "eval_samples_per_second": 5.579, + "eval_steps_per_second": 2.797, + "step": 2075 + } + ], + "logging_steps": 25, + "max_steps": 2075, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 5.327425285745541e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2075/training_args.bin b/checkpoint-2075/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..184dacc2ae7ef0096c0f2c3ef91b16c840ae960c --- /dev/null +++ b/checkpoint-2075/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4370f125a1b8cc4780ee7319c46b5e76e386e5bd5061ee6e638e993ff73b5e6f +size 6033 diff --git a/checkpoint-415/README.md b/checkpoint-415/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0d31128190920e45b61115944d16e773c2ec94c3 --- /dev/null +++ b/checkpoint-415/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoint-415/adapter_config.json b/checkpoint-415/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9a94c10785ed353013e980625c777fea6b3cae9 --- /dev/null +++ b/checkpoint-415/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-415/adapter_model.safetensors b/checkpoint-415/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23921a77a9ac29d5263efdda49347928c3025c55 --- /dev/null +++ b/checkpoint-415/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50c38ea25c275abf8ff7fda9d3486c6d040d78643c258c735781283d4ff7f82c +size 335604696 diff --git a/checkpoint-415/chat_template.jinja b/checkpoint-415/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-415/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-415/optimizer.pt b/checkpoint-415/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4113960902b85fca03a75086ed4a11a3885da3b8 --- /dev/null +++ b/checkpoint-415/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3034ef2b8a6f4df19ae402a1df47b3cd2b62d2b3139d41a6b7ad45f58c9f9f +size 671365003 diff --git a/checkpoint-415/rng_state.pth b/checkpoint-415/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e97f69cae69ecb63a7f6cd707efc91a3fdcb41f9 --- /dev/null +++ b/checkpoint-415/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5270b12893f9a5c8f1003738ae72c986eea1a51a382b44ea622d1b0f05bb84 +size 14645 diff --git a/checkpoint-415/scheduler.pt b/checkpoint-415/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3244a645db6c240be6818a5a0149ceab8d6cf6b8 --- /dev/null +++ b/checkpoint-415/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311f04ff148a0643da9dd6ab46cae0944077a3236a48d5f9c8c146cef8cdb57f +size 1465 diff --git a/checkpoint-415/special_tokens_map.json b/checkpoint-415/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-415/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-415/tokenizer.json b/checkpoint-415/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..9645e8546890ad31273c3a00dd4cc1bd4f7aafbb --- /dev/null +++ b/checkpoint-415/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7389e002692fc9e3a3d9f9884a9e9d5657e2a4406bf9759a3703abe8a3485402 +size 17210085 diff --git a/checkpoint-415/tokenizer_config.json b/checkpoint-415/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..563042ba0748ab38bd6000afa193a5a9634d31cc --- /dev/null +++ b/checkpoint-415/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-415/trainer_state.json b/checkpoint-415/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e1de0a215bc76b73d63608762e3ee301742f0770 --- /dev/null +++ b/checkpoint-415/trainer_state.json @@ -0,0 +1,188 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 415, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.060350030175015085, + "grad_norm": 0.5709517598152161, + "learning_rate": 0.00011428571428571427, + "loss": 1.8697, + "mean_token_accuracy": 0.6171649008989334, + "num_tokens": 158430.0, + "step": 25 + }, + { + "epoch": 0.12070006035003017, + "grad_norm": 0.4126085937023163, + "learning_rate": 0.0002333333333333333, + "loss": 0.8694, + "mean_token_accuracy": 0.7941986697912217, + "num_tokens": 285311.0, + "step": 50 + }, + { + "epoch": 0.18105009052504525, + "grad_norm": 0.3864216208457947, + "learning_rate": 0.00029997787517981614, + "loss": 0.6122, + "mean_token_accuracy": 0.8420314061641693, + "num_tokens": 442499.0, + "step": 75 + }, + { + "epoch": 0.24140012070006034, + "grad_norm": 0.43023112416267395, + "learning_rate": 0.0002997630832860032, + "loss": 0.5051, + "mean_token_accuracy": 0.8632901525497436, + "num_tokens": 567883.0, + "step": 100 + }, + { + "epoch": 0.30175015087507545, + "grad_norm": 0.32171109318733215, + "learning_rate": 0.0002993201135681549, + "loss": 0.3632, + "mean_token_accuracy": 0.8987007009983062, + "num_tokens": 723609.0, + "step": 125 + }, + { + "epoch": 0.3621001810500905, + "grad_norm": 0.398703008890152, + "learning_rate": 0.0002986496409313553, + "loss": 0.3154, + "mean_token_accuracy": 0.911303380727768, + "num_tokens": 850241.0, + "step": 150 + }, + { + "epoch": 0.4224502112251056, + "grad_norm": 0.2818653881549835, + "learning_rate": 0.0002977526869022985, + "loss": 0.2394, + "mean_token_accuracy": 0.9328221660852433, + "num_tokens": 1004280.0, + "step": 175 + }, + { + "epoch": 0.4828002414001207, + "grad_norm": 0.39628875255584717, + "learning_rate": 0.0002966306180728982, + "loss": 0.2069, + "mean_token_accuracy": 0.9405643939971924, + "num_tokens": 1131356.0, + "step": 200 + }, + { + "epoch": 0.5431502715751357, + "grad_norm": 0.2750284671783447, + "learning_rate": 0.0002952851440181598, + "loss": 0.1639, + "mean_token_accuracy": 0.9533388954401016, + "num_tokens": 1286496.0, + "step": 225 + }, + { + "epoch": 0.6035003017501509, + "grad_norm": 0.4104975759983063, + "learning_rate": 0.0002937183146914856, + "loss": 0.1533, + "mean_token_accuracy": 0.9560526990890503, + "num_tokens": 1413124.0, + "step": 250 + }, + { + "epoch": 0.663850331925166, + "grad_norm": 0.2589656710624695, + "learning_rate": 0.000291932517301382, + "loss": 0.1192, + "mean_token_accuracy": 0.9663791972398758, + "num_tokens": 1568083.0, + "step": 275 + }, + { + "epoch": 0.724200362100181, + "grad_norm": 0.42289865016937256, + "learning_rate": 0.00028993047267432864, + "loss": 0.119, + "mean_token_accuracy": 0.9662033247947693, + "num_tokens": 1693195.0, + "step": 300 + }, + { + "epoch": 0.7845503922751962, + "grad_norm": 0.3269366919994354, + "learning_rate": 0.0002877152311093483, + "loss": 0.1098, + "mean_token_accuracy": 0.9693831586837769, + "num_tokens": 1847137.0, + "step": 325 + }, + { + "epoch": 0.8449004224502112, + "grad_norm": 0.29110169410705566, + "learning_rate": 0.00028529016773059656, + "loss": 0.097, + "mean_token_accuracy": 0.9721069341897964, + "num_tokens": 1974433.0, + "step": 350 + }, + { + "epoch": 0.9052504526252263, + "grad_norm": 0.2041333168745041, + "learning_rate": 0.00028265897734504976, + "loss": 0.1078, + "mean_token_accuracy": 0.9707016760110855, + "num_tokens": 2130536.0, + "step": 375 + }, + { + "epoch": 0.9656004828002414, + "grad_norm": 0.27978503704071045, + "learning_rate": 0.0002798256688131267, + "loss": 0.0788, + "mean_token_accuracy": 0.9774614202976227, + "num_tokens": 2257778.0, + "step": 400 + }, + { + "epoch": 1.0, + "eval_loss": 0.08150900155305862, + "eval_mean_token_accuracy": 0.977405073836043, + "eval_num_tokens": 2337343.0, + "eval_runtime": 66.2124, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 415 + } + ], + "logging_steps": 25, + "max_steps": 2075, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.065446263148544e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-415/training_args.bin b/checkpoint-415/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..184dacc2ae7ef0096c0f2c3ef91b16c840ae960c --- /dev/null +++ b/checkpoint-415/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4370f125a1b8cc4780ee7319c46b5e76e386e5bd5061ee6e638e993ff73b5e6f +size 6033 diff --git a/checkpoint-830/README.md b/checkpoint-830/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0d31128190920e45b61115944d16e773c2ec94c3 --- /dev/null +++ b/checkpoint-830/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.15.2 \ No newline at end of file diff --git a/checkpoint-830/adapter_config.json b/checkpoint-830/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9a94c10785ed353013e980625c777fea6b3cae9 --- /dev/null +++ b/checkpoint-830/adapter_config.json @@ -0,0 +1,39 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_bias": false, + "lora_dropout": 0.1, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "o_proj", + "up_proj", + "down_proj", + "gate_proj", + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-830/adapter_model.safetensors b/checkpoint-830/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d70f0be1bb3e04aca17d914c8969f05db0ee5e4 --- /dev/null +++ b/checkpoint-830/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9273305761f9603fc6aa044fb77a501c13c89d21409ed6057062bd95a7d9a9d7 +size 335604696 diff --git a/checkpoint-830/chat_template.jinja b/checkpoint-830/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-830/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-830/optimizer.pt b/checkpoint-830/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6df4f38fb44928049334bb9c70127707f55ce357 --- /dev/null +++ b/checkpoint-830/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714afc453a5f558c3f91aaa50e49330bbd110c4f34f5285eeabdc68ca339c2fd +size 671365003 diff --git a/checkpoint-830/rng_state.pth b/checkpoint-830/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4e9bb806b8d53f61f9e19a2d97db62470ed3e09 --- /dev/null +++ b/checkpoint-830/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dee373a221ac6d296573a5a8ce0a3d4357d7d50d60488ca64a162b1e67d8727 +size 14645 diff --git a/checkpoint-830/scheduler.pt b/checkpoint-830/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9b59ad7cb18e0e5665068adb1f70b264ac92859 --- /dev/null +++ b/checkpoint-830/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ebe35a4f401ad5492b7b53a3bf12f2d34ae11eae48d3c7b237ffa7851c70a5 +size 1465 diff --git a/checkpoint-830/special_tokens_map.json b/checkpoint-830/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/checkpoint-830/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/checkpoint-830/tokenizer.json b/checkpoint-830/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..9645e8546890ad31273c3a00dd4cc1bd4f7aafbb --- /dev/null +++ b/checkpoint-830/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7389e002692fc9e3a3d9f9884a9e9d5657e2a4406bf9759a3703abe8a3485402 +size 17210085 diff --git a/checkpoint-830/tokenizer_config.json b/checkpoint-830/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..563042ba0748ab38bd6000afa193a5a9634d31cc --- /dev/null +++ b/checkpoint-830/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/checkpoint-830/trainer_state.json b/checkpoint-830/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b696bf368bf0fa77a4d8d2d31c945e9e594f38ed --- /dev/null +++ b/checkpoint-830/trainer_state.json @@ -0,0 +1,351 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 830, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.060350030175015085, + "grad_norm": 0.5709517598152161, + "learning_rate": 0.00011428571428571427, + "loss": 1.8697, + "mean_token_accuracy": 0.6171649008989334, + "num_tokens": 158430.0, + "step": 25 + }, + { + "epoch": 0.12070006035003017, + "grad_norm": 0.4126085937023163, + "learning_rate": 0.0002333333333333333, + "loss": 0.8694, + "mean_token_accuracy": 0.7941986697912217, + "num_tokens": 285311.0, + "step": 50 + }, + { + "epoch": 0.18105009052504525, + "grad_norm": 0.3864216208457947, + "learning_rate": 0.00029997787517981614, + "loss": 0.6122, + "mean_token_accuracy": 0.8420314061641693, + "num_tokens": 442499.0, + "step": 75 + }, + { + "epoch": 0.24140012070006034, + "grad_norm": 0.43023112416267395, + "learning_rate": 0.0002997630832860032, + "loss": 0.5051, + "mean_token_accuracy": 0.8632901525497436, + "num_tokens": 567883.0, + "step": 100 + }, + { + "epoch": 0.30175015087507545, + "grad_norm": 0.32171109318733215, + "learning_rate": 0.0002993201135681549, + "loss": 0.3632, + "mean_token_accuracy": 0.8987007009983062, + "num_tokens": 723609.0, + "step": 125 + }, + { + "epoch": 0.3621001810500905, + "grad_norm": 0.398703008890152, + "learning_rate": 0.0002986496409313553, + "loss": 0.3154, + "mean_token_accuracy": 0.911303380727768, + "num_tokens": 850241.0, + "step": 150 + }, + { + "epoch": 0.4224502112251056, + "grad_norm": 0.2818653881549835, + "learning_rate": 0.0002977526869022985, + "loss": 0.2394, + "mean_token_accuracy": 0.9328221660852433, + "num_tokens": 1004280.0, + "step": 175 + }, + { + "epoch": 0.4828002414001207, + "grad_norm": 0.39628875255584717, + "learning_rate": 0.0002966306180728982, + "loss": 0.2069, + "mean_token_accuracy": 0.9405643939971924, + "num_tokens": 1131356.0, + "step": 200 + }, + { + "epoch": 0.5431502715751357, + "grad_norm": 0.2750284671783447, + "learning_rate": 0.0002952851440181598, + "loss": 0.1639, + "mean_token_accuracy": 0.9533388954401016, + "num_tokens": 1286496.0, + "step": 225 + }, + { + "epoch": 0.6035003017501509, + "grad_norm": 0.4104975759983063, + "learning_rate": 0.0002937183146914856, + "loss": 0.1533, + "mean_token_accuracy": 0.9560526990890503, + "num_tokens": 1413124.0, + "step": 250 + }, + { + "epoch": 0.663850331925166, + "grad_norm": 0.2589656710624695, + "learning_rate": 0.000291932517301382, + "loss": 0.1192, + "mean_token_accuracy": 0.9663791972398758, + "num_tokens": 1568083.0, + "step": 275 + }, + { + "epoch": 0.724200362100181, + "grad_norm": 0.42289865016937256, + "learning_rate": 0.00028993047267432864, + "loss": 0.119, + "mean_token_accuracy": 0.9662033247947693, + "num_tokens": 1693195.0, + "step": 300 + }, + { + "epoch": 0.7845503922751962, + "grad_norm": 0.3269366919994354, + "learning_rate": 0.0002877152311093483, + "loss": 0.1098, + "mean_token_accuracy": 0.9693831586837769, + "num_tokens": 1847137.0, + "step": 325 + }, + { + "epoch": 0.8449004224502112, + "grad_norm": 0.29110169410705566, + "learning_rate": 0.00028529016773059656, + "loss": 0.097, + "mean_token_accuracy": 0.9721069341897964, + "num_tokens": 1974433.0, + "step": 350 + }, + { + "epoch": 0.9052504526252263, + "grad_norm": 0.2041333168745041, + "learning_rate": 0.00028265897734504976, + "loss": 0.1078, + "mean_token_accuracy": 0.9707016760110855, + "num_tokens": 2130536.0, + "step": 375 + }, + { + "epoch": 0.9656004828002414, + "grad_norm": 0.27978503704071045, + "learning_rate": 0.0002798256688131267, + "loss": 0.0788, + "mean_token_accuracy": 0.9774614202976227, + "num_tokens": 2257778.0, + "step": 400 + }, + { + "epoch": 1.0, + "eval_loss": 0.08150900155305862, + "eval_mean_token_accuracy": 0.977405073836043, + "eval_num_tokens": 2337343.0, + "eval_runtime": 66.2124, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 415 + }, + { + "epoch": 1.024140012070006, + "grad_norm": 0.2321971356868744, + "learning_rate": 0.0002767945589408217, + "loss": 0.1016, + "mean_token_accuracy": 0.9726764823972565, + "num_tokens": 2406156.0, + "step": 425 + }, + { + "epoch": 1.0844900422450212, + "grad_norm": 0.15238304436206818, + "learning_rate": 0.0002735702659026533, + "loss": 0.0566, + "mean_token_accuracy": 0.9829730612039566, + "num_tokens": 2547712.0, + "step": 450 + }, + { + "epoch": 1.1448400724200363, + "grad_norm": 0.24308571219444275, + "learning_rate": 0.0002701577022054515, + "loss": 0.0787, + "mean_token_accuracy": 0.9782290506362915, + "num_tokens": 2689644.0, + "step": 475 + }, + { + "epoch": 1.2051901025950513, + "grad_norm": 0.10345127433538437, + "learning_rate": 0.0002665620672037014, + "loss": 0.0559, + "mean_token_accuracy": 0.9833064198493957, + "num_tokens": 2830847.0, + "step": 500 + }, + { + "epoch": 1.2655401327700664, + "grad_norm": 0.12139427661895752, + "learning_rate": 0.0002627888391778493, + "loss": 0.0734, + "mean_token_accuracy": 0.9794088786840439, + "num_tokens": 2970752.0, + "step": 525 + }, + { + "epoch": 1.3258901629450814, + "grad_norm": 0.19484972953796387, + "learning_rate": 0.0002588437669876384, + "loss": 0.05, + "mean_token_accuracy": 0.9848471087217331, + "num_tokens": 3110651.0, + "step": 550 + }, + { + "epoch": 1.3862401931200965, + "grad_norm": 0.15929530560970306, + "learning_rate": 0.00025473286131319283, + "loss": 0.0647, + "mean_token_accuracy": 0.9809465759992599, + "num_tokens": 3250565.0, + "step": 575 + }, + { + "epoch": 1.4465902232951118, + "grad_norm": 0.1795138716697693, + "learning_rate": 0.0002504623854971937, + "loss": 0.051, + "mean_token_accuracy": 0.9846814131736755, + "num_tokens": 3391351.0, + "step": 600 + }, + { + "epoch": 1.5069402534701268, + "grad_norm": 0.14619964361190796, + "learning_rate": 0.00024603884600210097, + "loss": 0.0628, + "mean_token_accuracy": 0.9824352604150772, + "num_tokens": 3530305.0, + "step": 625 + }, + { + "epoch": 1.567290283645142, + "grad_norm": 0.11213799566030502, + "learning_rate": 0.00024146898249695974, + "loss": 0.0478, + "mean_token_accuracy": 0.9856422942876816, + "num_tokens": 3672598.0, + "step": 650 + }, + { + "epoch": 1.627640313820157, + "grad_norm": 0.126531720161438, + "learning_rate": 0.00023675975758889506, + "loss": 0.059, + "mean_token_accuracy": 0.9828101003170013, + "num_tokens": 3814546.0, + "step": 675 + }, + { + "epoch": 1.687990343995172, + "grad_norm": 0.13861851394176483, + "learning_rate": 0.00023191834621493968, + "loss": 0.0453, + "mean_token_accuracy": 0.9860042548179626, + "num_tokens": 3955590.0, + "step": 700 + }, + { + "epoch": 1.748340374170187, + "grad_norm": 0.057172827422618866, + "learning_rate": 0.00022695212471035816, + "loss": 0.053, + "mean_token_accuracy": 0.984272707104683, + "num_tokens": 4096678.0, + "step": 725 + }, + { + "epoch": 1.8086904043452021, + "grad_norm": 0.13785910606384277, + "learning_rate": 0.0002218686595701219, + "loss": 0.0438, + "mean_token_accuracy": 0.9864085137844085, + "num_tokens": 4238701.0, + "step": 750 + }, + { + "epoch": 1.8690404345202172, + "grad_norm": 0.09842238575220108, + "learning_rate": 0.0002166756959206587, + "loss": 0.0538, + "mean_token_accuracy": 0.9840266466140747, + "num_tokens": 4378606.0, + "step": 775 + }, + { + "epoch": 1.9293904646952322, + "grad_norm": 0.09276362508535385, + "learning_rate": 0.00021138114571944054, + "loss": 0.0415, + "mean_token_accuracy": 0.9867525893449783, + "num_tokens": 4521351.0, + "step": 800 + }, + { + "epoch": 1.9897404948702473, + "grad_norm": 0.07594375312328339, + "learning_rate": 0.000205993075700389, + "loss": 0.0452, + "mean_token_accuracy": 0.9859298402071, + "num_tokens": 4655013.0, + "step": 825 + }, + { + "epoch": 2.0, + "eval_loss": 0.05184464156627655, + "eval_mean_token_accuracy": 0.9849317776190268, + "eval_num_tokens": 4674686.0, + "eval_runtime": 66.2113, + "eval_samples_per_second": 5.573, + "eval_steps_per_second": 2.794, + "step": 830 + } + ], + "logging_steps": 25, + "max_steps": 2075, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.1308916156397978e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-830/training_args.bin b/checkpoint-830/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..184dacc2ae7ef0096c0f2c3ef91b16c840ae960c --- /dev/null +++ b/checkpoint-830/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4370f125a1b8cc4780ee7319c46b5e76e386e5bd5061ee6e638e993ff73b5e6f +size 6033 diff --git a/runs/Aug06_19-31-38_pan/events.out.tfevents.1754523100.pan.768634.0 b/runs/Aug06_19-31-38_pan/events.out.tfevents.1754523100.pan.768634.0 new file mode 100644 index 0000000000000000000000000000000000000000..6daa5f6f68cbaf1ff156a6419019673551a5b400 --- /dev/null +++ b/runs/Aug06_19-31-38_pan/events.out.tfevents.1754523100.pan.768634.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46abcf740f557f8080d4a315beedd667e85376471951d4b3bdc541632ebb14f1 +size 35703 diff --git a/runs/Aug06_19-31-38_pan/events.out.tfevents.1754530641.pan.768634.1 b/runs/Aug06_19-31-38_pan/events.out.tfevents.1754530641.pan.768634.1 new file mode 100644 index 0000000000000000000000000000000000000000..6983fd440504784481728e3c4b792d3a7d2f8d15 --- /dev/null +++ b/runs/Aug06_19-31-38_pan/events.out.tfevents.1754530641.pan.768634.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b0c588234af6e03a025ddddc58ad0ecd8da920f4ea83a28e21c5fbdefd702f2 +size 478 diff --git a/runs/Aug06_21-55-01_pan/events.out.tfevents.1754531766.pan.775255.0 b/runs/Aug06_21-55-01_pan/events.out.tfevents.1754531766.pan.775255.0 new file mode 100644 index 0000000000000000000000000000000000000000..bb635571da3601a0733b8829b5db0378327c2b74 --- /dev/null +++ b/runs/Aug06_21-55-01_pan/events.out.tfevents.1754531766.pan.775255.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba8a603a1ff465438295bbabecaf2bbd9769b4524fc6ee0a1575a219e90c8a3 +size 473