{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.33224431031618584,
  "eval_steps": 500,
  "global_step": 102000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003257297159962606,
      "grad_norm": 2.2308592796325684,
      "learning_rate": 4.99853416853153e-05,
      "loss": 1.4483,
      "step": 100
    },
    {
      "epoch": 0.0006514594319925212,
      "grad_norm": 2.3997225761413574,
      "learning_rate": 4.996905466899897e-05,
      "loss": 1.3276,
      "step": 200
    },
    {
      "epoch": 0.0009771891479887819,
      "grad_norm": 1.4687339067459106,
      "learning_rate": 4.995276765268264e-05,
      "loss": 1.3394,
      "step": 300
    },
    {
      "epoch": 0.0013029188639850425,
      "grad_norm": 0.6583470702171326,
      "learning_rate": 4.993648063636631e-05,
      "loss": 1.3245,
      "step": 400
    },
    {
      "epoch": 0.0016286485799813031,
      "grad_norm": 1.6252340078353882,
      "learning_rate": 4.992019362004997e-05,
      "loss": 1.3249,
      "step": 500
    },
    {
      "epoch": 0.0019543782959775637,
      "grad_norm": 2.0806777477264404,
      "learning_rate": 4.9903906603733634e-05,
      "loss": 1.32,
      "step": 600
    },
    {
      "epoch": 0.002280108011973824,
      "grad_norm": 1.376539707183838,
      "learning_rate": 4.988761958741731e-05,
      "loss": 1.3133,
      "step": 700
    },
    {
      "epoch": 0.002605837727970085,
      "grad_norm": 2.234644889831543,
      "learning_rate": 4.987133257110097e-05,
      "loss": 1.3179,
      "step": 800
    },
    {
      "epoch": 0.0029315674439663454,
      "grad_norm": 1.4599684476852417,
      "learning_rate": 4.985504555478464e-05,
      "loss": 1.3097,
      "step": 900
    },
    {
      "epoch": 0.0032572971599626062,
      "grad_norm": 1.7078094482421875,
      "learning_rate": 4.9838758538468304e-05,
      "loss": 1.3083,
      "step": 1000
    },
    {
      "epoch": 0.0035830268759588666,
      "grad_norm": 0.6953567266464233,
      "learning_rate": 4.9822471522151976e-05,
      "loss": 1.3075,
      "step": 1100
    },
    {
      "epoch": 0.0039087565919551275,
      "grad_norm": 1.225602626800537,
      "learning_rate": 4.980618450583564e-05,
      "loss": 1.3054,
      "step": 1200
    },
    {
      "epoch": 0.004234486307951388,
      "grad_norm": 1.3010519742965698,
      "learning_rate": 4.978989748951931e-05,
      "loss": 1.3066,
      "step": 1300
    },
    {
      "epoch": 0.004560216023947648,
      "grad_norm": 0.6475724577903748,
      "learning_rate": 4.9773610473202974e-05,
      "loss": 1.3109,
      "step": 1400
    },
    {
      "epoch": 0.004885945739943909,
      "grad_norm": 1.046614646911621,
      "learning_rate": 4.975732345688664e-05,
      "loss": 1.3074,
      "step": 1500
    },
    {
      "epoch": 0.00521167545594017,
      "grad_norm": 1.113573670387268,
      "learning_rate": 4.974103644057031e-05,
      "loss": 1.3083,
      "step": 1600
    },
    {
      "epoch": 0.005537405171936431,
      "grad_norm": 1.4273550510406494,
      "learning_rate": 4.972474942425398e-05,
      "loss": 1.3018,
      "step": 1700
    },
    {
      "epoch": 0.005863134887932691,
      "grad_norm": 0.5519908666610718,
      "learning_rate": 4.970846240793764e-05,
      "loss": 1.2945,
      "step": 1800
    },
    {
      "epoch": 0.006188864603928952,
      "grad_norm": 0.6653416156768799,
      "learning_rate": 4.969217539162131e-05,
      "loss": 1.3004,
      "step": 1900
    },
    {
      "epoch": 0.0065145943199252125,
      "grad_norm": 0.732170581817627,
      "learning_rate": 4.9675888375304975e-05,
      "loss": 1.3014,
      "step": 2000
    },
    {
      "epoch": 0.006840324035921473,
      "grad_norm": 0.405608594417572,
      "learning_rate": 4.965960135898865e-05,
      "loss": 1.2939,
      "step": 2100
    },
    {
      "epoch": 0.007166053751917733,
      "grad_norm": 0.9849847555160522,
      "learning_rate": 4.9643314342672306e-05,
      "loss": 1.2922,
      "step": 2200
    },
    {
      "epoch": 0.007491783467913994,
      "grad_norm": 0.7152832746505737,
      "learning_rate": 4.962702732635598e-05,
      "loss": 1.2905,
      "step": 2300
    },
    {
      "epoch": 0.007817513183910255,
      "grad_norm": 1.1164734363555908,
      "learning_rate": 4.9610740310039644e-05,
      "loss": 1.3024,
      "step": 2400
    },
    {
      "epoch": 0.008143242899906516,
      "grad_norm": 0.574243426322937,
      "learning_rate": 4.959445329372332e-05,
      "loss": 1.2944,
      "step": 2500
    },
    {
      "epoch": 0.008468972615902777,
      "grad_norm": 0.6976324319839478,
      "learning_rate": 4.9578166277406976e-05,
      "loss": 1.2939,
      "step": 2600
    },
    {
      "epoch": 0.008794702331899037,
      "grad_norm": 0.4648737609386444,
      "learning_rate": 4.956187926109064e-05,
      "loss": 1.2841,
      "step": 2700
    },
    {
      "epoch": 0.009120432047895297,
      "grad_norm": 1.189271092414856,
      "learning_rate": 4.9545592244774314e-05,
      "loss": 1.294,
      "step": 2800
    },
    {
      "epoch": 0.009446161763891557,
      "grad_norm": 0.6437670588493347,
      "learning_rate": 4.952930522845798e-05,
      "loss": 1.2882,
      "step": 2900
    },
    {
      "epoch": 0.009771891479887818,
      "grad_norm": 1.591304898262024,
      "learning_rate": 4.9513018212141646e-05,
      "loss": 1.2805,
      "step": 3000
    },
    {
      "epoch": 0.010097621195884079,
      "grad_norm": 0.2836475670337677,
      "learning_rate": 4.949673119582531e-05,
      "loss": 1.2802,
      "step": 3100
    },
    {
      "epoch": 0.01042335091188034,
      "grad_norm": 1.304417610168457,
      "learning_rate": 4.9480444179508984e-05,
      "loss": 1.2833,
      "step": 3200
    },
    {
      "epoch": 0.0107490806278766,
      "grad_norm": 0.27579864859580994,
      "learning_rate": 4.946415716319265e-05,
      "loss": 1.2852,
      "step": 3300
    },
    {
      "epoch": 0.011074810343872862,
      "grad_norm": 1.1080585718154907,
      "learning_rate": 4.9447870146876315e-05,
      "loss": 1.289,
      "step": 3400
    },
    {
      "epoch": 0.011400540059869122,
      "grad_norm": 0.2783690392971039,
      "learning_rate": 4.943158313055998e-05,
      "loss": 1.2885,
      "step": 3500
    },
    {
      "epoch": 0.011726269775865382,
      "grad_norm": 0.6603112816810608,
      "learning_rate": 4.941529611424365e-05,
      "loss": 1.2882,
      "step": 3600
    },
    {
      "epoch": 0.012051999491861642,
      "grad_norm": 0.9498095512390137,
      "learning_rate": 4.939900909792732e-05,
      "loss": 1.2835,
      "step": 3700
    },
    {
      "epoch": 0.012377729207857903,
      "grad_norm": 0.5274548530578613,
      "learning_rate": 4.9382722081610985e-05,
      "loss": 1.279,
      "step": 3800
    },
    {
      "epoch": 0.012703458923854164,
      "grad_norm": 0.5299821496009827,
      "learning_rate": 4.936643506529465e-05,
      "loss": 1.2879,
      "step": 3900
    },
    {
      "epoch": 0.013029188639850425,
      "grad_norm": 1.0898863077163696,
      "learning_rate": 4.9350148048978316e-05,
      "loss": 1.2913,
      "step": 4000
    },
    {
      "epoch": 0.013354918355846686,
      "grad_norm": 0.6892501711845398,
      "learning_rate": 4.933386103266198e-05,
      "loss": 1.2835,
      "step": 4100
    },
    {
      "epoch": 0.013680648071842947,
      "grad_norm": 0.9103847146034241,
      "learning_rate": 4.9317574016345655e-05,
      "loss": 1.2876,
      "step": 4200
    },
    {
      "epoch": 0.014006377787839207,
      "grad_norm": 0.8750960826873779,
      "learning_rate": 4.9301287000029314e-05,
      "loss": 1.2761,
      "step": 4300
    },
    {
      "epoch": 0.014332107503835467,
      "grad_norm": 1.7296843528747559,
      "learning_rate": 4.9284999983712986e-05,
      "loss": 1.2825,
      "step": 4400
    },
    {
      "epoch": 0.014657837219831727,
      "grad_norm": 0.7019387483596802,
      "learning_rate": 4.926871296739665e-05,
      "loss": 1.2774,
      "step": 4500
    },
    {
      "epoch": 0.014983566935827988,
      "grad_norm": 0.9353660345077515,
      "learning_rate": 4.9252425951080324e-05,
      "loss": 1.2701,
      "step": 4600
    },
    {
      "epoch": 0.015309296651824249,
      "grad_norm": 0.7081932425498962,
      "learning_rate": 4.923613893476399e-05,
      "loss": 1.276,
      "step": 4700
    },
    {
      "epoch": 0.01563502636782051,
      "grad_norm": 0.8366962671279907,
      "learning_rate": 4.9219851918447656e-05,
      "loss": 1.2767,
      "step": 4800
    },
    {
      "epoch": 0.01596075608381677,
      "grad_norm": 1.765871286392212,
      "learning_rate": 4.920356490213132e-05,
      "loss": 1.2617,
      "step": 4900
    },
    {
      "epoch": 0.01628648579981303,
      "grad_norm": 0.2926379442214966,
      "learning_rate": 4.918727788581499e-05,
      "loss": 1.2762,
      "step": 5000
    },
    {
      "epoch": 0.01661221551580929,
      "grad_norm": 1.1176525354385376,
      "learning_rate": 4.917099086949866e-05,
      "loss": 1.2647,
      "step": 5100
    },
    {
      "epoch": 0.016937945231805553,
      "grad_norm": 0.384264200925827,
      "learning_rate": 4.915470385318232e-05,
      "loss": 1.2628,
      "step": 5200
    },
    {
      "epoch": 0.017263674947801812,
      "grad_norm": 1.5339140892028809,
      "learning_rate": 4.913841683686599e-05,
      "loss": 1.2692,
      "step": 5300
    },
    {
      "epoch": 0.017589404663798075,
      "grad_norm": 1.2026703357696533,
      "learning_rate": 4.912212982054966e-05,
      "loss": 1.2618,
      "step": 5400
    },
    {
      "epoch": 0.017915134379794334,
      "grad_norm": 0.6754997968673706,
      "learning_rate": 4.910584280423333e-05,
      "loss": 1.2495,
      "step": 5500
    },
    {
      "epoch": 0.018240864095790593,
      "grad_norm": 0.8240428566932678,
      "learning_rate": 4.908955578791699e-05,
      "loss": 1.2498,
      "step": 5600
    },
    {
      "epoch": 0.018566593811786856,
      "grad_norm": 0.6363087892532349,
      "learning_rate": 4.9073268771600654e-05,
      "loss": 1.2514,
      "step": 5700
    },
    {
      "epoch": 0.018892323527783115,
      "grad_norm": 1.393833875656128,
      "learning_rate": 4.905698175528433e-05,
      "loss": 1.2509,
      "step": 5800
    },
    {
      "epoch": 0.019218053243779377,
      "grad_norm": 0.6422170996665955,
      "learning_rate": 4.904069473896799e-05,
      "loss": 1.2405,
      "step": 5900
    },
    {
      "epoch": 0.019543782959775637,
      "grad_norm": 0.7575420141220093,
      "learning_rate": 4.902440772265166e-05,
      "loss": 1.2241,
      "step": 6000
    },
    {
      "epoch": 0.0198695126757719,
      "grad_norm": 0.7148196697235107,
      "learning_rate": 4.9008120706335324e-05,
      "loss": 1.2372,
      "step": 6100
    },
    {
      "epoch": 0.020195242391768158,
      "grad_norm": 1.1207329034805298,
      "learning_rate": 4.8991833690018996e-05,
      "loss": 1.2372,
      "step": 6200
    },
    {
      "epoch": 0.02052097210776442,
      "grad_norm": 1.3915568590164185,
      "learning_rate": 4.897554667370266e-05,
      "loss": 1.2129,
      "step": 6300
    },
    {
      "epoch": 0.02084670182376068,
      "grad_norm": 0.8674553036689758,
      "learning_rate": 4.895925965738633e-05,
      "loss": 1.2262,
      "step": 6400
    },
    {
      "epoch": 0.02117243153975694,
      "grad_norm": 0.7640644311904907,
      "learning_rate": 4.8942972641069994e-05,
      "loss": 1.1998,
      "step": 6500
    },
    {
      "epoch": 0.0214981612557532,
      "grad_norm": 0.7928606271743774,
      "learning_rate": 4.892668562475366e-05,
      "loss": 1.1776,
      "step": 6600
    },
    {
      "epoch": 0.02182389097174946,
      "grad_norm": 1.1644946336746216,
      "learning_rate": 4.891039860843733e-05,
      "loss": 1.1916,
      "step": 6700
    },
    {
      "epoch": 0.022149620687745723,
      "grad_norm": 1.1310213804244995,
      "learning_rate": 4.8894111592121e-05,
      "loss": 1.1786,
      "step": 6800
    },
    {
      "epoch": 0.022475350403741982,
      "grad_norm": 1.3858141899108887,
      "learning_rate": 4.887782457580466e-05,
      "loss": 1.1728,
      "step": 6900
    },
    {
      "epoch": 0.022801080119738245,
      "grad_norm": 3.814767360687256,
      "learning_rate": 4.886153755948833e-05,
      "loss": 1.1384,
      "step": 7000
    },
    {
      "epoch": 0.023126809835734504,
      "grad_norm": 1.2411885261535645,
      "learning_rate": 4.8845250543171995e-05,
      "loss": 1.1588,
      "step": 7100
    },
    {
      "epoch": 0.023452539551730763,
      "grad_norm": 1.4492881298065186,
      "learning_rate": 4.882896352685567e-05,
      "loss": 1.1266,
      "step": 7200
    },
    {
      "epoch": 0.023778269267727026,
      "grad_norm": 0.8389878869056702,
      "learning_rate": 4.8812676510539326e-05,
      "loss": 1.1446,
      "step": 7300
    },
    {
      "epoch": 0.024103998983723285,
      "grad_norm": 0.33955487608909607,
      "learning_rate": 4.8796389494223e-05,
      "loss": 1.1111,
      "step": 7400
    },
    {
      "epoch": 0.024429728699719547,
      "grad_norm": 0.7004753351211548,
      "learning_rate": 4.8780102477906664e-05,
      "loss": 1.0954,
      "step": 7500
    },
    {
      "epoch": 0.024755458415715807,
      "grad_norm": 0.7213209271430969,
      "learning_rate": 4.876381546159034e-05,
      "loss": 1.1123,
      "step": 7600
    },
    {
      "epoch": 0.02508118813171207,
      "grad_norm": 0.960991382598877,
      "learning_rate": 4.8747528445273996e-05,
      "loss": 1.0982,
      "step": 7700
    },
    {
      "epoch": 0.025406917847708328,
      "grad_norm": 0.6955804228782654,
      "learning_rate": 4.873124142895766e-05,
      "loss": 1.0827,
      "step": 7800
    },
    {
      "epoch": 0.02573264756370459,
      "grad_norm": 0.47498619556427,
      "learning_rate": 4.8714954412641334e-05,
      "loss": 1.1043,
      "step": 7900
    },
    {
      "epoch": 0.02605837727970085,
      "grad_norm": 0.304063618183136,
      "learning_rate": 4.8698667396325e-05,
      "loss": 1.0699,
      "step": 8000
    },
    {
      "epoch": 0.02638410699569711,
      "grad_norm": 0.9996088743209839,
      "learning_rate": 4.8682380380008666e-05,
      "loss": 1.0697,
      "step": 8100
    },
    {
      "epoch": 0.02670983671169337,
      "grad_norm": 0.5986392498016357,
      "learning_rate": 4.866609336369233e-05,
      "loss": 1.0733,
      "step": 8200
    },
    {
      "epoch": 0.02703556642768963,
      "grad_norm": 0.41347017884254456,
      "learning_rate": 4.8649806347376004e-05,
      "loss": 1.0643,
      "step": 8300
    },
    {
      "epoch": 0.027361296143685893,
      "grad_norm": 0.3976612687110901,
      "learning_rate": 4.863351933105967e-05,
      "loss": 1.0401,
      "step": 8400
    },
    {
      "epoch": 0.027687025859682152,
      "grad_norm": 1.1716387271881104,
      "learning_rate": 4.8617232314743335e-05,
      "loss": 1.0298,
      "step": 8500
    },
    {
      "epoch": 0.028012755575678415,
      "grad_norm": 0.7384105324745178,
      "learning_rate": 4.8600945298427e-05,
      "loss": 1.0223,
      "step": 8600
    },
    {
      "epoch": 0.028338485291674674,
      "grad_norm": 0.517280638217926,
      "learning_rate": 4.858465828211067e-05,
      "loss": 1.0445,
      "step": 8700
    },
    {
      "epoch": 0.028664215007670933,
      "grad_norm": 0.7129126787185669,
      "learning_rate": 4.856837126579434e-05,
      "loss": 1.0508,
      "step": 8800
    },
    {
      "epoch": 0.028989944723667196,
      "grad_norm": 0.35596320033073425,
      "learning_rate": 4.8552084249478005e-05,
      "loss": 1.0296,
      "step": 8900
    },
    {
      "epoch": 0.029315674439663455,
      "grad_norm": 0.9362590909004211,
      "learning_rate": 4.853579723316167e-05,
      "loss": 1.0785,
      "step": 9000
    },
    {
      "epoch": 0.029641404155659717,
      "grad_norm": 0.8223775625228882,
      "learning_rate": 4.8519510216845336e-05,
      "loss": 1.043,
      "step": 9100
    },
    {
      "epoch": 0.029967133871655977,
      "grad_norm": 0.7149192690849304,
      "learning_rate": 4.8503223200529e-05,
      "loss": 1.0036,
      "step": 9200
    },
    {
      "epoch": 0.03029286358765224,
      "grad_norm": 0.5907948017120361,
      "learning_rate": 4.8486936184212675e-05,
      "loss": 1.0408,
      "step": 9300
    },
    {
      "epoch": 0.030618593303648498,
      "grad_norm": 0.6083859801292419,
      "learning_rate": 4.847064916789634e-05,
      "loss": 1.0313,
      "step": 9400
    },
    {
      "epoch": 0.03094432301964476,
      "grad_norm": 0.5470224618911743,
      "learning_rate": 4.8454362151580006e-05,
      "loss": 1.0395,
      "step": 9500
    },
    {
      "epoch": 0.03127005273564102,
      "grad_norm": 0.9455150961875916,
      "learning_rate": 4.843807513526367e-05,
      "loss": 1.0132,
      "step": 9600
    },
    {
      "epoch": 0.03159578245163728,
      "grad_norm": 0.9068177938461304,
      "learning_rate": 4.8421788118947344e-05,
      "loss": 1.0219,
      "step": 9700
    },
    {
      "epoch": 0.03192151216763354,
      "grad_norm": 0.6018943190574646,
      "learning_rate": 4.840550110263101e-05,
      "loss": 0.9966,
      "step": 9800
    },
    {
      "epoch": 0.032247241883629804,
      "grad_norm": 1.1521615982055664,
      "learning_rate": 4.838921408631467e-05,
      "loss": 0.9782,
      "step": 9900
    },
    {
      "epoch": 0.03257297159962606,
      "grad_norm": 0.33281368017196655,
      "learning_rate": 4.837292706999834e-05,
      "loss": 1.0325,
      "step": 10000
    },
    {
      "epoch": 0.03289870131562232,
      "grad_norm": 0.8903327584266663,
      "learning_rate": 4.835664005368201e-05,
      "loss": 0.9889,
      "step": 10100
    },
    {
      "epoch": 0.03322443103161858,
      "grad_norm": 0.5526803731918335,
      "learning_rate": 4.834035303736568e-05,
      "loss": 1.0018,
      "step": 10200
    },
    {
      "epoch": 0.03355016074761485,
      "grad_norm": 0.8086706399917603,
      "learning_rate": 4.832406602104934e-05,
      "loss": 1.0189,
      "step": 10300
    },
    {
      "epoch": 0.03387589046361111,
      "grad_norm": 0.6990864276885986,
      "learning_rate": 4.830777900473301e-05,
      "loss": 0.996,
      "step": 10400
    },
    {
      "epoch": 0.034201620179607366,
      "grad_norm": 0.4859602451324463,
      "learning_rate": 4.829149198841668e-05,
      "loss": 0.992,
      "step": 10500
    },
    {
      "epoch": 0.034527349895603625,
      "grad_norm": 1.2284592390060425,
      "learning_rate": 4.827520497210034e-05,
      "loss": 1.0139,
      "step": 10600
    },
    {
      "epoch": 0.034853079611599884,
      "grad_norm": 0.6529733538627625,
      "learning_rate": 4.825891795578401e-05,
      "loss": 1.025,
      "step": 10700
    },
    {
      "epoch": 0.03517880932759615,
      "grad_norm": 0.6755232810974121,
      "learning_rate": 4.8242630939467674e-05,
      "loss": 1.0123,
      "step": 10800
    },
    {
      "epoch": 0.03550453904359241,
      "grad_norm": 0.9006055593490601,
      "learning_rate": 4.8226343923151347e-05,
      "loss": 0.9936,
      "step": 10900
    },
    {
      "epoch": 0.03583026875958867,
      "grad_norm": 0.7058572769165039,
      "learning_rate": 4.821005690683501e-05,
      "loss": 0.934,
      "step": 11000
    },
    {
      "epoch": 0.03615599847558493,
      "grad_norm": 0.4535008668899536,
      "learning_rate": 4.819376989051868e-05,
      "loss": 1.0269,
      "step": 11100
    },
    {
      "epoch": 0.036481728191581186,
      "grad_norm": 0.39823395013809204,
      "learning_rate": 4.8177482874202344e-05,
      "loss": 0.9866,
      "step": 11200
    },
    {
      "epoch": 0.03680745790757745,
      "grad_norm": 0.8109054565429688,
      "learning_rate": 4.816119585788601e-05,
      "loss": 1.0209,
      "step": 11300
    },
    {
      "epoch": 0.03713318762357371,
      "grad_norm": 0.760396420955658,
      "learning_rate": 4.814490884156968e-05,
      "loss": 0.9711,
      "step": 11400
    },
    {
      "epoch": 0.03745891733956997,
      "grad_norm": 0.8584955334663391,
      "learning_rate": 4.812862182525335e-05,
      "loss": 1.0151,
      "step": 11500
    },
    {
      "epoch": 0.03778464705556623,
      "grad_norm": 1.104041576385498,
      "learning_rate": 4.8112334808937013e-05,
      "loss": 0.9826,
      "step": 11600
    },
    {
      "epoch": 0.038110376771562496,
      "grad_norm": 0.6111257672309875,
      "learning_rate": 4.809604779262068e-05,
      "loss": 0.9524,
      "step": 11700
    },
    {
      "epoch": 0.038436106487558755,
      "grad_norm": 0.6601366996765137,
      "learning_rate": 4.807976077630435e-05,
      "loss": 0.9527,
      "step": 11800
    },
    {
      "epoch": 0.038761836203555014,
      "grad_norm": 0.4624398350715637,
      "learning_rate": 4.806347375998802e-05,
      "loss": 1.0077,
      "step": 11900
    },
    {
      "epoch": 0.03908756591955127,
      "grad_norm": 0.2786065638065338,
      "learning_rate": 4.8047186743671676e-05,
      "loss": 0.956,
      "step": 12000
    },
    {
      "epoch": 0.03941329563554753,
      "grad_norm": 1.0275955200195312,
      "learning_rate": 4.803089972735535e-05,
      "loss": 0.9484,
      "step": 12100
    },
    {
      "epoch": 0.0397390253515438,
      "grad_norm": 0.6198407411575317,
      "learning_rate": 4.8014612711039015e-05,
      "loss": 0.9847,
      "step": 12200
    },
    {
      "epoch": 0.04006475506754006,
      "grad_norm": 0.5880489945411682,
      "learning_rate": 4.799832569472269e-05,
      "loss": 0.9559,
      "step": 12300
    },
    {
      "epoch": 0.040390484783536316,
      "grad_norm": 0.39753594994544983,
      "learning_rate": 4.7982038678406346e-05,
      "loss": 0.9489,
      "step": 12400
    },
    {
      "epoch": 0.040716214499532576,
      "grad_norm": 0.5815085768699646,
      "learning_rate": 4.796575166209002e-05,
      "loss": 0.9567,
      "step": 12500
    },
    {
      "epoch": 0.04104194421552884,
      "grad_norm": 0.8463611602783203,
      "learning_rate": 4.7949464645773684e-05,
      "loss": 0.9706,
      "step": 12600
    },
    {
      "epoch": 0.0413676739315251,
      "grad_norm": 0.7260481715202332,
      "learning_rate": 4.793317762945736e-05,
      "loss": 1.0032,
      "step": 12700
    },
    {
      "epoch": 0.04169340364752136,
      "grad_norm": 0.6970434188842773,
      "learning_rate": 4.7916890613141016e-05,
      "loss": 0.9559,
      "step": 12800
    },
    {
      "epoch": 0.04201913336351762,
      "grad_norm": 0.6083927750587463,
      "learning_rate": 4.790060359682468e-05,
      "loss": 0.9558,
      "step": 12900
    },
    {
      "epoch": 0.04234486307951388,
      "grad_norm": 0.4736403524875641,
      "learning_rate": 4.7884316580508354e-05,
      "loss": 0.9444,
      "step": 13000
    },
    {
      "epoch": 0.042670592795510144,
      "grad_norm": 0.34586021304130554,
      "learning_rate": 4.786802956419202e-05,
      "loss": 0.9186,
      "step": 13100
    },
    {
      "epoch": 0.0429963225115064,
      "grad_norm": 0.5979019403457642,
      "learning_rate": 4.7851742547875685e-05,
      "loss": 0.9367,
      "step": 13200
    },
    {
      "epoch": 0.04332205222750266,
      "grad_norm": 1.0827624797821045,
      "learning_rate": 4.783545553155935e-05,
      "loss": 0.9324,
      "step": 13300
    },
    {
      "epoch": 0.04364778194349892,
      "grad_norm": 1.1920030117034912,
      "learning_rate": 4.7819168515243024e-05,
      "loss": 0.9367,
      "step": 13400
    },
    {
      "epoch": 0.04397351165949519,
      "grad_norm": 0.6469812989234924,
      "learning_rate": 4.780288149892669e-05,
      "loss": 0.9815,
      "step": 13500
    },
    {
      "epoch": 0.04429924137549145,
      "grad_norm": 0.8156530857086182,
      "learning_rate": 4.7786594482610355e-05,
      "loss": 0.9679,
      "step": 13600
    },
    {
      "epoch": 0.044624971091487706,
      "grad_norm": 1.2997325658798218,
      "learning_rate": 4.777030746629402e-05,
      "loss": 0.9358,
      "step": 13700
    },
    {
      "epoch": 0.044950700807483965,
      "grad_norm": 0.42360150814056396,
      "learning_rate": 4.7754020449977687e-05,
      "loss": 0.9326,
      "step": 13800
    },
    {
      "epoch": 0.045276430523480224,
      "grad_norm": 0.7316247820854187,
      "learning_rate": 4.773773343366136e-05,
      "loss": 0.9283,
      "step": 13900
    },
    {
      "epoch": 0.04560216023947649,
      "grad_norm": 0.5978175401687622,
      "learning_rate": 4.7721446417345025e-05,
      "loss": 0.9699,
      "step": 14000
    },
    {
      "epoch": 0.04592788995547275,
      "grad_norm": 0.5278334617614746,
      "learning_rate": 4.770515940102869e-05,
      "loss": 0.99,
      "step": 14100
    },
    {
      "epoch": 0.04625361967146901,
      "grad_norm": 0.7452822327613831,
      "learning_rate": 4.7688872384712356e-05,
      "loss": 0.8824,
      "step": 14200
    },
    {
      "epoch": 0.04657934938746527,
      "grad_norm": 0.4158065617084503,
      "learning_rate": 4.767258536839602e-05,
      "loss": 0.9076,
      "step": 14300
    },
    {
      "epoch": 0.046905079103461526,
      "grad_norm": 0.6929590106010437,
      "learning_rate": 4.7656298352079694e-05,
      "loss": 0.926,
      "step": 14400
    },
    {
      "epoch": 0.04723080881945779,
      "grad_norm": 0.8249752521514893,
      "learning_rate": 4.764001133576336e-05,
      "loss": 0.9342,
      "step": 14500
    },
    {
      "epoch": 0.04755653853545405,
      "grad_norm": 0.6523115038871765,
      "learning_rate": 4.7623724319447026e-05,
      "loss": 0.9312,
      "step": 14600
    },
    {
      "epoch": 0.04788226825145031,
      "grad_norm": 0.7809571027755737,
      "learning_rate": 4.760743730313069e-05,
      "loss": 0.927,
      "step": 14700
    },
    {
      "epoch": 0.04820799796744657,
      "grad_norm": 0.4370424747467041,
      "learning_rate": 4.7591150286814364e-05,
      "loss": 0.9275,
      "step": 14800
    },
    {
      "epoch": 0.048533727683442836,
      "grad_norm": 0.8082228302955627,
      "learning_rate": 4.757486327049803e-05,
      "loss": 0.9524,
      "step": 14900
    },
    {
      "epoch": 0.048859457399439095,
      "grad_norm": 0.7073273658752441,
      "learning_rate": 4.755857625418169e-05,
      "loss": 0.9069,
      "step": 15000
    },
    {
      "epoch": 0.049185187115435354,
      "grad_norm": 0.9150802493095398,
      "learning_rate": 4.754228923786536e-05,
      "loss": 0.9669,
      "step": 15100
    },
    {
      "epoch": 0.04951091683143161,
      "grad_norm": 0.6621295809745789,
      "learning_rate": 4.752600222154903e-05,
      "loss": 0.9117,
      "step": 15200
    },
    {
      "epoch": 0.04983664654742787,
      "grad_norm": 1.1658425331115723,
      "learning_rate": 4.75097152052327e-05,
      "loss": 0.9061,
      "step": 15300
    },
    {
      "epoch": 0.05016237626342414,
      "grad_norm": 1.1669522523880005,
      "learning_rate": 4.749342818891636e-05,
      "loss": 0.9625,
      "step": 15400
    },
    {
      "epoch": 0.0504881059794204,
      "grad_norm": 0.6995384693145752,
      "learning_rate": 4.747714117260003e-05,
      "loss": 0.9098,
      "step": 15500
    },
    {
      "epoch": 0.050813835695416656,
      "grad_norm": 0.5169076919555664,
      "learning_rate": 4.74608541562837e-05,
      "loss": 0.9243,
      "step": 15600
    },
    {
      "epoch": 0.051139565411412916,
      "grad_norm": 0.33565372228622437,
      "learning_rate": 4.744456713996736e-05,
      "loss": 0.9375,
      "step": 15700
    },
    {
      "epoch": 0.05146529512740918,
      "grad_norm": 0.4140024781227112,
      "learning_rate": 4.742828012365103e-05,
      "loss": 0.919,
      "step": 15800
    },
    {
      "epoch": 0.05179102484340544,
      "grad_norm": 0.9499224424362183,
      "learning_rate": 4.7411993107334694e-05,
      "loss": 0.9034,
      "step": 15900
    },
    {
      "epoch": 0.0521167545594017,
      "grad_norm": 0.8801336288452148,
      "learning_rate": 4.7395706091018366e-05,
      "loss": 0.881,
      "step": 16000
    },
    {
      "epoch": 0.05244248427539796,
      "grad_norm": 0.7208696007728577,
      "learning_rate": 4.737941907470203e-05,
      "loss": 0.8518,
      "step": 16100
    },
    {
      "epoch": 0.05276821399139422,
      "grad_norm": 0.5132054686546326,
      "learning_rate": 4.73631320583857e-05,
      "loss": 0.8933,
      "step": 16200
    },
    {
      "epoch": 0.053093943707390484,
      "grad_norm": 0.6521860957145691,
      "learning_rate": 4.7346845042069364e-05,
      "loss": 0.9332,
      "step": 16300
    },
    {
      "epoch": 0.05341967342338674,
      "grad_norm": 0.7121620178222656,
      "learning_rate": 4.733055802575303e-05,
      "loss": 0.9067,
      "step": 16400
    },
    {
      "epoch": 0.053745403139383,
      "grad_norm": 0.5065134763717651,
      "learning_rate": 4.73142710094367e-05,
      "loss": 0.9062,
      "step": 16500
    },
    {
      "epoch": 0.05407113285537926,
      "grad_norm": 0.5855521559715271,
      "learning_rate": 4.729798399312037e-05,
      "loss": 0.915,
      "step": 16600
    },
    {
      "epoch": 0.05439686257137553,
      "grad_norm": 0.5392531156539917,
      "learning_rate": 4.728169697680403e-05,
      "loss": 0.9124,
      "step": 16700
    },
    {
      "epoch": 0.05472259228737179,
      "grad_norm": 0.6617989540100098,
      "learning_rate": 4.72654099604877e-05,
      "loss": 0.8594,
      "step": 16800
    },
    {
      "epoch": 0.055048322003368046,
      "grad_norm": 0.6459785103797913,
      "learning_rate": 4.724912294417137e-05,
      "loss": 0.9262,
      "step": 16900
    },
    {
      "epoch": 0.055374051719364305,
      "grad_norm": 0.34565970301628113,
      "learning_rate": 4.723283592785504e-05,
      "loss": 0.8747,
      "step": 17000
    },
    {
      "epoch": 0.055699781435360564,
      "grad_norm": 0.9510948061943054,
      "learning_rate": 4.7216548911538696e-05,
      "loss": 0.9027,
      "step": 17100
    },
    {
      "epoch": 0.05602551115135683,
      "grad_norm": 0.577192485332489,
      "learning_rate": 4.720026189522237e-05,
      "loss": 0.9192,
      "step": 17200
    },
    {
      "epoch": 0.05635124086735309,
      "grad_norm": 0.38653406500816345,
      "learning_rate": 4.7183974878906034e-05,
      "loss": 0.8759,
      "step": 17300
    },
    {
      "epoch": 0.05667697058334935,
      "grad_norm": 0.6405381560325623,
      "learning_rate": 4.716768786258971e-05,
      "loss": 0.8486,
      "step": 17400
    },
    {
      "epoch": 0.05700270029934561,
      "grad_norm": 0.6968704462051392,
      "learning_rate": 4.7151400846273366e-05,
      "loss": 0.903,
      "step": 17500
    },
    {
      "epoch": 0.057328430015341866,
      "grad_norm": 0.8094695210456848,
      "learning_rate": 4.713511382995704e-05,
      "loss": 0.864,
      "step": 17600
    },
    {
      "epoch": 0.05765415973133813,
      "grad_norm": 0.8325287103652954,
      "learning_rate": 4.7118826813640704e-05,
      "loss": 0.8886,
      "step": 17700
    },
    {
      "epoch": 0.05797988944733439,
      "grad_norm": 0.5068339705467224,
      "learning_rate": 4.710253979732437e-05,
      "loss": 0.8767,
      "step": 17800
    },
    {
      "epoch": 0.05830561916333065,
      "grad_norm": 0.7535611391067505,
      "learning_rate": 4.7086252781008036e-05,
      "loss": 0.8661,
      "step": 17900
    },
    {
      "epoch": 0.05863134887932691,
      "grad_norm": 0.9104974865913391,
      "learning_rate": 4.70699657646917e-05,
      "loss": 0.8612,
      "step": 18000
    },
    {
      "epoch": 0.058957078595323176,
      "grad_norm": 0.9106101989746094,
      "learning_rate": 4.7053678748375374e-05,
      "loss": 0.8885,
      "step": 18100
    },
    {
      "epoch": 0.059282808311319435,
      "grad_norm": 0.9990994334220886,
      "learning_rate": 4.703739173205904e-05,
      "loss": 0.9097,
      "step": 18200
    },
    {
      "epoch": 0.059608538027315694,
      "grad_norm": 0.6219133138656616,
      "learning_rate": 4.7021104715742705e-05,
      "loss": 0.8349,
      "step": 18300
    },
    {
      "epoch": 0.05993426774331195,
      "grad_norm": 0.28884798288345337,
      "learning_rate": 4.700481769942637e-05,
      "loss": 0.8359,
      "step": 18400
    },
    {
      "epoch": 0.06025999745930821,
      "grad_norm": 0.6142743229866028,
      "learning_rate": 4.698853068311004e-05,
      "loss": 0.8686,
      "step": 18500
    },
    {
      "epoch": 0.06058572717530448,
      "grad_norm": 0.7121238708496094,
      "learning_rate": 4.697224366679371e-05,
      "loss": 0.8318,
      "step": 18600
    },
    {
      "epoch": 0.06091145689130074,
      "grad_norm": 0.3502013683319092,
      "learning_rate": 4.6955956650477375e-05,
      "loss": 0.8353,
      "step": 18700
    },
    {
      "epoch": 0.061237186607296996,
      "grad_norm": 0.869159460067749,
      "learning_rate": 4.693966963416104e-05,
      "loss": 0.8811,
      "step": 18800
    },
    {
      "epoch": 0.061562916323293256,
      "grad_norm": 0.4008027911186218,
      "learning_rate": 4.6923382617844706e-05,
      "loss": 0.8595,
      "step": 18900
    },
    {
      "epoch": 0.06188864603928952,
      "grad_norm": 0.6609760522842407,
      "learning_rate": 4.690709560152838e-05,
      "loss": 0.8591,
      "step": 19000
    },
    {
      "epoch": 0.06221437575528578,
      "grad_norm": 0.41599878668785095,
      "learning_rate": 4.6890808585212045e-05,
      "loss": 0.8792,
      "step": 19100
    },
    {
      "epoch": 0.06254010547128204,
      "grad_norm": 0.8219528794288635,
      "learning_rate": 4.687452156889571e-05,
      "loss": 0.8469,
      "step": 19200
    },
    {
      "epoch": 0.0628658351872783,
      "grad_norm": 0.5383628010749817,
      "learning_rate": 4.6858234552579376e-05,
      "loss": 0.8619,
      "step": 19300
    },
    {
      "epoch": 0.06319156490327456,
      "grad_norm": 1.0892442464828491,
      "learning_rate": 4.684194753626304e-05,
      "loss": 0.8219,
      "step": 19400
    },
    {
      "epoch": 0.06351729461927082,
      "grad_norm": 0.7258702516555786,
      "learning_rate": 4.6825660519946714e-05,
      "loss": 0.8243,
      "step": 19500
    },
    {
      "epoch": 0.06384302433526708,
      "grad_norm": 1.2622634172439575,
      "learning_rate": 4.680937350363038e-05,
      "loss": 0.8619,
      "step": 19600
    },
    {
      "epoch": 0.06416875405126335,
      "grad_norm": 0.3901592195034027,
      "learning_rate": 4.6793086487314046e-05,
      "loss": 0.8315,
      "step": 19700
    },
    {
      "epoch": 0.06449448376725961,
      "grad_norm": 0.5976518392562866,
      "learning_rate": 4.677679947099771e-05,
      "loss": 0.8193,
      "step": 19800
    },
    {
      "epoch": 0.06482021348325587,
      "grad_norm": 1.0668984651565552,
      "learning_rate": 4.676051245468138e-05,
      "loss": 0.8381,
      "step": 19900
    },
    {
      "epoch": 0.06514594319925213,
      "grad_norm": 0.6844903826713562,
      "learning_rate": 4.674422543836505e-05,
      "loss": 0.8202,
      "step": 20000
    },
    {
      "epoch": 0.06547167291524839,
      "grad_norm": 0.6987929344177246,
      "learning_rate": 4.672793842204871e-05,
      "loss": 0.844,
      "step": 20100
    },
    {
      "epoch": 0.06579740263124464,
      "grad_norm": 1.0227413177490234,
      "learning_rate": 4.671165140573238e-05,
      "loss": 0.8093,
      "step": 20200
    },
    {
      "epoch": 0.0661231323472409,
      "grad_norm": 0.5901645421981812,
      "learning_rate": 4.669536438941605e-05,
      "loss": 0.8068,
      "step": 20300
    },
    {
      "epoch": 0.06644886206323716,
      "grad_norm": 0.7951213717460632,
      "learning_rate": 4.667907737309972e-05,
      "loss": 0.8581,
      "step": 20400
    },
    {
      "epoch": 0.06677459177923342,
      "grad_norm": 0.617341160774231,
      "learning_rate": 4.666279035678338e-05,
      "loss": 0.8427,
      "step": 20500
    },
    {
      "epoch": 0.0671003214952297,
      "grad_norm": 0.694558322429657,
      "learning_rate": 4.6646503340467044e-05,
      "loss": 0.8619,
      "step": 20600
    },
    {
      "epoch": 0.06742605121122595,
      "grad_norm": 0.6441329717636108,
      "learning_rate": 4.663021632415072e-05,
      "loss": 0.8866,
      "step": 20700
    },
    {
      "epoch": 0.06775178092722221,
      "grad_norm": 0.46440285444259644,
      "learning_rate": 4.661392930783438e-05,
      "loss": 0.8435,
      "step": 20800
    },
    {
      "epoch": 0.06807751064321847,
      "grad_norm": 0.42911046743392944,
      "learning_rate": 4.659764229151805e-05,
      "loss": 0.8145,
      "step": 20900
    },
    {
      "epoch": 0.06840324035921473,
      "grad_norm": 0.7508918046951294,
      "learning_rate": 4.6581355275201714e-05,
      "loss": 0.8576,
      "step": 21000
    },
    {
      "epoch": 0.06872897007521099,
      "grad_norm": 0.6361901164054871,
      "learning_rate": 4.6565068258885386e-05,
      "loss": 0.7982,
      "step": 21100
    },
    {
      "epoch": 0.06905469979120725,
      "grad_norm": 0.804426372051239,
      "learning_rate": 4.654878124256905e-05,
      "loss": 0.8386,
      "step": 21200
    },
    {
      "epoch": 0.06938042950720351,
      "grad_norm": 0.5336636304855347,
      "learning_rate": 4.653249422625272e-05,
      "loss": 0.8296,
      "step": 21300
    },
    {
      "epoch": 0.06970615922319977,
      "grad_norm": 0.5880811810493469,
      "learning_rate": 4.6516207209936384e-05,
      "loss": 0.8065,
      "step": 21400
    },
    {
      "epoch": 0.07003188893919603,
      "grad_norm": 0.4607875347137451,
      "learning_rate": 4.649992019362005e-05,
      "loss": 0.8601,
      "step": 21500
    },
    {
      "epoch": 0.0703576186551923,
      "grad_norm": 0.6503331065177917,
      "learning_rate": 4.648363317730372e-05,
      "loss": 0.7925,
      "step": 21600
    },
    {
      "epoch": 0.07068334837118856,
      "grad_norm": 0.7841913104057312,
      "learning_rate": 4.646734616098739e-05,
      "loss": 0.8218,
      "step": 21700
    },
    {
      "epoch": 0.07100907808718482,
      "grad_norm": 0.45437848567962646,
      "learning_rate": 4.645105914467105e-05,
      "loss": 0.8663,
      "step": 21800
    },
    {
      "epoch": 0.07133480780318108,
      "grad_norm": 0.6052650213241577,
      "learning_rate": 4.643477212835472e-05,
      "loss": 0.8634,
      "step": 21900
    },
    {
      "epoch": 0.07166053751917734,
      "grad_norm": 0.5301306247711182,
      "learning_rate": 4.641848511203839e-05,
      "loss": 0.8215,
      "step": 22000
    },
    {
      "epoch": 0.0719862672351736,
      "grad_norm": 0.8724095821380615,
      "learning_rate": 4.640219809572206e-05,
      "loss": 0.8304,
      "step": 22100
    },
    {
      "epoch": 0.07231199695116985,
      "grad_norm": 0.8219661116600037,
      "learning_rate": 4.6385911079405716e-05,
      "loss": 0.8515,
      "step": 22200
    },
    {
      "epoch": 0.07263772666716611,
      "grad_norm": 0.6308414936065674,
      "learning_rate": 4.636962406308939e-05,
      "loss": 0.7233,
      "step": 22300
    },
    {
      "epoch": 0.07296345638316237,
      "grad_norm": 0.35772112011909485,
      "learning_rate": 4.6353337046773054e-05,
      "loss": 0.7792,
      "step": 22400
    },
    {
      "epoch": 0.07328918609915865,
      "grad_norm": 0.519975483417511,
      "learning_rate": 4.633705003045673e-05,
      "loss": 0.8265,
      "step": 22500
    },
    {
      "epoch": 0.0736149158151549,
      "grad_norm": 0.8935458660125732,
      "learning_rate": 4.6320763014140386e-05,
      "loss": 0.8276,
      "step": 22600
    },
    {
      "epoch": 0.07394064553115116,
      "grad_norm": 0.4765929877758026,
      "learning_rate": 4.630447599782406e-05,
      "loss": 0.8088,
      "step": 22700
    },
    {
      "epoch": 0.07426637524714742,
      "grad_norm": 0.5910876989364624,
      "learning_rate": 4.6288188981507724e-05,
      "loss": 0.8003,
      "step": 22800
    },
    {
      "epoch": 0.07459210496314368,
      "grad_norm": 0.6108260154724121,
      "learning_rate": 4.627190196519139e-05,
      "loss": 0.7949,
      "step": 22900
    },
    {
      "epoch": 0.07491783467913994,
      "grad_norm": 0.9665610194206238,
      "learning_rate": 4.625561494887506e-05,
      "loss": 0.7989,
      "step": 23000
    },
    {
      "epoch": 0.0752435643951362,
      "grad_norm": 0.43020346760749817,
      "learning_rate": 4.623932793255872e-05,
      "loss": 0.8052,
      "step": 23100
    },
    {
      "epoch": 0.07556929411113246,
      "grad_norm": 0.3901965022087097,
      "learning_rate": 4.6223040916242394e-05,
      "loss": 0.7756,
      "step": 23200
    },
    {
      "epoch": 0.07589502382712872,
      "grad_norm": 0.8132317066192627,
      "learning_rate": 4.620675389992606e-05,
      "loss": 0.797,
      "step": 23300
    },
    {
      "epoch": 0.07622075354312499,
      "grad_norm": 0.6211370825767517,
      "learning_rate": 4.619046688360973e-05,
      "loss": 0.7698,
      "step": 23400
    },
    {
      "epoch": 0.07654648325912125,
      "grad_norm": 0.8378313779830933,
      "learning_rate": 4.617417986729339e-05,
      "loss": 0.805,
      "step": 23500
    },
    {
      "epoch": 0.07687221297511751,
      "grad_norm": 0.9225132465362549,
      "learning_rate": 4.615789285097706e-05,
      "loss": 0.7999,
      "step": 23600
    },
    {
      "epoch": 0.07719794269111377,
      "grad_norm": 0.46878713369369507,
      "learning_rate": 4.614160583466073e-05,
      "loss": 0.75,
      "step": 23700
    },
    {
      "epoch": 0.07752367240711003,
      "grad_norm": 0.409138560295105,
      "learning_rate": 4.6125318818344395e-05,
      "loss": 0.7944,
      "step": 23800
    },
    {
      "epoch": 0.07784940212310629,
      "grad_norm": 0.4791303277015686,
      "learning_rate": 4.610903180202806e-05,
      "loss": 0.7912,
      "step": 23900
    },
    {
      "epoch": 0.07817513183910255,
      "grad_norm": 0.8759014010429382,
      "learning_rate": 4.6092744785711726e-05,
      "loss": 0.8198,
      "step": 24000
    },
    {
      "epoch": 0.0785008615550988,
      "grad_norm": 0.47595012187957764,
      "learning_rate": 4.60764577693954e-05,
      "loss": 0.7984,
      "step": 24100
    },
    {
      "epoch": 0.07882659127109506,
      "grad_norm": 0.7923133373260498,
      "learning_rate": 4.6060170753079065e-05,
      "loss": 0.7436,
      "step": 24200
    },
    {
      "epoch": 0.07915232098709134,
      "grad_norm": 0.39254361391067505,
      "learning_rate": 4.604388373676273e-05,
      "loss": 0.7771,
      "step": 24300
    },
    {
      "epoch": 0.0794780507030876,
      "grad_norm": 0.6828033924102783,
      "learning_rate": 4.6027596720446396e-05,
      "loss": 0.8083,
      "step": 24400
    },
    {
      "epoch": 0.07980378041908386,
      "grad_norm": 0.6189585328102112,
      "learning_rate": 4.601130970413006e-05,
      "loss": 0.7885,
      "step": 24500
    },
    {
      "epoch": 0.08012951013508011,
      "grad_norm": 0.6750975847244263,
      "learning_rate": 4.5995022687813734e-05,
      "loss": 0.759,
      "step": 24600
    },
    {
      "epoch": 0.08045523985107637,
      "grad_norm": 0.6616020798683167,
      "learning_rate": 4.59787356714974e-05,
      "loss": 0.8226,
      "step": 24700
    },
    {
      "epoch": 0.08078096956707263,
      "grad_norm": 0.7598117589950562,
      "learning_rate": 4.5962448655181066e-05,
      "loss": 0.7806,
      "step": 24800
    },
    {
      "epoch": 0.08110669928306889,
      "grad_norm": 0.41183263063430786,
      "learning_rate": 4.594616163886473e-05,
      "loss": 0.7939,
      "step": 24900
    },
    {
      "epoch": 0.08143242899906515,
      "grad_norm": 0.40911582112312317,
      "learning_rate": 4.59298746225484e-05,
      "loss": 0.7635,
      "step": 25000
    },
    {
      "epoch": 0.08175815871506141,
      "grad_norm": 0.8820083737373352,
      "learning_rate": 4.591358760623207e-05,
      "loss": 0.7886,
      "step": 25100
    },
    {
      "epoch": 0.08208388843105768,
      "grad_norm": 0.9055482745170593,
      "learning_rate": 4.589730058991573e-05,
      "loss": 0.7487,
      "step": 25200
    },
    {
      "epoch": 0.08240961814705394,
      "grad_norm": 0.5680561065673828,
      "learning_rate": 4.58810135735994e-05,
      "loss": 0.7505,
      "step": 25300
    },
    {
      "epoch": 0.0827353478630502,
      "grad_norm": 0.5064377188682556,
      "learning_rate": 4.586472655728307e-05,
      "loss": 0.768,
      "step": 25400
    },
    {
      "epoch": 0.08306107757904646,
      "grad_norm": 0.462200403213501,
      "learning_rate": 4.584843954096674e-05,
      "loss": 0.7399,
      "step": 25500
    },
    {
      "epoch": 0.08338680729504272,
      "grad_norm": 0.7820500731468201,
      "learning_rate": 4.58321525246504e-05,
      "loss": 0.8109,
      "step": 25600
    },
    {
      "epoch": 0.08371253701103898,
      "grad_norm": 0.4833464026451111,
      "learning_rate": 4.5815865508334064e-05,
      "loss": 0.764,
      "step": 25700
    },
    {
      "epoch": 0.08403826672703524,
      "grad_norm": 0.3821680247783661,
      "learning_rate": 4.5799578492017737e-05,
      "loss": 0.7397,
      "step": 25800
    },
    {
      "epoch": 0.0843639964430315,
      "grad_norm": 0.5084909200668335,
      "learning_rate": 4.57832914757014e-05,
      "loss": 0.7428,
      "step": 25900
    },
    {
      "epoch": 0.08468972615902776,
      "grad_norm": 0.925619900226593,
      "learning_rate": 4.576700445938507e-05,
      "loss": 0.7386,
      "step": 26000
    },
    {
      "epoch": 0.08501545587502403,
      "grad_norm": 0.8126088380813599,
      "learning_rate": 4.5750717443068734e-05,
      "loss": 0.7798,
      "step": 26100
    },
    {
      "epoch": 0.08534118559102029,
      "grad_norm": 1.0178046226501465,
      "learning_rate": 4.5734430426752406e-05,
      "loss": 0.7796,
      "step": 26200
    },
    {
      "epoch": 0.08566691530701655,
      "grad_norm": 0.4879295229911804,
      "learning_rate": 4.571814341043607e-05,
      "loss": 0.7762,
      "step": 26300
    },
    {
      "epoch": 0.0859926450230128,
      "grad_norm": 0.6722548604011536,
      "learning_rate": 4.570185639411974e-05,
      "loss": 0.7234,
      "step": 26400
    },
    {
      "epoch": 0.08631837473900907,
      "grad_norm": 0.6326486468315125,
      "learning_rate": 4.5685569377803403e-05,
      "loss": 0.72,
      "step": 26500
    },
    {
      "epoch": 0.08664410445500532,
      "grad_norm": 0.4354076087474823,
      "learning_rate": 4.566928236148707e-05,
      "loss": 0.7704,
      "step": 26600
    },
    {
      "epoch": 0.08696983417100158,
      "grad_norm": 0.7113054394721985,
      "learning_rate": 4.565299534517074e-05,
      "loss": 0.7623,
      "step": 26700
    },
    {
      "epoch": 0.08729556388699784,
      "grad_norm": 0.595664381980896,
      "learning_rate": 4.563670832885441e-05,
      "loss": 0.765,
      "step": 26800
    },
    {
      "epoch": 0.0876212936029941,
      "grad_norm": 0.5344740152359009,
      "learning_rate": 4.562042131253807e-05,
      "loss": 0.7201,
      "step": 26900
    },
    {
      "epoch": 0.08794702331899037,
      "grad_norm": 0.5330939292907715,
      "learning_rate": 4.560413429622174e-05,
      "loss": 0.7617,
      "step": 27000
    },
    {
      "epoch": 0.08827275303498663,
      "grad_norm": 0.45265939831733704,
      "learning_rate": 4.5587847279905405e-05,
      "loss": 0.7806,
      "step": 27100
    },
    {
      "epoch": 0.0885984827509829,
      "grad_norm": 0.5947338342666626,
      "learning_rate": 4.557156026358908e-05,
      "loss": 0.7524,
      "step": 27200
    },
    {
      "epoch": 0.08892421246697915,
      "grad_norm": 0.8656592965126038,
      "learning_rate": 4.555527324727274e-05,
      "loss": 0.7599,
      "step": 27300
    },
    {
      "epoch": 0.08924994218297541,
      "grad_norm": 0.645728349685669,
      "learning_rate": 4.553898623095641e-05,
      "loss": 0.7629,
      "step": 27400
    },
    {
      "epoch": 0.08957567189897167,
      "grad_norm": 0.8474392890930176,
      "learning_rate": 4.5522699214640074e-05,
      "loss": 0.7641,
      "step": 27500
    },
    {
      "epoch": 0.08990140161496793,
      "grad_norm": 0.7386724948883057,
      "learning_rate": 4.550641219832375e-05,
      "loss": 0.7523,
      "step": 27600
    },
    {
      "epoch": 0.09022713133096419,
      "grad_norm": 0.9216130971908569,
      "learning_rate": 4.549012518200741e-05,
      "loss": 0.7562,
      "step": 27700
    },
    {
      "epoch": 0.09055286104696045,
      "grad_norm": 0.8789349794387817,
      "learning_rate": 4.547383816569107e-05,
      "loss": 0.7229,
      "step": 27800
    },
    {
      "epoch": 0.0908785907629567,
      "grad_norm": 0.582091748714447,
      "learning_rate": 4.5457551149374744e-05,
      "loss": 0.7274,
      "step": 27900
    },
    {
      "epoch": 0.09120432047895298,
      "grad_norm": 0.6011328101158142,
      "learning_rate": 4.544126413305841e-05,
      "loss": 0.7297,
      "step": 28000
    },
    {
      "epoch": 0.09153005019494924,
      "grad_norm": 0.6041598916053772,
      "learning_rate": 4.542497711674208e-05,
      "loss": 0.7409,
      "step": 28100
    },
    {
      "epoch": 0.0918557799109455,
      "grad_norm": 0.7190874814987183,
      "learning_rate": 4.540869010042574e-05,
      "loss": 0.7149,
      "step": 28200
    },
    {
      "epoch": 0.09218150962694176,
      "grad_norm": 0.5705780982971191,
      "learning_rate": 4.5392403084109414e-05,
      "loss": 0.76,
      "step": 28300
    },
    {
      "epoch": 0.09250723934293802,
      "grad_norm": 0.7988401651382446,
      "learning_rate": 4.537611606779308e-05,
      "loss": 0.7594,
      "step": 28400
    },
    {
      "epoch": 0.09283296905893428,
      "grad_norm": 0.48971208930015564,
      "learning_rate": 4.5359829051476745e-05,
      "loss": 0.7505,
      "step": 28500
    },
    {
      "epoch": 0.09315869877493053,
      "grad_norm": 0.6600379347801208,
      "learning_rate": 4.534354203516041e-05,
      "loss": 0.7902,
      "step": 28600
    },
    {
      "epoch": 0.0934844284909268,
      "grad_norm": 0.6095920205116272,
      "learning_rate": 4.5327255018844077e-05,
      "loss": 0.7166,
      "step": 28700
    },
    {
      "epoch": 0.09381015820692305,
      "grad_norm": 0.6808424592018127,
      "learning_rate": 4.531096800252775e-05,
      "loss": 0.7148,
      "step": 28800
    },
    {
      "epoch": 0.09413588792291933,
      "grad_norm": 0.9923068284988403,
      "learning_rate": 4.5294680986211415e-05,
      "loss": 0.7226,
      "step": 28900
    },
    {
      "epoch": 0.09446161763891558,
      "grad_norm": 0.8952274918556213,
      "learning_rate": 4.527839396989508e-05,
      "loss": 0.7645,
      "step": 29000
    },
    {
      "epoch": 0.09478734735491184,
      "grad_norm": 0.7416999936103821,
      "learning_rate": 4.5262106953578746e-05,
      "loss": 0.7503,
      "step": 29100
    },
    {
      "epoch": 0.0951130770709081,
      "grad_norm": 0.7862002849578857,
      "learning_rate": 4.524581993726242e-05,
      "loss": 0.7469,
      "step": 29200
    },
    {
      "epoch": 0.09543880678690436,
      "grad_norm": 0.6296769380569458,
      "learning_rate": 4.5229532920946085e-05,
      "loss": 0.6873,
      "step": 29300
    },
    {
      "epoch": 0.09576453650290062,
      "grad_norm": 0.9056894779205322,
      "learning_rate": 4.521324590462975e-05,
      "loss": 0.7126,
      "step": 29400
    },
    {
      "epoch": 0.09609026621889688,
      "grad_norm": 0.624724268913269,
      "learning_rate": 4.5196958888313416e-05,
      "loss": 0.7668,
      "step": 29500
    },
    {
      "epoch": 0.09641599593489314,
      "grad_norm": 0.680957555770874,
      "learning_rate": 4.518067187199708e-05,
      "loss": 0.7783,
      "step": 29600
    },
    {
      "epoch": 0.0967417256508894,
      "grad_norm": 0.5778472423553467,
      "learning_rate": 4.5164384855680754e-05,
      "loss": 0.7355,
      "step": 29700
    },
    {
      "epoch": 0.09706745536688567,
      "grad_norm": 0.6346442699432373,
      "learning_rate": 4.514809783936442e-05,
      "loss": 0.7276,
      "step": 29800
    },
    {
      "epoch": 0.09739318508288193,
      "grad_norm": 0.9289300441741943,
      "learning_rate": 4.5131810823048086e-05,
      "loss": 0.7179,
      "step": 29900
    },
    {
      "epoch": 0.09771891479887819,
      "grad_norm": 0.7473464012145996,
      "learning_rate": 4.511552380673175e-05,
      "loss": 0.7172,
      "step": 30000
    },
    {
      "epoch": 0.09804464451487445,
      "grad_norm": 0.6801792979240417,
      "learning_rate": 4.509923679041542e-05,
      "loss": 0.7074,
      "step": 30100
    },
    {
      "epoch": 0.09837037423087071,
      "grad_norm": 0.6129624247550964,
      "learning_rate": 4.508294977409909e-05,
      "loss": 0.7166,
      "step": 30200
    },
    {
      "epoch": 0.09869610394686697,
      "grad_norm": 0.8195613026618958,
      "learning_rate": 4.506666275778275e-05,
      "loss": 0.7709,
      "step": 30300
    },
    {
      "epoch": 0.09902183366286323,
      "grad_norm": 0.4703550934791565,
      "learning_rate": 4.505037574146642e-05,
      "loss": 0.7037,
      "step": 30400
    },
    {
      "epoch": 0.09934756337885949,
      "grad_norm": 0.7674877047538757,
      "learning_rate": 4.503408872515009e-05,
      "loss": 0.7202,
      "step": 30500
    },
    {
      "epoch": 0.09967329309485574,
      "grad_norm": 0.8670388460159302,
      "learning_rate": 4.501780170883376e-05,
      "loss": 0.7183,
      "step": 30600
    },
    {
      "epoch": 0.09999902281085202,
      "grad_norm": 0.280652791261673,
      "learning_rate": 4.500151469251742e-05,
      "loss": 0.6998,
      "step": 30700
    },
    {
      "epoch": 0.10032475252684828,
      "grad_norm": 0.7346746325492859,
      "learning_rate": 4.4985227676201084e-05,
      "loss": 0.7358,
      "step": 30800
    },
    {
      "epoch": 0.10065048224284454,
      "grad_norm": 0.978670060634613,
      "learning_rate": 4.4968940659884756e-05,
      "loss": 0.7259,
      "step": 30900
    },
    {
      "epoch": 0.1009762119588408,
      "grad_norm": 0.5910704135894775,
      "learning_rate": 4.495265364356842e-05,
      "loss": 0.7074,
      "step": 31000
    },
    {
      "epoch": 0.10130194167483705,
      "grad_norm": 0.7966532707214355,
      "learning_rate": 4.493636662725209e-05,
      "loss": 0.7117,
      "step": 31100
    },
    {
      "epoch": 0.10162767139083331,
      "grad_norm": 0.9344640374183655,
      "learning_rate": 4.4920079610935754e-05,
      "loss": 0.7349,
      "step": 31200
    },
    {
      "epoch": 0.10195340110682957,
      "grad_norm": 0.8043787479400635,
      "learning_rate": 4.4903792594619426e-05,
      "loss": 0.7361,
      "step": 31300
    },
    {
      "epoch": 0.10227913082282583,
      "grad_norm": 0.6786687970161438,
      "learning_rate": 4.488750557830309e-05,
      "loss": 0.6969,
      "step": 31400
    },
    {
      "epoch": 0.10260486053882209,
      "grad_norm": 0.4679253399372101,
      "learning_rate": 4.487121856198676e-05,
      "loss": 0.7157,
      "step": 31500
    },
    {
      "epoch": 0.10293059025481836,
      "grad_norm": 0.5903817415237427,
      "learning_rate": 4.485493154567042e-05,
      "loss": 0.7352,
      "step": 31600
    },
    {
      "epoch": 0.10325631997081462,
      "grad_norm": 0.715834379196167,
      "learning_rate": 4.483864452935409e-05,
      "loss": 0.7532,
      "step": 31700
    },
    {
      "epoch": 0.10358204968681088,
      "grad_norm": 0.6664106249809265,
      "learning_rate": 4.482235751303776e-05,
      "loss": 0.6853,
      "step": 31800
    },
    {
      "epoch": 0.10390777940280714,
      "grad_norm": 0.700243353843689,
      "learning_rate": 4.480607049672143e-05,
      "loss": 0.6835,
      "step": 31900
    },
    {
      "epoch": 0.1042335091188034,
      "grad_norm": 0.7481942772865295,
      "learning_rate": 4.478978348040509e-05,
      "loss": 0.7343,
      "step": 32000
    },
    {
      "epoch": 0.10455923883479966,
      "grad_norm": 0.5347774028778076,
      "learning_rate": 4.477349646408876e-05,
      "loss": 0.6688,
      "step": 32100
    },
    {
      "epoch": 0.10488496855079592,
      "grad_norm": 0.541346549987793,
      "learning_rate": 4.4757209447772425e-05,
      "loss": 0.7088,
      "step": 32200
    },
    {
      "epoch": 0.10521069826679218,
      "grad_norm": 0.6126936674118042,
      "learning_rate": 4.47409224314561e-05,
      "loss": 0.7333,
      "step": 32300
    },
    {
      "epoch": 0.10553642798278844,
      "grad_norm": 0.952684760093689,
      "learning_rate": 4.472463541513976e-05,
      "loss": 0.7242,
      "step": 32400
    },
    {
      "epoch": 0.10586215769878471,
      "grad_norm": 0.72658771276474,
      "learning_rate": 4.470834839882343e-05,
      "loss": 0.7422,
      "step": 32500
    },
    {
      "epoch": 0.10618788741478097,
      "grad_norm": 0.5741873383522034,
      "learning_rate": 4.4692061382507094e-05,
      "loss": 0.7307,
      "step": 32600
    },
    {
      "epoch": 0.10651361713077723,
      "grad_norm": 0.646496057510376,
      "learning_rate": 4.467577436619077e-05,
      "loss": 0.7138,
      "step": 32700
    },
    {
      "epoch": 0.10683934684677349,
      "grad_norm": 0.40007448196411133,
      "learning_rate": 4.465948734987443e-05,
      "loss": 0.7045,
      "step": 32800
    },
    {
      "epoch": 0.10716507656276975,
      "grad_norm": 0.6594932675361633,
      "learning_rate": 4.464320033355809e-05,
      "loss": 0.6874,
      "step": 32900
    },
    {
      "epoch": 0.107490806278766,
      "grad_norm": 0.7663995623588562,
      "learning_rate": 4.4626913317241764e-05,
      "loss": 0.7303,
      "step": 33000
    },
    {
      "epoch": 0.10781653599476226,
      "grad_norm": 0.5867152810096741,
      "learning_rate": 4.461062630092543e-05,
      "loss": 0.7072,
      "step": 33100
    },
    {
      "epoch": 0.10814226571075852,
      "grad_norm": 0.5017038583755493,
      "learning_rate": 4.45943392846091e-05,
      "loss": 0.6879,
      "step": 33200
    },
    {
      "epoch": 0.10846799542675478,
      "grad_norm": 0.6196131110191345,
      "learning_rate": 4.457805226829276e-05,
      "loss": 0.7094,
      "step": 33300
    },
    {
      "epoch": 0.10879372514275105,
      "grad_norm": 0.643118679523468,
      "learning_rate": 4.4561765251976434e-05,
      "loss": 0.6763,
      "step": 33400
    },
    {
      "epoch": 0.10911945485874731,
      "grad_norm": 0.516583263874054,
| "learning_rate": 4.45454782356601e-05, | |
| "loss": 0.6744, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.10944518457474357, | |
| "grad_norm": 0.6565887928009033, | |
| "learning_rate": 4.4529191219343765e-05, | |
| "loss": 0.6818, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.10977091429073983, | |
| "grad_norm": 0.644209623336792, | |
| "learning_rate": 4.451290420302743e-05, | |
| "loss": 0.6795, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.11009664400673609, | |
| "grad_norm": 0.5720322132110596, | |
| "learning_rate": 4.4496617186711096e-05, | |
| "loss": 0.6444, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.11042237372273235, | |
| "grad_norm": 0.7580476999282837, | |
| "learning_rate": 4.448033017039477e-05, | |
| "loss": 0.7067, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.11074810343872861, | |
| "grad_norm": 0.3334468603134155, | |
| "learning_rate": 4.4464043154078435e-05, | |
| "loss": 0.7245, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.11107383315472487, | |
| "grad_norm": 0.7232679724693298, | |
| "learning_rate": 4.44477561377621e-05, | |
| "loss": 0.6476, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.11139956287072113, | |
| "grad_norm": 0.49447712302207947, | |
| "learning_rate": 4.4431469121445766e-05, | |
| "loss": 0.6813, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.11172529258671739, | |
| "grad_norm": 0.9112755656242371, | |
| "learning_rate": 4.441518210512943e-05, | |
| "loss": 0.7039, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.11205102230271366, | |
| "grad_norm": 0.9391865134239197, | |
| "learning_rate": 4.4398895088813104e-05, | |
| "loss": 0.7154, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.11237675201870992, | |
| "grad_norm": 0.6869890689849854, | |
| "learning_rate": 4.438260807249677e-05, | |
| "loss": 0.7462, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.11270248173470618, | |
| "grad_norm": 0.6954273581504822, | |
| "learning_rate": 4.4366321056180436e-05, | |
| "loss": 0.7151, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.11302821145070244, | |
| "grad_norm": 0.8512132167816162, | |
| "learning_rate": 4.43500340398641e-05, | |
| "loss": 0.7157, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.1133539411666987, | |
| "grad_norm": 0.7044045329093933, | |
| "learning_rate": 4.4333747023547774e-05, | |
| "loss": 0.6649, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.11367967088269496, | |
| "grad_norm": 0.6773298978805542, | |
| "learning_rate": 4.431746000723144e-05, | |
| "loss": 0.6137, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.11400540059869121, | |
| "grad_norm": 0.544491171836853, | |
| "learning_rate": 4.43011729909151e-05, | |
| "loss": 0.6577, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.11433113031468747, | |
| "grad_norm": 0.543596625328064, | |
| "learning_rate": 4.428488597459877e-05, | |
| "loss": 0.6699, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.11465686003068373, | |
| "grad_norm": 0.7878594398498535, | |
| "learning_rate": 4.426859895828244e-05, | |
| "loss": 0.709, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.11498258974668, | |
| "grad_norm": 0.8226998448371887, | |
| "learning_rate": 4.425231194196611e-05, | |
| "loss": 0.6954, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.11530831946267626, | |
| "grad_norm": 0.48608875274658203, | |
| "learning_rate": 4.423602492564977e-05, | |
| "loss": 0.7502, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.11563404917867252, | |
| "grad_norm": 0.6490182280540466, | |
| "learning_rate": 4.421973790933344e-05, | |
| "loss": 0.7085, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.11595977889466878, | |
| "grad_norm": 0.3032003343105316, | |
| "learning_rate": 4.420345089301711e-05, | |
| "loss": 0.6778, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.11628550861066504, | |
| "grad_norm": 0.7003344297409058, | |
| "learning_rate": 4.418716387670077e-05, | |
| "loss": 0.71, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.1166112383266613, | |
| "grad_norm": 0.6569785475730896, | |
| "learning_rate": 4.417087686038444e-05, | |
| "loss": 0.653, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.11693696804265756, | |
| "grad_norm": 0.5428867936134338, | |
| "learning_rate": 4.4154589844068104e-05, | |
| "loss": 0.6733, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.11726269775865382, | |
| "grad_norm": 0.6179760098457336, | |
| "learning_rate": 4.4138302827751776e-05, | |
| "loss": 0.7081, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.11758842747465008, | |
| "grad_norm": 0.7397803068161011, | |
| "learning_rate": 4.412201581143544e-05, | |
| "loss": 0.6894, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.11791415719064635, | |
| "grad_norm": 0.725395679473877, | |
| "learning_rate": 4.410572879511911e-05, | |
| "loss": 0.6874, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.11823988690664261, | |
| "grad_norm": 0.45658519864082336, | |
| "learning_rate": 4.4089441778802774e-05, | |
| "loss": 0.6821, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.11856561662263887, | |
| "grad_norm": 0.9002487063407898, | |
| "learning_rate": 4.407315476248644e-05, | |
| "loss": 0.641, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.11889134633863513, | |
| "grad_norm": 0.8738647103309631, | |
| "learning_rate": 4.405686774617011e-05, | |
| "loss": 0.6763, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.11921707605463139, | |
| "grad_norm": 1.0051002502441406, | |
| "learning_rate": 4.404058072985378e-05, | |
| "loss": 0.6775, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.11954280577062765, | |
| "grad_norm": 0.8074469566345215, | |
| "learning_rate": 4.402429371353744e-05, | |
| "loss": 0.7408, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.1198685354866239, | |
| "grad_norm": 0.485388845205307, | |
| "learning_rate": 4.400800669722111e-05, | |
| "loss": 0.6729, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.12019426520262017, | |
| "grad_norm": 0.7123886942863464, | |
| "learning_rate": 4.399171968090478e-05, | |
| "loss": 0.661, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.12051999491861642, | |
| "grad_norm": 0.4587586522102356, | |
| "learning_rate": 4.397543266458845e-05, | |
| "loss": 0.6662, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.1208457246346127, | |
| "grad_norm": 0.7726449966430664, | |
| "learning_rate": 4.395914564827211e-05, | |
| "loss": 0.7469, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.12117145435060896, | |
| "grad_norm": 0.8636273741722107, | |
| "learning_rate": 4.394285863195578e-05, | |
| "loss": 0.6669, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.12149718406660522, | |
| "grad_norm": 0.6817033886909485, | |
| "learning_rate": 4.3926571615639444e-05, | |
| "loss": 0.6874, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.12182291378260147, | |
| "grad_norm": 0.5549355149269104, | |
| "learning_rate": 4.391028459932312e-05, | |
| "loss": 0.6939, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.12214864349859773, | |
| "grad_norm": 0.6180316805839539, | |
| "learning_rate": 4.389399758300678e-05, | |
| "loss": 0.6299, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.12247437321459399, | |
| "grad_norm": 0.7779985070228577, | |
| "learning_rate": 4.387771056669045e-05, | |
| "loss": 0.7181, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.12280010293059025, | |
| "grad_norm": 0.7182669043540955, | |
| "learning_rate": 4.3861423550374114e-05, | |
| "loss": 0.6703, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.12312583264658651, | |
| "grad_norm": 0.7191387414932251, | |
| "learning_rate": 4.3845136534057787e-05, | |
| "loss": 0.6802, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.12345156236258277, | |
| "grad_norm": 0.6137369275093079, | |
| "learning_rate": 4.382884951774145e-05, | |
| "loss": 0.7028, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.12377729207857904, | |
| "grad_norm": 0.7508791089057922, | |
| "learning_rate": 4.381256250142511e-05, | |
| "loss": 0.642, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.1241030217945753, | |
| "grad_norm": 0.6414891481399536, | |
| "learning_rate": 4.3796275485108784e-05, | |
| "loss": 0.6255, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.12442875151057156, | |
| "grad_norm": 0.6669697165489197, | |
| "learning_rate": 4.377998846879245e-05, | |
| "loss": 0.6691, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.12475448122656782, | |
| "grad_norm": 0.8991898894309998, | |
| "learning_rate": 4.376370145247612e-05, | |
| "loss": 0.6727, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.12508021094256408, | |
| "grad_norm": 0.4924679398536682, | |
| "learning_rate": 4.374741443615978e-05, | |
| "loss": 0.6661, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.12540594065856034, | |
| "grad_norm": 0.3712103068828583, | |
| "learning_rate": 4.3731127419843453e-05, | |
| "loss": 0.7306, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.1257316703745566, | |
| "grad_norm": 0.9136518836021423, | |
| "learning_rate": 4.371484040352712e-05, | |
| "loss": 0.6453, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.12605740009055286, | |
| "grad_norm": 0.6828204393386841, | |
| "learning_rate": 4.3698553387210785e-05, | |
| "loss": 0.6587, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.12638312980654912, | |
| "grad_norm": 0.6366333961486816, | |
| "learning_rate": 4.368226637089445e-05, | |
| "loss": 0.6606, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.12670885952254538, | |
| "grad_norm": 0.39375558495521545, | |
| "learning_rate": 4.3665979354578116e-05, | |
| "loss": 0.6937, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.12703458923854163, | |
| "grad_norm": 0.46293410658836365, | |
| "learning_rate": 4.364969233826179e-05, | |
| "loss": 0.6504, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.1273603189545379, | |
| "grad_norm": 0.9897958040237427, | |
| "learning_rate": 4.3633405321945455e-05, | |
| "loss": 0.7126, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.12768604867053415, | |
| "grad_norm": 0.5616987347602844, | |
| "learning_rate": 4.361711830562912e-05, | |
| "loss": 0.5956, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.1280117783865304, | |
| "grad_norm": 0.4081191122531891, | |
| "learning_rate": 4.3600831289312786e-05, | |
| "loss": 0.6648, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.1283375081025267, | |
| "grad_norm": 0.485188364982605, | |
| "learning_rate": 4.358454427299645e-05, | |
| "loss": 0.6694, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.12866323781852296, | |
| "grad_norm": 0.7212422490119934, | |
| "learning_rate": 4.3568257256680124e-05, | |
| "loss": 0.6767, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.12898896753451922, | |
| "grad_norm": 0.5502139925956726, | |
| "learning_rate": 4.355197024036379e-05, | |
| "loss": 0.6721, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.12931469725051548, | |
| "grad_norm": 0.49975594878196716, | |
| "learning_rate": 4.3535683224047456e-05, | |
| "loss": 0.6669, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.12964042696651173, | |
| "grad_norm": 0.4203544557094574, | |
| "learning_rate": 4.351939620773112e-05, | |
| "loss": 0.6716, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.129966156682508, | |
| "grad_norm": 0.5464275479316711, | |
| "learning_rate": 4.3503109191414794e-05, | |
| "loss": 0.6544, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.13029188639850425, | |
| "grad_norm": 0.6473097801208496, | |
| "learning_rate": 4.348682217509846e-05, | |
| "loss": 0.6977, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.1306176161145005, | |
| "grad_norm": 0.39890334010124207, | |
| "learning_rate": 4.347053515878212e-05, | |
| "loss": 0.6704, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.13094334583049677, | |
| "grad_norm": 1.0785876512527466, | |
| "learning_rate": 4.345424814246579e-05, | |
| "loss": 0.6196, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.13126907554649303, | |
| "grad_norm": 0.6607077121734619, | |
| "learning_rate": 4.343796112614946e-05, | |
| "loss": 0.6608, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.1315948052624893, | |
| "grad_norm": 0.5987501740455627, | |
| "learning_rate": 4.342167410983313e-05, | |
| "loss": 0.6334, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.13192053497848555, | |
| "grad_norm": 0.3443163335323334, | |
| "learning_rate": 4.340538709351679e-05, | |
| "loss": 0.6621, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.1322462646944818, | |
| "grad_norm": 0.9362694025039673, | |
| "learning_rate": 4.338910007720046e-05, | |
| "loss": 0.6404, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.13257199441047807, | |
| "grad_norm": 0.5049243569374084, | |
| "learning_rate": 4.3372813060884127e-05, | |
| "loss": 0.6426, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.13289772412647433, | |
| "grad_norm": 0.787389874458313, | |
| "learning_rate": 4.335652604456779e-05, | |
| "loss": 0.6432, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.13322345384247058, | |
| "grad_norm": 0.8065658211708069, | |
| "learning_rate": 4.334023902825146e-05, | |
| "loss": 0.6477, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.13354918355846684, | |
| "grad_norm": 0.5166397094726562, | |
| "learning_rate": 4.3323952011935124e-05, | |
| "loss": 0.6384, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.1338749132744631, | |
| "grad_norm": 0.9597229957580566, | |
| "learning_rate": 4.3307664995618796e-05, | |
| "loss": 0.6832, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.1342006429904594, | |
| "grad_norm": 0.5936517715454102, | |
| "learning_rate": 4.329137797930246e-05, | |
| "loss": 0.6767, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.13452637270645565, | |
| "grad_norm": 0.8391766548156738, | |
| "learning_rate": 4.3275090962986135e-05, | |
| "loss": 0.6215, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.1348521024224519, | |
| "grad_norm": 0.977497398853302, | |
| "learning_rate": 4.3258803946669793e-05, | |
| "loss": 0.6307, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.13517783213844817, | |
| "grad_norm": 0.6750873923301697, | |
| "learning_rate": 4.324251693035346e-05, | |
| "loss": 0.631, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.13550356185444443, | |
| "grad_norm": 0.4655423164367676, | |
| "learning_rate": 4.322622991403713e-05, | |
| "loss": 0.7025, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.13582929157044069, | |
| "grad_norm": 0.43544334173202515, | |
| "learning_rate": 4.32099428977208e-05, | |
| "loss": 0.6555, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.13615502128643694, | |
| "grad_norm": 0.7595189213752747, | |
| "learning_rate": 4.319365588140446e-05, | |
| "loss": 0.6197, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.1364807510024332, | |
| "grad_norm": 0.4422534108161926, | |
| "learning_rate": 4.317736886508813e-05, | |
| "loss": 0.5798, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.13680648071842946, | |
| "grad_norm": 0.4622032344341278, | |
| "learning_rate": 4.31610818487718e-05, | |
| "loss": 0.6493, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.13713221043442572, | |
| "grad_norm": 0.7267939448356628, | |
| "learning_rate": 4.314479483245547e-05, | |
| "loss": 0.6228, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.13745794015042198, | |
| "grad_norm": 0.66838139295578, | |
| "learning_rate": 4.312850781613913e-05, | |
| "loss": 0.6507, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.13778366986641824, | |
| "grad_norm": 0.40865644812583923, | |
| "learning_rate": 4.31122207998228e-05, | |
| "loss": 0.6388, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.1381093995824145, | |
| "grad_norm": 0.7203364968299866, | |
| "learning_rate": 4.3095933783506464e-05, | |
| "loss": 0.589, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.13843512929841076, | |
| "grad_norm": 0.7719990015029907, | |
| "learning_rate": 4.307964676719014e-05, | |
| "loss": 0.6446, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.13876085901440702, | |
| "grad_norm": 0.35780540108680725, | |
| "learning_rate": 4.30633597508738e-05, | |
| "loss": 0.683, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.13908658873040328, | |
| "grad_norm": 0.5952534675598145, | |
| "learning_rate": 4.304707273455747e-05, | |
| "loss": 0.6697, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.13941231844639954, | |
| "grad_norm": 0.539117157459259, | |
| "learning_rate": 4.3030785718241134e-05, | |
| "loss": 0.6582, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.1397380481623958, | |
| "grad_norm": 0.8181525468826294, | |
| "learning_rate": 4.30144987019248e-05, | |
| "loss": 0.6695, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.14006377787839205, | |
| "grad_norm": 0.8720047473907471, | |
| "learning_rate": 4.299821168560847e-05, | |
| "loss": 0.5931, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.14038950759438834, | |
| "grad_norm": 0.9138098955154419, | |
| "learning_rate": 4.298192466929213e-05, | |
| "loss": 0.6874, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.1407152373103846, | |
| "grad_norm": 0.8015493750572205, | |
| "learning_rate": 4.2965637652975804e-05, | |
| "loss": 0.6574, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.14104096702638086, | |
| "grad_norm": 0.8426867723464966, | |
| "learning_rate": 4.294935063665947e-05, | |
| "loss": 0.6662, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.14136669674237712, | |
| "grad_norm": 0.3480939567089081, | |
| "learning_rate": 4.293306362034314e-05, | |
| "loss": 0.6351, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.14169242645837338, | |
| "grad_norm": 0.5666735172271729, | |
| "learning_rate": 4.29167766040268e-05, | |
| "loss": 0.641, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.14201815617436964, | |
| "grad_norm": 0.9445961117744446, | |
| "learning_rate": 4.2900489587710467e-05, | |
| "loss": 0.6608, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.1423438858903659, | |
| "grad_norm": 0.7916907072067261, | |
| "learning_rate": 4.288420257139414e-05, | |
| "loss": 0.6615, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.14266961560636215, | |
| "grad_norm": 0.9159532785415649, | |
| "learning_rate": 4.2867915555077805e-05, | |
| "loss": 0.5919, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.1429953453223584, | |
| "grad_norm": 0.5766249895095825, | |
| "learning_rate": 4.285162853876147e-05, | |
| "loss": 0.6724, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.14332107503835467, | |
| "grad_norm": 0.753519594669342, | |
| "learning_rate": 4.2835341522445136e-05, | |
| "loss": 0.6995, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.14364680475435093, | |
| "grad_norm": 1.1004271507263184, | |
| "learning_rate": 4.281905450612881e-05, | |
| "loss": 0.6636, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.1439725344703472, | |
| "grad_norm": 0.7064334154129028, | |
| "learning_rate": 4.2802767489812475e-05, | |
| "loss": 0.6793, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.14429826418634345, | |
| "grad_norm": 0.5158839225769043, | |
| "learning_rate": 4.278648047349614e-05, | |
| "loss": 0.6336, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.1446239939023397, | |
| "grad_norm": 1.0451433658599854, | |
| "learning_rate": 4.2770193457179806e-05, | |
| "loss": 0.6227, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.14494972361833597, | |
| "grad_norm": 0.5956864356994629, | |
| "learning_rate": 4.275390644086347e-05, | |
| "loss": 0.6517, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.14527545333433223, | |
| "grad_norm": 0.9525729417800903, | |
| "learning_rate": 4.2737619424547144e-05, | |
| "loss": 0.6245, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.1456011830503285, | |
| "grad_norm": 0.7456961274147034, | |
| "learning_rate": 4.272133240823081e-05, | |
| "loss": 0.6577, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.14592691276632475, | |
| "grad_norm": 0.5686585307121277, | |
| "learning_rate": 4.2705045391914476e-05, | |
| "loss": 0.6675, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.14625264248232103, | |
| "grad_norm": 0.5127500295639038, | |
| "learning_rate": 4.268875837559814e-05, | |
| "loss": 0.5966, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.1465783721983173, | |
| "grad_norm": 0.6099263429641724, | |
| "learning_rate": 4.267247135928181e-05, | |
| "loss": 0.6259, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.14690410191431355, | |
| "grad_norm": 0.5734119415283203, | |
| "learning_rate": 4.265618434296548e-05, | |
| "loss": 0.6251, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.1472298316303098, | |
| "grad_norm": 0.40758875012397766, | |
| "learning_rate": 4.263989732664914e-05, | |
| "loss": 0.5856, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.14755556134630607, | |
| "grad_norm": 0.5974459052085876, | |
| "learning_rate": 4.262361031033281e-05, | |
| "loss": 0.6443, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.14788129106230233, | |
| "grad_norm": 0.48085859417915344, | |
| "learning_rate": 4.260732329401648e-05, | |
| "loss": 0.6612, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.1482070207782986, | |
| "grad_norm": 0.5771530270576477, | |
| "learning_rate": 4.259103627770015e-05, | |
| "loss": 0.6272, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.14853275049429485, | |
| "grad_norm": 0.8463455438613892, | |
| "learning_rate": 4.2574749261383815e-05, | |
| "loss": 0.6008, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.1488584802102911, | |
| "grad_norm": 0.7014292478561401, | |
| "learning_rate": 4.255846224506748e-05, | |
| "loss": 0.5353, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.14918420992628736, | |
| "grad_norm": 0.6181588768959045, | |
| "learning_rate": 4.2542175228751146e-05, | |
| "loss": 0.6139, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.14950993964228362, | |
| "grad_norm": 0.6540141701698303, | |
| "learning_rate": 4.252588821243481e-05, | |
| "loss": 0.5997, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.14983566935827988, | |
| "grad_norm": 0.47981733083724976, | |
| "learning_rate": 4.2509601196118485e-05, | |
| "loss": 0.6511, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.15016139907427614, | |
| "grad_norm": 0.964857816696167, | |
| "learning_rate": 4.2493314179802144e-05, | |
| "loss": 0.6365, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.1504871287902724, | |
| "grad_norm": 0.6706714034080505, | |
| "learning_rate": 4.2477027163485816e-05, | |
| "loss": 0.664, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.15081285850626866, | |
| "grad_norm": 0.5073367953300476, | |
| "learning_rate": 4.246074014716948e-05, | |
| "loss": 0.5633, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.15113858822226492, | |
| "grad_norm": 0.37114378809928894, | |
| "learning_rate": 4.2444453130853154e-05, | |
| "loss": 0.6498, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.15146431793826118, | |
| "grad_norm": 1.153325080871582, | |
| "learning_rate": 4.242816611453681e-05, | |
| "loss": 0.6254, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.15179004765425744, | |
| "grad_norm": 0.7353873252868652, | |
| "learning_rate": 4.241187909822048e-05, | |
| "loss": 0.6573, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.15211577737025372, | |
| "grad_norm": 0.5379579067230225, | |
| "learning_rate": 4.239559208190415e-05, | |
| "loss": 0.6642, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.15244150708624998, | |
| "grad_norm": 0.341907799243927, | |
| "learning_rate": 4.237930506558782e-05, | |
| "loss": 0.6294, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.15276723680224624, | |
| "grad_norm": 0.3866462707519531, | |
| "learning_rate": 4.236301804927148e-05, | |
| "loss": 0.6212, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.1530929665182425, | |
| "grad_norm": 0.6686252951622009, | |
| "learning_rate": 4.234673103295515e-05, | |
| "loss": 0.64, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.15341869623423876, | |
| "grad_norm": 0.6398385167121887, | |
| "learning_rate": 4.233044401663882e-05, | |
| "loss": 0.6156, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.15374442595023502, | |
| "grad_norm": 0.8679475784301758, | |
| "learning_rate": 4.231415700032249e-05, | |
| "loss": 0.6492, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.15407015566623128, | |
| "grad_norm": 0.6425623297691345, | |
| "learning_rate": 4.229786998400615e-05, | |
| "loss": 0.6661, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.15439588538222754, | |
| "grad_norm": 0.7811526656150818, | |
| "learning_rate": 4.228158296768982e-05, | |
| "loss": 0.6416, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.1547216150982238, | |
| "grad_norm": 0.6820793747901917, | |
| "learning_rate": 4.2265295951373484e-05, | |
| "loss": 0.6426, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.15504734481422006, | |
| "grad_norm": 0.8748511672019958, | |
| "learning_rate": 4.224900893505716e-05, | |
| "loss": 0.6038, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.15537307453021632, | |
| "grad_norm": 0.6828723549842834, | |
| "learning_rate": 4.223272191874082e-05, | |
| "loss": 0.6408, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.15569880424621257, | |
| "grad_norm": 1.01051926612854, | |
| "learning_rate": 4.221643490242449e-05, | |
| "loss": 0.6218, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.15602453396220883, | |
| "grad_norm": 0.6920143961906433, | |
| "learning_rate": 4.2200147886108154e-05, | |
| "loss": 0.63, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.1563502636782051, | |
| "grad_norm": 0.6410394310951233, | |
| "learning_rate": 4.218386086979182e-05, | |
| "loss": 0.6176, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.15667599339420135, | |
| "grad_norm": 0.5157743692398071, | |
| "learning_rate": 4.216757385347549e-05, | |
| "loss": 0.5947, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.1570017231101976, | |
| "grad_norm": 0.6770983934402466, | |
| "learning_rate": 4.215128683715915e-05, | |
| "loss": 0.6192, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.15732745282619387, | |
| "grad_norm": 0.49714550375938416, | |
| "learning_rate": 4.2134999820842824e-05, | |
| "loss": 0.6121, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.15765318254219013, | |
| "grad_norm": 0.3486001789569855, | |
| "learning_rate": 4.211871280452649e-05, | |
| "loss": 0.5821, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.15797891225818642, | |
| "grad_norm": 0.4202999770641327, | |
| "learning_rate": 4.210242578821016e-05, | |
| "loss": 0.5909, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.15830464197418267, | |
| "grad_norm": 0.44769522547721863, | |
| "learning_rate": 4.208613877189382e-05, | |
| "loss": 0.6369, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.15863037169017893, | |
| "grad_norm": 0.6501901745796204, | |
| "learning_rate": 4.2069851755577486e-05, | |
| "loss": 0.6187, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.1589561014061752, | |
| "grad_norm": 0.8261470794677734, | |
| "learning_rate": 4.205356473926116e-05, | |
| "loss": 0.6136, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.15928183112217145, | |
| "grad_norm": 0.9979439973831177, | |
| "learning_rate": 4.2037277722944825e-05, | |
| "loss": 0.623, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.1596075608381677, | |
| "grad_norm": 0.5651659369468689, | |
| "learning_rate": 4.202099070662849e-05, | |
| "loss": 0.6742, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.15993329055416397, | |
| "grad_norm": 0.7412470579147339, | |
| "learning_rate": 4.2004703690312156e-05, | |
| "loss": 0.6272, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.16025902027016023, | |
| "grad_norm": 0.43271690607070923, | |
| "learning_rate": 4.198841667399583e-05, | |
| "loss": 0.5729, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.1605847499861565, | |
| "grad_norm": 0.5117851495742798, | |
| "learning_rate": 4.1972129657679494e-05, | |
| "loss": 0.6156, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.16091047970215275, | |
| "grad_norm": 0.7106539011001587, | |
| "learning_rate": 4.195584264136316e-05, | |
| "loss": 0.6052, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.161236209418149, | |
| "grad_norm": 0.6146919131278992, | |
| "learning_rate": 4.1939555625046826e-05, | |
| "loss": 0.5932, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.16156193913414527, | |
| "grad_norm": 0.49088531732559204, | |
| "learning_rate": 4.192326860873049e-05, | |
| "loss": 0.568, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.16188766885014153, | |
| "grad_norm": 0.9923317432403564, | |
| "learning_rate": 4.1906981592414164e-05, | |
| "loss": 0.596, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.16221339856613778, | |
| "grad_norm": 0.3995937705039978, | |
| "learning_rate": 4.189069457609783e-05, | |
| "loss": 0.6442, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.16253912828213404, | |
| "grad_norm": 0.5258984565734863, | |
| "learning_rate": 4.1874407559781496e-05, | |
| "loss": 0.5601, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.1628648579981303, | |
| "grad_norm": 0.19585928320884705, | |
| "learning_rate": 4.185812054346516e-05, | |
| "loss": 0.6509, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.16319058771412656, | |
| "grad_norm": 0.625548243522644, | |
| "learning_rate": 4.184183352714883e-05, | |
| "loss": 0.6411, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.16351631743012282, | |
| "grad_norm": 0.7014303207397461, | |
| "learning_rate": 4.18255465108325e-05, | |
| "loss": 0.6125, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.16384204714611908, | |
| "grad_norm": 0.5523779988288879, | |
| "learning_rate": 4.1809259494516165e-05, | |
| "loss": 0.5811, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.16416777686211537, | |
| "grad_norm": 0.5742841958999634, | |
| "learning_rate": 4.179297247819983e-05, | |
| "loss": 0.6282, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.16449350657811163, | |
| "grad_norm": 0.5776492357254028, | |
| "learning_rate": 4.17766854618835e-05, | |
| "loss": 0.6622, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.16481923629410788, | |
| "grad_norm": 0.7464694380760193, | |
| "learning_rate": 4.176039844556717e-05, | |
| "loss": 0.6309, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.16514496601010414, | |
| "grad_norm": 0.5271546244621277, | |
| "learning_rate": 4.1744111429250835e-05, | |
| "loss": 0.645, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.1654706957261004, | |
| "grad_norm": 0.6904231905937195, | |
| "learning_rate": 4.1727824412934494e-05, | |
| "loss": 0.5927, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.16579642544209666, | |
| "grad_norm": 0.578195333480835, | |
| "learning_rate": 4.1711537396618166e-05, | |
| "loss": 0.5812, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.16612215515809292, | |
| "grad_norm": 0.8716936707496643, | |
| "learning_rate": 4.169525038030183e-05, | |
| "loss": 0.6261, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.16644788487408918, | |
| "grad_norm": 0.6577697992324829, | |
| "learning_rate": 4.1678963363985505e-05, | |
| "loss": 0.6101, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.16677361459008544, | |
| "grad_norm": 0.7431929111480713, | |
| "learning_rate": 4.1662676347669164e-05, | |
| "loss": 0.6227, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.1670993443060817, | |
| "grad_norm": 0.9198315739631653, | |
| "learning_rate": 4.1646389331352836e-05, | |
| "loss": 0.6399, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.16742507402207796, | |
| "grad_norm": 0.5159572958946228, | |
| "learning_rate": 4.16301023150365e-05, | |
| "loss": 0.6329, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.16775080373807422, | |
| "grad_norm": 0.7744697332382202, | |
| "learning_rate": 4.161381529872017e-05, | |
| "loss": 0.5579, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.16807653345407048, | |
| "grad_norm": 0.4429173767566681, | |
| "learning_rate": 4.159752828240383e-05, | |
| "loss": 0.5786, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.16840226317006673, | |
| "grad_norm": 0.7796801924705505, | |
| "learning_rate": 4.15812412660875e-05, | |
| "loss": 0.6353, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.168727992886063, | |
| "grad_norm": 0.43117523193359375, | |
| "learning_rate": 4.156495424977117e-05, | |
| "loss": 0.5807, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.16905372260205925, | |
| "grad_norm": 0.44315412640571594, | |
| "learning_rate": 4.154866723345484e-05, | |
| "loss": 0.5979, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.1693794523180555, | |
| "grad_norm": 0.4306319057941437, | |
| "learning_rate": 4.15323802171385e-05, | |
| "loss": 0.6498, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.16970518203405177, | |
| "grad_norm": 0.283033549785614, | |
| "learning_rate": 4.151609320082217e-05, | |
| "loss": 0.6329, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.17003091175004806, | |
| "grad_norm": 0.4118421673774719, | |
| "learning_rate": 4.1499806184505834e-05, | |
| "loss": 0.5933, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.17035664146604432, | |
| "grad_norm": 0.9130700826644897, | |
| "learning_rate": 4.148351916818951e-05, | |
| "loss": 0.5349, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.17068237118204058, | |
| "grad_norm": 0.33348548412323, | |
| "learning_rate": 4.146723215187317e-05, | |
| "loss": 0.6182, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.17100810089803684, | |
| "grad_norm": 0.6642253398895264, | |
| "learning_rate": 4.145094513555684e-05, | |
| "loss": 0.5989, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.1713338306140331, | |
| "grad_norm": 0.7113855481147766, | |
| "learning_rate": 4.1434658119240504e-05, | |
| "loss": 0.6063, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.17165956033002935, | |
| "grad_norm": 1.0840643644332886, | |
| "learning_rate": 4.1418371102924177e-05, | |
| "loss": 0.615, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.1719852900460256, | |
| "grad_norm": 0.5277838706970215, | |
| "learning_rate": 4.140208408660784e-05, | |
| "loss": 0.6234, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.17231101976202187, | |
| "grad_norm": 0.5993104577064514, | |
| "learning_rate": 4.13857970702915e-05, | |
| "loss": 0.5905, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.17263674947801813, | |
| "grad_norm": 0.7363581657409668, | |
| "learning_rate": 4.1369510053975174e-05, | |
| "loss": 0.6032, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.1729624791940144, | |
| "grad_norm": 0.6299027800559998, | |
| "learning_rate": 4.135322303765884e-05, | |
| "loss": 0.5717, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.17328820891001065, | |
| "grad_norm": 0.49232372641563416, | |
| "learning_rate": 4.133693602134251e-05, | |
| "loss": 0.6031, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.1736139386260069, | |
| "grad_norm": 0.7371428608894348, | |
| "learning_rate": 4.132064900502617e-05, | |
| "loss": 0.5608, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.17393966834200317, | |
| "grad_norm": 1.0730559825897217, | |
| "learning_rate": 4.1304361988709843e-05, | |
| "loss": 0.6026, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.17426539805799943, | |
| "grad_norm": 0.674548327922821, | |
| "learning_rate": 4.128807497239351e-05, | |
| "loss": 0.5721, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.17459112777399569, | |
| "grad_norm": 0.5990965962409973, | |
| "learning_rate": 4.1271787956077175e-05, | |
| "loss": 0.6185, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.17491685748999194, | |
| "grad_norm": 0.61868816614151, | |
| "learning_rate": 4.125550093976084e-05, | |
| "loss": 0.6089, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.1752425872059882, | |
| "grad_norm": 0.4897661507129669, | |
| "learning_rate": 4.1239213923444506e-05, | |
| "loss": 0.6025, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.17556831692198446, | |
| "grad_norm": 0.2856525480747223, | |
| "learning_rate": 4.122292690712818e-05, | |
| "loss": 0.5609, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.17589404663798075, | |
| "grad_norm": 0.5488519668579102, | |
| "learning_rate": 4.1206639890811845e-05, | |
| "loss": 0.5781, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.176219776353977, | |
| "grad_norm": 0.7812597155570984, | |
| "learning_rate": 4.119035287449551e-05, | |
| "loss": 0.665, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.17654550606997327, | |
| "grad_norm": 0.5567785501480103, | |
| "learning_rate": 4.1174065858179176e-05, | |
| "loss": 0.6178, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.17687123578596953, | |
| "grad_norm": 0.7302952408790588, | |
| "learning_rate": 4.115777884186285e-05, | |
| "loss": 0.5912, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.1771969655019658, | |
| "grad_norm": 0.6872962713241577, | |
| "learning_rate": 4.1141491825546514e-05, | |
| "loss": 0.5698, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.17752269521796205, | |
| "grad_norm": 0.6139744520187378, | |
| "learning_rate": 4.112520480923018e-05, | |
| "loss": 0.6148, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.1778484249339583, | |
| "grad_norm": 0.6646268367767334, | |
| "learning_rate": 4.1108917792913846e-05, | |
| "loss": 0.5222, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.17817415464995456, | |
| "grad_norm": 0.4842844009399414, | |
| "learning_rate": 4.109263077659751e-05, | |
| "loss": 0.6225, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.17849988436595082, | |
| "grad_norm": 0.6158716082572937, | |
| "learning_rate": 4.1076343760281184e-05, | |
| "loss": 0.634, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.17882561408194708, | |
| "grad_norm": 0.5122677683830261, | |
| "learning_rate": 4.106005674396485e-05, | |
| "loss": 0.6355, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.17915134379794334, | |
| "grad_norm": 0.6086121201515198, | |
| "learning_rate": 4.1043769727648515e-05, | |
| "loss": 0.5787, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.1794770735139396, | |
| "grad_norm": 0.5853461623191833, | |
| "learning_rate": 4.102748271133218e-05, | |
| "loss": 0.5935, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.17980280322993586, | |
| "grad_norm": 0.9216148853302002, | |
| "learning_rate": 4.101119569501585e-05, | |
| "loss": 0.575, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.18012853294593212, | |
| "grad_norm": 0.6602348685264587, | |
| "learning_rate": 4.099490867869952e-05, | |
| "loss": 0.6324, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.18045426266192838, | |
| "grad_norm": 0.7494210004806519, | |
| "learning_rate": 4.0978621662383185e-05, | |
| "loss": 0.5859, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.18077999237792464, | |
| "grad_norm": 0.6391832232475281, | |
| "learning_rate": 4.096233464606685e-05, | |
| "loss": 0.6172, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.1811057220939209, | |
| "grad_norm": 0.5824201107025146, | |
| "learning_rate": 4.0946047629750517e-05, | |
| "loss": 0.6298, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.18143145180991715, | |
| "grad_norm": 0.6924212574958801, | |
| "learning_rate": 4.092976061343419e-05, | |
| "loss": 0.6105, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.1817571815259134, | |
| "grad_norm": 0.4423877000808716, | |
| "learning_rate": 4.0913473597117855e-05, | |
| "loss": 0.5613, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.1820829112419097, | |
| "grad_norm": 0.6090314984321594, | |
| "learning_rate": 4.0897186580801514e-05, | |
| "loss": 0.6643, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.18240864095790596, | |
| "grad_norm": 0.7554407119750977, | |
| "learning_rate": 4.0880899564485186e-05, | |
| "loss": 0.6017, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.18273437067390222, | |
| "grad_norm": 0.8148972988128662, | |
| "learning_rate": 4.086461254816885e-05, | |
| "loss": 0.6539, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.18306010038989848, | |
| "grad_norm": 0.5610066652297974, | |
| "learning_rate": 4.0848325531852525e-05, | |
| "loss": 0.5872, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.18338583010589474, | |
| "grad_norm": 0.6361645460128784, | |
| "learning_rate": 4.0832038515536183e-05, | |
| "loss": 0.5815, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.183711559821891, | |
| "grad_norm": 0.4567771553993225, | |
| "learning_rate": 4.0815751499219856e-05, | |
| "loss": 0.5799, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.18403728953788726, | |
| "grad_norm": 0.8705578446388245, | |
| "learning_rate": 4.079946448290352e-05, | |
| "loss": 0.6088, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.18436301925388351, | |
| "grad_norm": 0.8278294801712036, | |
| "learning_rate": 4.078317746658719e-05, | |
| "loss": 0.6064, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.18468874896987977, | |
| "grad_norm": 0.38864201307296753, | |
| "learning_rate": 4.076689045027085e-05, | |
| "loss": 0.5705, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.18501447868587603, | |
| "grad_norm": 0.6986147165298462, | |
| "learning_rate": 4.075060343395452e-05, | |
| "loss": 0.6071, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.1853402084018723, | |
| "grad_norm": 0.9127377867698669, | |
| "learning_rate": 4.073431641763819e-05, | |
| "loss": 0.608, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.18566593811786855, | |
| "grad_norm": 0.5072229504585266, | |
| "learning_rate": 4.071802940132186e-05, | |
| "loss": 0.583, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.1859916678338648, | |
| "grad_norm": 0.47545337677001953, | |
| "learning_rate": 4.070174238500552e-05, | |
| "loss": 0.5826, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.18631739754986107, | |
| "grad_norm": 0.5175743103027344, | |
| "learning_rate": 4.068545536868919e-05, | |
| "loss": 0.6184, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.18664312726585733, | |
| "grad_norm": 0.7252177596092224, | |
| "learning_rate": 4.0669168352372854e-05, | |
| "loss": 0.6042, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.1869688569818536, | |
| "grad_norm": 0.21297673881053925, | |
| "learning_rate": 4.065288133605653e-05, | |
| "loss": 0.5874, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.18729458669784985, | |
| "grad_norm": 0.6985592246055603, | |
| "learning_rate": 4.063659431974019e-05, | |
| "loss": 0.5641, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.1876203164138461, | |
| "grad_norm": 0.35783612728118896, | |
| "learning_rate": 4.062030730342386e-05, | |
| "loss": 0.5743, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.1879460461298424, | |
| "grad_norm": 0.40871796011924744, | |
| "learning_rate": 4.0604020287107524e-05, | |
| "loss": 0.6418, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.18827177584583865, | |
| "grad_norm": 0.6412025094032288, | |
| "learning_rate": 4.0587733270791197e-05, | |
| "loss": 0.6048, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.1885975055618349, | |
| "grad_norm": 0.6944416165351868, | |
| "learning_rate": 4.057144625447486e-05, | |
| "loss": 0.5647, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.18892323527783117, | |
| "grad_norm": 0.8592963218688965, | |
| "learning_rate": 4.055515923815852e-05, | |
| "loss": 0.5703, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.18924896499382743, | |
| "grad_norm": 0.7240419983863831, | |
| "learning_rate": 4.0538872221842194e-05, | |
| "loss": 0.6025, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.1895746947098237, | |
| "grad_norm": 0.3861270546913147, | |
| "learning_rate": 4.052258520552586e-05, | |
| "loss": 0.5864, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.18990042442581995, | |
| "grad_norm": 0.6718447208404541, | |
| "learning_rate": 4.050629818920953e-05, | |
| "loss": 0.6139, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.1902261541418162, | |
| "grad_norm": 0.7049744129180908, | |
| "learning_rate": 4.049001117289319e-05, | |
| "loss": 0.5697, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.19055188385781247, | |
| "grad_norm": 0.39576876163482666, | |
| "learning_rate": 4.047372415657686e-05, | |
| "loss": 0.5987, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.19087761357380872, | |
| "grad_norm": 0.7814981341362, | |
| "learning_rate": 4.045743714026053e-05, | |
| "loss": 0.5715, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.19120334328980498, | |
| "grad_norm": 1.0083011388778687, | |
| "learning_rate": 4.0441150123944195e-05, | |
| "loss": 0.6355, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.19152907300580124, | |
| "grad_norm": 0.7083866596221924, | |
| "learning_rate": 4.042486310762786e-05, | |
| "loss": 0.6666, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.1918548027217975, | |
| "grad_norm": 0.4740765690803528, | |
| "learning_rate": 4.0408576091311526e-05, | |
| "loss": 0.5773, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.19218053243779376, | |
| "grad_norm": 0.3599790632724762, | |
| "learning_rate": 4.03922890749952e-05, | |
| "loss": 0.5916, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.19250626215379002, | |
| "grad_norm": 0.6107310652732849, | |
| "learning_rate": 4.0376002058678865e-05, | |
| "loss": 0.63, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.19283199186978628, | |
| "grad_norm": 0.6388813257217407, | |
| "learning_rate": 4.035971504236253e-05, | |
| "loss": 0.6197, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.19315772158578254, | |
| "grad_norm": 0.4137844145298004, | |
| "learning_rate": 4.0343428026046196e-05, | |
| "loss": 0.6185, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.1934834513017788, | |
| "grad_norm": 0.6289616823196411, | |
| "learning_rate": 4.032714100972986e-05, | |
| "loss": 0.6367, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.19380918101777508, | |
| "grad_norm": 0.7528841495513916, | |
| "learning_rate": 4.0310853993413534e-05, | |
| "loss": 0.5783, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.19413491073377134, | |
| "grad_norm": 0.7345238924026489, | |
| "learning_rate": 4.02945669770972e-05, | |
| "loss": 0.6378, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.1944606404497676, | |
| "grad_norm": 0.7652753591537476, | |
| "learning_rate": 4.0278279960780866e-05, | |
| "loss": 0.5419, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.19478637016576386, | |
| "grad_norm": 0.3726235032081604, | |
| "learning_rate": 4.026199294446453e-05, | |
| "loss": 0.5933, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.19511209988176012, | |
| "grad_norm": 0.475990355014801, | |
| "learning_rate": 4.0245705928148204e-05, | |
| "loss": 0.5421, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.19543782959775638, | |
| "grad_norm": 0.8618846535682678, | |
| "learning_rate": 4.022941891183187e-05, | |
| "loss": 0.6149, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.19576355931375264, | |
| "grad_norm": 0.3643835484981537, | |
| "learning_rate": 4.0213131895515535e-05, | |
| "loss": 0.5898, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.1960892890297489, | |
| "grad_norm": 0.6492701172828674, | |
| "learning_rate": 4.01968448791992e-05, | |
| "loss": 0.6115, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.19641501874574516, | |
| "grad_norm": 0.46400219202041626, | |
| "learning_rate": 4.018055786288287e-05, | |
| "loss": 0.6093, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.19674074846174142, | |
| "grad_norm": 0.6529611349105835, | |
| "learning_rate": 4.016427084656654e-05, | |
| "loss": 0.5663, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.19706647817773768, | |
| "grad_norm": 0.8332497477531433, | |
| "learning_rate": 4.0147983830250205e-05, | |
| "loss": 0.557, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.19739220789373393, | |
| "grad_norm": 0.43394774198532104, | |
| "learning_rate": 4.013169681393387e-05, | |
| "loss": 0.5864, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.1977179376097302, | |
| "grad_norm": 0.3713783919811249, | |
| "learning_rate": 4.0115409797617537e-05, | |
| "loss": 0.597, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.19804366732572645, | |
| "grad_norm": 0.5605040788650513, | |
| "learning_rate": 4.00991227813012e-05, | |
| "loss": 0.5965, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.1983693970417227, | |
| "grad_norm": 0.4591531455516815, | |
| "learning_rate": 4.0082835764984875e-05, | |
| "loss": 0.5718, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.19869512675771897, | |
| "grad_norm": 0.7599985003471375, | |
| "learning_rate": 4.0066548748668534e-05, | |
| "loss": 0.6088, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.19902085647371523, | |
| "grad_norm": 0.7234918475151062, | |
| "learning_rate": 4.0050261732352206e-05, | |
| "loss": 0.6022, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.1993465861897115, | |
| "grad_norm": 0.8344034552574158, | |
| "learning_rate": 4.003397471603587e-05, | |
| "loss": 0.5978, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.19967231590570778, | |
| "grad_norm": 0.7539324164390564, | |
| "learning_rate": 4.0017687699719544e-05, | |
| "loss": 0.5979, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.19999804562170403, | |
| "grad_norm": 0.7535436153411865, | |
| "learning_rate": 4.00014006834032e-05, | |
| "loss": 0.5632, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.2003237753377003, | |
| "grad_norm": 1.0253859758377075, | |
| "learning_rate": 3.998511366708687e-05, | |
| "loss": 0.6245, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.20064950505369655, | |
| "grad_norm": 0.8442240357398987, | |
| "learning_rate": 3.996882665077054e-05, | |
| "loss": 0.56, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.2009752347696928, | |
| "grad_norm": 0.7696794867515564, | |
| "learning_rate": 3.995253963445421e-05, | |
| "loss": 0.5525, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.20130096448568907, | |
| "grad_norm": 1.0839108228683472, | |
| "learning_rate": 3.993625261813787e-05, | |
| "loss": 0.576, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.20162669420168533, | |
| "grad_norm": 0.4837821125984192, | |
| "learning_rate": 3.991996560182154e-05, | |
| "loss": 0.6654, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.2019524239176816, | |
| "grad_norm": 0.8696286082267761, | |
| "learning_rate": 3.990367858550521e-05, | |
| "loss": 0.5237, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.20227815363367785, | |
| "grad_norm": 0.5389662384986877, | |
| "learning_rate": 3.988739156918888e-05, | |
| "loss": 0.5765, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.2026038833496741, | |
| "grad_norm": 0.39996546506881714, | |
| "learning_rate": 3.987110455287254e-05, | |
| "loss": 0.5666, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.20292961306567037, | |
| "grad_norm": 0.5612654685974121, | |
| "learning_rate": 3.985481753655621e-05, | |
| "loss": 0.5975, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.20325534278166663, | |
| "grad_norm": 0.4764688014984131, | |
| "learning_rate": 3.9838530520239874e-05, | |
| "loss": 0.5973, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.20358107249766288, | |
| "grad_norm": 0.538745105266571, | |
| "learning_rate": 3.982224350392355e-05, | |
| "loss": 0.6108, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.20390680221365914, | |
| "grad_norm": 0.6589317321777344, | |
| "learning_rate": 3.980595648760721e-05, | |
| "loss": 0.5482, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.2042325319296554, | |
| "grad_norm": 0.8373557925224304, | |
| "learning_rate": 3.978966947129088e-05, | |
| "loss": 0.5671, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.20455826164565166, | |
| "grad_norm": 0.6305526494979858, | |
| "learning_rate": 3.9773382454974544e-05, | |
| "loss": 0.6205, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.20488399136164792, | |
| "grad_norm": 0.6550065875053406, | |
| "learning_rate": 3.9757095438658216e-05, | |
| "loss": 0.5805, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.20520972107764418, | |
| "grad_norm": 0.6951280236244202, | |
| "learning_rate": 3.974080842234188e-05, | |
| "loss": 0.6103, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.20553545079364044, | |
| "grad_norm": 0.5202652215957642, | |
| "learning_rate": 3.972452140602554e-05, | |
| "loss": 0.5623, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.20586118050963673, | |
| "grad_norm": 1.0889042615890503, | |
| "learning_rate": 3.9708234389709214e-05, | |
| "loss": 0.5879, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.20618691022563299, | |
| "grad_norm": 0.4142896234989166, | |
| "learning_rate": 3.969194737339288e-05, | |
| "loss": 0.6148, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.20651263994162924, | |
| "grad_norm": 0.6650342345237732, | |
| "learning_rate": 3.967566035707655e-05, | |
| "loss": 0.5902, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.2068383696576255, | |
| "grad_norm": 0.42452552914619446, | |
| "learning_rate": 3.965937334076021e-05, | |
| "loss": 0.4877, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.20716409937362176, | |
| "grad_norm": 0.6702756881713867, | |
| "learning_rate": 3.964308632444388e-05, | |
| "loss": 0.5943, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.20748982908961802, | |
| "grad_norm": 0.9007012248039246, | |
| "learning_rate": 3.962679930812755e-05, | |
| "loss": 0.5652, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.20781555880561428, | |
| "grad_norm": 0.8962705135345459, | |
| "learning_rate": 3.9610512291811215e-05, | |
| "loss": 0.5731, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.20814128852161054, | |
| "grad_norm": 0.8256299495697021, | |
| "learning_rate": 3.959422527549489e-05, | |
| "loss": 0.5596, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.2084670182376068, | |
| "grad_norm": 0.5674106478691101, | |
| "learning_rate": 3.9577938259178546e-05, | |
| "loss": 0.557, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.20879274795360306, | |
| "grad_norm": 0.564755916595459, | |
| "learning_rate": 3.956165124286222e-05, | |
| "loss": 0.5735, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.20911847766959932, | |
| "grad_norm": 1.0437874794006348, | |
| "learning_rate": 3.9545364226545884e-05, | |
| "loss": 0.5371, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.20944420738559558, | |
| "grad_norm": 0.877699077129364, | |
| "learning_rate": 3.952907721022956e-05, | |
| "loss": 0.538, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.20976993710159184, | |
| "grad_norm": 0.6481153964996338, | |
| "learning_rate": 3.9512790193913216e-05, | |
| "loss": 0.5763, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.2100956668175881, | |
| "grad_norm": 0.7963904142379761, | |
| "learning_rate": 3.949650317759688e-05, | |
| "loss": 0.5617, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.21042139653358435, | |
| "grad_norm": 1.1034698486328125, | |
| "learning_rate": 3.9480216161280554e-05, | |
| "loss": 0.5876, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.2107471262495806, | |
| "grad_norm": 0.7540128827095032, | |
| "learning_rate": 3.946392914496422e-05, | |
| "loss": 0.574, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.21107285596557687, | |
| "grad_norm": 0.7184910178184509, | |
| "learning_rate": 3.9447642128647886e-05, | |
| "loss": 0.5328, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.21139858568157313, | |
| "grad_norm": 0.7150009274482727, | |
| "learning_rate": 3.943135511233155e-05, | |
| "loss": 0.6049, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.21172431539756942, | |
| "grad_norm": 0.4451941251754761, | |
| "learning_rate": 3.9415068096015224e-05, | |
| "loss": 0.5958, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.21205004511356568, | |
| "grad_norm": 1.00858736038208, | |
| "learning_rate": 3.939878107969889e-05, | |
| "loss": 0.5752, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.21237577482956194, | |
| "grad_norm": 0.7953845858573914, | |
| "learning_rate": 3.9382494063382555e-05, | |
| "loss": 0.5555, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.2127015045455582, | |
| "grad_norm": 0.5992127060890198, | |
| "learning_rate": 3.936620704706622e-05, | |
| "loss": 0.59, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.21302723426155445, | |
| "grad_norm": 0.5878809690475464, | |
| "learning_rate": 3.934992003074989e-05, | |
| "loss": 0.5881, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.2133529639775507, | |
| "grad_norm": 0.9159529805183411, | |
| "learning_rate": 3.933363301443356e-05, | |
| "loss": 0.5951, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.21367869369354697, | |
| "grad_norm": 0.6340069770812988, | |
| "learning_rate": 3.9317345998117225e-05, | |
| "loss": 0.5799, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.21400442340954323, | |
| "grad_norm": 0.8940368890762329, | |
| "learning_rate": 3.930105898180089e-05, | |
| "loss": 0.5273, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.2143301531255395, | |
| "grad_norm": 0.7908622622489929, | |
| "learning_rate": 3.9284771965484556e-05, | |
| "loss": 0.5472, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.21465588284153575, | |
| "grad_norm": 0.9964277744293213, | |
| "learning_rate": 3.926848494916822e-05, | |
| "loss": 0.5719, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.214981612557532, | |
| "grad_norm": 0.6497515439987183, | |
| "learning_rate": 3.9252197932851895e-05, | |
| "loss": 0.5338, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.21530734227352827, | |
| "grad_norm": 0.8303185105323792, | |
| "learning_rate": 3.9235910916535554e-05, | |
| "loss": 0.5237, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.21563307198952453, | |
| "grad_norm": 0.8530830144882202, | |
| "learning_rate": 3.9219623900219226e-05, | |
| "loss": 0.5328, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.2159588017055208, | |
| "grad_norm": 0.9482616782188416, | |
| "learning_rate": 3.920333688390289e-05, | |
| "loss": 0.5548, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.21628453142151705, | |
| "grad_norm": 0.430633008480072, | |
| "learning_rate": 3.9187049867586564e-05, | |
| "loss": 0.551, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.2166102611375133, | |
| "grad_norm": 0.5612674355506897, | |
| "learning_rate": 3.917076285127022e-05, | |
| "loss": 0.5571, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.21693599085350956, | |
| "grad_norm": 0.7157821655273438, | |
| "learning_rate": 3.915447583495389e-05, | |
| "loss": 0.555, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.21726172056950582, | |
| "grad_norm": 0.6013966202735901, | |
| "learning_rate": 3.913818881863756e-05, | |
| "loss": 0.585, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.2175874502855021, | |
| "grad_norm": 0.4616648554801941, | |
| "learning_rate": 3.912190180232123e-05, | |
| "loss": 0.5832, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.21791318000149837, | |
| "grad_norm": 0.6870980858802795, | |
| "learning_rate": 3.910561478600489e-05, | |
| "loss": 0.5944, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.21823890971749463, | |
| "grad_norm": 0.629490315914154, | |
| "learning_rate": 3.908932776968856e-05, | |
| "loss": 0.5279, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.2185646394334909, | |
| "grad_norm": 0.5478650331497192, | |
| "learning_rate": 3.907304075337223e-05, | |
| "loss": 0.5815, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.21889036914948715, | |
| "grad_norm": 0.6581255793571472, | |
| "learning_rate": 3.90567537370559e-05, | |
| "loss": 0.5661, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.2192160988654834, | |
| "grad_norm": 0.7738802433013916, | |
| "learning_rate": 3.904046672073956e-05, | |
| "loss": 0.5901, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.21954182858147966, | |
| "grad_norm": 0.5748447179794312, | |
| "learning_rate": 3.902417970442323e-05, | |
| "loss": 0.5813, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.21986755829747592, | |
| "grad_norm": 0.7152987718582153, | |
| "learning_rate": 3.9007892688106894e-05, | |
| "loss": 0.5359, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.22019328801347218, | |
| "grad_norm": 0.867574155330658, | |
| "learning_rate": 3.899160567179057e-05, | |
| "loss": 0.5419, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.22051901772946844, | |
| "grad_norm": 0.8477634787559509, | |
| "learning_rate": 3.897531865547423e-05, | |
| "loss": 0.5788, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.2208447474454647, | |
| "grad_norm": 0.7993571758270264, | |
| "learning_rate": 3.89590316391579e-05, | |
| "loss": 0.528, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.22117047716146096, | |
| "grad_norm": 0.6607359647750854, | |
| "learning_rate": 3.8942744622841564e-05, | |
| "loss": 0.5647, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.22149620687745722, | |
| "grad_norm": 0.6910780072212219, | |
| "learning_rate": 3.892645760652523e-05, | |
| "loss": 0.5418, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.22182193659345348, | |
| "grad_norm": 0.4793308675289154, | |
| "learning_rate": 3.89101705902089e-05, | |
| "loss": 0.5913, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.22214766630944974, | |
| "grad_norm": 0.7222141027450562, | |
| "learning_rate": 3.889388357389257e-05, | |
| "loss": 0.6128, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.222473396025446, | |
| "grad_norm": 0.43712884187698364, | |
| "learning_rate": 3.8877596557576233e-05, | |
| "loss": 0.583, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.22279912574144226, | |
| "grad_norm": 0.5187420845031738, | |
| "learning_rate": 3.88613095412599e-05, | |
| "loss": 0.5758, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.22312485545743851, | |
| "grad_norm": 0.5550572872161865, | |
| "learning_rate": 3.884502252494357e-05, | |
| "loss": 0.5269, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.22345058517343477, | |
| "grad_norm": 0.7551735639572144, | |
| "learning_rate": 3.882873550862724e-05, | |
| "loss": 0.6005, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.22377631488943106, | |
| "grad_norm": 0.7213869690895081, | |
| "learning_rate": 3.8812448492310896e-05, | |
| "loss": 0.5174, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.22410204460542732, | |
| "grad_norm": 0.6445099115371704, | |
| "learning_rate": 3.879616147599457e-05, | |
| "loss": 0.5501, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.22442777432142358, | |
| "grad_norm": 0.7937589883804321, | |
| "learning_rate": 3.8779874459678235e-05, | |
| "loss": 0.5598, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.22475350403741984, | |
| "grad_norm": 0.5327324271202087, | |
| "learning_rate": 3.876358744336191e-05, | |
| "loss": 0.531, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.2250792337534161, | |
| "grad_norm": 0.7627710103988647, | |
| "learning_rate": 3.8747300427045566e-05, | |
| "loss": 0.578, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.22540496346941236, | |
| "grad_norm": 0.5054932832717896, | |
| "learning_rate": 3.873101341072924e-05, | |
| "loss": 0.5905, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.22573069318540862, | |
| "grad_norm": 0.6468352675437927, | |
| "learning_rate": 3.8714726394412904e-05, | |
| "loss": 0.5931, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.22605642290140487, | |
| "grad_norm": 0.37974539399147034, | |
| "learning_rate": 3.869843937809657e-05, | |
| "loss": 0.5777, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.22638215261740113, | |
| "grad_norm": 0.8011950850486755, | |
| "learning_rate": 3.8682152361780236e-05, | |
| "loss": 0.5187, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.2267078823333974, | |
| "grad_norm": 0.40006023645401, | |
| "learning_rate": 3.86658653454639e-05, | |
| "loss": 0.5292, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.22703361204939365, | |
| "grad_norm": 0.42605412006378174, | |
| "learning_rate": 3.8649578329147574e-05, | |
| "loss": 0.5704, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.2273593417653899, | |
| "grad_norm": 0.820277988910675, | |
| "learning_rate": 3.863329131283124e-05, | |
| "loss": 0.5641, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.22768507148138617, | |
| "grad_norm": 0.6671209931373596, | |
| "learning_rate": 3.8617004296514905e-05, | |
| "loss": 0.5942, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.22801080119738243, | |
| "grad_norm": 0.7214267253875732, | |
| "learning_rate": 3.860071728019857e-05, | |
| "loss": 0.6078, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.2283365309133787, | |
| "grad_norm": 0.5705024003982544, | |
| "learning_rate": 3.858443026388224e-05, | |
| "loss": 0.5111, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.22866226062937495, | |
| "grad_norm": 0.7017680406570435, | |
| "learning_rate": 3.856814324756591e-05, | |
| "loss": 0.5386, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.2289879903453712, | |
| "grad_norm": 0.36700716614723206, | |
| "learning_rate": 3.8551856231249575e-05, | |
| "loss": 0.5947, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.22931372006136747, | |
| "grad_norm": 1.018539309501648, | |
| "learning_rate": 3.853556921493324e-05, | |
| "loss": 0.5739, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.22963944977736375, | |
| "grad_norm": 0.8273037672042847, | |
| "learning_rate": 3.851928219861691e-05, | |
| "loss": 0.5247, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.22996517949336, | |
| "grad_norm": 1.0655425786972046, | |
| "learning_rate": 3.850299518230058e-05, | |
| "loss": 0.5397, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.23029090920935627, | |
| "grad_norm": 0.38495421409606934, | |
| "learning_rate": 3.8486708165984245e-05, | |
| "loss": 0.5844, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.23061663892535253, | |
| "grad_norm": 0.9659711122512817, | |
| "learning_rate": 3.847042114966791e-05, | |
| "loss": 0.5873, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.2309423686413488, | |
| "grad_norm": 0.7230137586593628, | |
| "learning_rate": 3.8454134133351576e-05, | |
| "loss": 0.593, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.23126809835734505, | |
| "grad_norm": 0.9325969219207764, | |
| "learning_rate": 3.843784711703524e-05, | |
| "loss": 0.5965, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.2315938280733413, | |
| "grad_norm": 0.6791651248931885, | |
| "learning_rate": 3.8421560100718915e-05, | |
| "loss": 0.6223, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.23191955778933757, | |
| "grad_norm": 0.8241651058197021, | |
| "learning_rate": 3.8405273084402573e-05, | |
| "loss": 0.5257, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.23224528750533383, | |
| "grad_norm": 0.8813059329986572, | |
| "learning_rate": 3.8388986068086246e-05, | |
| "loss": 0.5965, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.23257101722133008, | |
| "grad_norm": 0.7717010378837585, | |
| "learning_rate": 3.837269905176991e-05, | |
| "loss": 0.5502, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.23289674693732634, | |
| "grad_norm": 0.39482927322387695, | |
| "learning_rate": 3.8356412035453584e-05, | |
| "loss": 0.5618, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.2332224766533226, | |
| "grad_norm": 0.8985998630523682, | |
| "learning_rate": 3.834012501913724e-05, | |
| "loss": 0.5247, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.23354820636931886, | |
| "grad_norm": 0.4451032876968384, | |
| "learning_rate": 3.832383800282091e-05, | |
| "loss": 0.565, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.23387393608531512, | |
| "grad_norm": 0.46427956223487854, | |
| "learning_rate": 3.830755098650458e-05, | |
| "loss": 0.5511, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.23419966580131138, | |
| "grad_norm": 1.1371232271194458, | |
| "learning_rate": 3.829126397018825e-05, | |
| "loss": 0.5867, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.23452539551730764, | |
| "grad_norm": 0.5856015086174011, | |
| "learning_rate": 3.827497695387191e-05, | |
| "loss": 0.5425, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.2348511252333039, | |
| "grad_norm": 0.5723338723182678, | |
| "learning_rate": 3.825868993755558e-05, | |
| "loss": 0.5828, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.23517685494930016, | |
| "grad_norm": 0.6274189352989197, | |
| "learning_rate": 3.824240292123925e-05, | |
| "loss": 0.4961, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.23550258466529644, | |
| "grad_norm": 0.5841485857963562, | |
| "learning_rate": 3.822611590492292e-05, | |
| "loss": 0.5639, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.2358283143812927, | |
| "grad_norm": 0.9061130285263062, | |
| "learning_rate": 3.820982888860658e-05, | |
| "loss": 0.5126, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.23615404409728896, | |
| "grad_norm": 0.9499684572219849, | |
| "learning_rate": 3.819354187229025e-05, | |
| "loss": 0.5684, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.23647977381328522, | |
| "grad_norm": 0.7132393717765808, | |
| "learning_rate": 3.8177254855973914e-05, | |
| "loss": 0.5287, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.23680550352928148, | |
| "grad_norm": 0.8645475506782532, | |
| "learning_rate": 3.8160967839657587e-05, | |
| "loss": 0.564, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.23713123324527774, | |
| "grad_norm": 0.8675580024719238, | |
| "learning_rate": 3.814468082334125e-05, | |
| "loss": 0.5435, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.237456962961274, | |
| "grad_norm": 0.7194923162460327, | |
| "learning_rate": 3.812839380702492e-05, | |
| "loss": 0.5843, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.23778269267727026, | |
| "grad_norm": 0.782618522644043, | |
| "learning_rate": 3.8112106790708584e-05, | |
| "loss": 0.5609, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.23810842239326652, | |
| "grad_norm": 0.6671516299247742, | |
| "learning_rate": 3.809581977439225e-05, | |
| "loss": 0.4925, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.23843415210926278, | |
| "grad_norm": 0.8488081097602844, | |
| "learning_rate": 3.807953275807592e-05, | |
| "loss": 0.5536, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.23875988182525903, | |
| "grad_norm": 0.7259848117828369, | |
| "learning_rate": 3.806324574175959e-05, | |
| "loss": 0.5372, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.2390856115412553, | |
| "grad_norm": 0.5849174857139587, | |
| "learning_rate": 3.8046958725443253e-05, | |
| "loss": 0.5602, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.23941134125725155, | |
| "grad_norm": 0.36567142605781555, | |
| "learning_rate": 3.803067170912692e-05, | |
| "loss": 0.5976, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.2397370709732478, | |
| "grad_norm": 0.8540560007095337, | |
| "learning_rate": 3.801438469281059e-05, | |
| "loss": 0.576, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.24006280068924407, | |
| "grad_norm": 0.7733421921730042, | |
| "learning_rate": 3.799809767649426e-05, | |
| "loss": 0.5446, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.24038853040524033, | |
| "grad_norm": 0.6541240811347961, | |
| "learning_rate": 3.7981810660177916e-05, | |
| "loss": 0.5302, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.2407142601212366, | |
| "grad_norm": 0.6777580976486206, | |
| "learning_rate": 3.796552364386159e-05, | |
| "loss": 0.5742, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.24103998983723285, | |
| "grad_norm": 1.1045103073120117, | |
| "learning_rate": 3.7949236627545255e-05, | |
| "loss": 0.5391, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.2413657195532291, | |
| "grad_norm": 1.223781943321228, | |
| "learning_rate": 3.793294961122893e-05, | |
| "loss": 0.5754, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.2416914492692254, | |
| "grad_norm": 0.7645404934883118, | |
| "learning_rate": 3.7916662594912586e-05, | |
| "loss": 0.5424, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.24201717898522165, | |
| "grad_norm": 0.8637171983718872, | |
| "learning_rate": 3.790037557859626e-05, | |
| "loss": 0.5577, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.2423429087012179, | |
| "grad_norm": 0.633642315864563, | |
| "learning_rate": 3.7884088562279924e-05, | |
| "loss": 0.5513, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.24266863841721417, | |
| "grad_norm": 0.48609936237335205, | |
| "learning_rate": 3.786780154596359e-05, | |
| "loss": 0.6002, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.24299436813321043, | |
| "grad_norm": 0.3668748140335083, | |
| "learning_rate": 3.7851514529647256e-05, | |
| "loss": 0.5947, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.2433200978492067, | |
| "grad_norm": 0.735894501209259, | |
| "learning_rate": 3.783522751333092e-05, | |
| "loss": 0.5862, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.24364582756520295, | |
| "grad_norm": 0.8264063000679016, | |
| "learning_rate": 3.7818940497014594e-05, | |
| "loss": 0.5749, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.2439715572811992, | |
| "grad_norm": 0.482183575630188, | |
| "learning_rate": 3.780265348069826e-05, | |
| "loss": 0.5553, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.24429728699719547, | |
| "grad_norm": 0.6649850606918335, | |
| "learning_rate": 3.7786366464381925e-05, | |
| "loss": 0.6042, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.24462301671319173, | |
| "grad_norm": 0.5215208530426025, | |
| "learning_rate": 3.777007944806559e-05, | |
| "loss": 0.5134, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.24494874642918799, | |
| "grad_norm": 0.6028915643692017, | |
| "learning_rate": 3.775379243174926e-05, | |
| "loss": 0.5, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.24527447614518424, | |
| "grad_norm": 0.5038050413131714, | |
| "learning_rate": 3.773750541543293e-05, | |
| "loss": 0.6081, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.2456002058611805, | |
| "grad_norm": 0.568586528301239, | |
| "learning_rate": 3.7721218399116595e-05, | |
| "loss": 0.5484, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.24592593557717676, | |
| "grad_norm": 0.4442402720451355, | |
| "learning_rate": 3.770493138280026e-05, | |
| "loss": 0.5983, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.24625166529317302, | |
| "grad_norm": 0.775284469127655, | |
| "learning_rate": 3.7688644366483927e-05, | |
| "loss": 0.549, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.24657739500916928, | |
| "grad_norm": 0.7132833003997803, | |
| "learning_rate": 3.76723573501676e-05, | |
| "loss": 0.5317, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.24690312472516554, | |
| "grad_norm": 0.7935360074043274, | |
| "learning_rate": 3.7656070333851265e-05, | |
| "loss": 0.5389, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.2472288544411618, | |
| "grad_norm": 0.5749487280845642, | |
| "learning_rate": 3.7639783317534924e-05, | |
| "loss": 0.5918, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.2475545841571581, | |
| "grad_norm": 0.6536827087402344, | |
| "learning_rate": 3.7623496301218596e-05, | |
| "loss": 0.5245, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.24788031387315435, | |
| "grad_norm": 0.7014347314834595, | |
| "learning_rate": 3.760720928490226e-05, | |
| "loss": 0.5661, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.2482060435891506, | |
| "grad_norm": 0.8436623811721802, | |
| "learning_rate": 3.7590922268585934e-05, | |
| "loss": 0.5714, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.24853177330514686, | |
| "grad_norm": 0.6371897459030151, | |
| "learning_rate": 3.7574635252269593e-05, | |
| "loss": 0.5767, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.24885750302114312, | |
| "grad_norm": 0.7796430587768555, | |
| "learning_rate": 3.7558348235953266e-05, | |
| "loss": 0.5308, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.24918323273713938, | |
| "grad_norm": 0.6565324664115906, | |
| "learning_rate": 3.754206121963693e-05, | |
| "loss": 0.5377, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.24950896245313564, | |
| "grad_norm": 0.6670543551445007, | |
| "learning_rate": 3.75257742033206e-05, | |
| "loss": 0.6095, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.2498346921691319, | |
| "grad_norm": 0.8650514483451843, | |
| "learning_rate": 3.750948718700426e-05, | |
| "loss": 0.5586, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.25016042188512816, | |
| "grad_norm": 0.42015933990478516, | |
| "learning_rate": 3.749320017068793e-05, | |
| "loss": 0.5274, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.2504861516011244, | |
| "grad_norm": 0.5667533278465271, | |
| "learning_rate": 3.74769131543716e-05, | |
| "loss": 0.5628, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.2508118813171207, | |
| "grad_norm": 0.6887187361717224, | |
| "learning_rate": 3.746062613805527e-05, | |
| "loss": 0.5663, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.25113761103311694, | |
| "grad_norm": 0.4367005527019501, | |
| "learning_rate": 3.744433912173893e-05, | |
| "loss": 0.5368, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.2514633407491132, | |
| "grad_norm": 0.3392166197299957, | |
| "learning_rate": 3.74280521054226e-05, | |
| "loss": 0.5353, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.25178907046510945, | |
| "grad_norm": 0.5449352860450745, | |
| "learning_rate": 3.7411765089106264e-05, | |
| "loss": 0.5611, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.2521148001811057, | |
| "grad_norm": 0.6924061179161072, | |
| "learning_rate": 3.739547807278994e-05, | |
| "loss": 0.5918, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.252440529897102, | |
| "grad_norm": 0.8356592655181885, | |
| "learning_rate": 3.73791910564736e-05, | |
| "loss": 0.5713, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.25276625961309823, | |
| "grad_norm": 0.9207838177680969, | |
| "learning_rate": 3.736290404015727e-05, | |
| "loss": 0.5078, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.2530919893290945, | |
| "grad_norm": 0.6466575860977173, | |
| "learning_rate": 3.7346617023840934e-05, | |
| "loss": 0.5274, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.25341771904509075, | |
| "grad_norm": 0.5351524353027344, | |
| "learning_rate": 3.7330330007524606e-05, | |
| "loss": 0.5411, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.253743448761087, | |
| "grad_norm": 0.7786761522293091, | |
| "learning_rate": 3.731404299120827e-05, | |
| "loss": 0.4859, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.25406917847708327, | |
| "grad_norm": 0.6750699281692505, | |
| "learning_rate": 3.729775597489194e-05, | |
| "loss": 0.5689, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.2543949081930795, | |
| "grad_norm": 0.7088775038719177, | |
| "learning_rate": 3.7281468958575604e-05, | |
| "loss": 0.5325, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.2547206379090758, | |
| "grad_norm": 0.8920672535896301, | |
| "learning_rate": 3.726518194225927e-05, | |
| "loss": 0.5284, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.25504636762507205, | |
| "grad_norm": 0.6582838296890259, | |
| "learning_rate": 3.724889492594294e-05, | |
| "loss": 0.511, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.2553720973410683, | |
| "grad_norm": 0.6662094593048096, | |
| "learning_rate": 3.723260790962661e-05, | |
| "loss": 0.5618, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.25569782705706456, | |
| "grad_norm": 0.4346591830253601, | |
| "learning_rate": 3.721632089331027e-05, | |
| "loss": 0.54, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.2560235567730608, | |
| "grad_norm": 0.7967207431793213, | |
| "learning_rate": 3.720003387699394e-05, | |
| "loss": 0.5884, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.25634928648905714, | |
| "grad_norm": 0.4879821538925171, | |
| "learning_rate": 3.7183746860677605e-05, | |
| "loss": 0.5557, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.2566750162050534, | |
| "grad_norm": 0.5626016855239868, | |
| "learning_rate": 3.716745984436128e-05, | |
| "loss": 0.498, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.25700074592104966, | |
| "grad_norm": 0.5859974026679993, | |
| "learning_rate": 3.7151172828044936e-05, | |
| "loss": 0.5218, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.2573264756370459, | |
| "grad_norm": 0.7462596893310547, | |
| "learning_rate": 3.713488581172861e-05, | |
| "loss": 0.5093, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.2576522053530422, | |
| "grad_norm": 0.9555974006652832, | |
| "learning_rate": 3.7118598795412274e-05, | |
| "loss": 0.5348, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.25797793506903843, | |
| "grad_norm": 0.7466504573822021, | |
| "learning_rate": 3.710231177909595e-05, | |
| "loss": 0.5383, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.2583036647850347, | |
| "grad_norm": 0.8801865577697754, | |
| "learning_rate": 3.7086024762779606e-05, | |
| "loss": 0.4767, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.25862939450103095, | |
| "grad_norm": 0.48174184560775757, | |
| "learning_rate": 3.706973774646328e-05, | |
| "loss": 0.5528, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.2589551242170272, | |
| "grad_norm": 0.7198649048805237, | |
| "learning_rate": 3.7053450730146944e-05, | |
| "loss": 0.5953, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.25928085393302347, | |
| "grad_norm": 0.4515075385570526, | |
| "learning_rate": 3.703716371383061e-05, | |
| "loss": 0.5505, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.25960658364901973, | |
| "grad_norm": 0.706524670124054, | |
| "learning_rate": 3.7020876697514276e-05, | |
| "loss": 0.6011, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.259932313365016, | |
| "grad_norm": 0.6895307302474976, | |
| "learning_rate": 3.700458968119794e-05, | |
| "loss": 0.5188, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.26025804308101225, | |
| "grad_norm": 0.7927341461181641, | |
| "learning_rate": 3.6988302664881614e-05, | |
| "loss": 0.5739, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.2605837727970085, | |
| "grad_norm": 0.8496550917625427, | |
| "learning_rate": 3.697201564856528e-05, | |
| "loss": 0.5152, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.26090950251300477, | |
| "grad_norm": 0.47138693928718567, | |
| "learning_rate": 3.6955728632248945e-05, | |
| "loss": 0.5475, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.261235232229001, | |
| "grad_norm": 0.8020485639572144, | |
| "learning_rate": 3.693944161593261e-05, | |
| "loss": 0.5489, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.2615609619449973, | |
| "grad_norm": 0.6385429501533508, | |
| "learning_rate": 3.692315459961628e-05, | |
| "loss": 0.5457, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.26188669166099354, | |
| "grad_norm": 0.6027743220329285, | |
| "learning_rate": 3.690686758329995e-05, | |
| "loss": 0.5412, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.2622124213769898, | |
| "grad_norm": 0.6040454506874084, | |
| "learning_rate": 3.6890580566983615e-05, | |
| "loss": 0.5348, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.26253815109298606, | |
| "grad_norm": 0.6697177290916443, | |
| "learning_rate": 3.687429355066728e-05, | |
| "loss": 0.509, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.2628638808089823, | |
| "grad_norm": 0.8428653478622437, | |
| "learning_rate": 3.6858006534350946e-05, | |
| "loss": 0.5505, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.2631896105249786, | |
| "grad_norm": 0.9421257972717285, | |
| "learning_rate": 3.684171951803462e-05, | |
| "loss": 0.5587, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.26351534024097484, | |
| "grad_norm": 0.7752894759178162, | |
| "learning_rate": 3.6825432501718285e-05, | |
| "loss": 0.5308, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.2638410699569711, | |
| "grad_norm": 0.9658520817756653, | |
| "learning_rate": 3.6809145485401944e-05, | |
| "loss": 0.5394, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.26416679967296736, | |
| "grad_norm": 0.3100132644176483, | |
| "learning_rate": 3.6792858469085616e-05, | |
| "loss": 0.5616, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.2644925293889636, | |
| "grad_norm": 1.0838834047317505, | |
| "learning_rate": 3.677657145276928e-05, | |
| "loss": 0.5374, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.2648182591049599, | |
| "grad_norm": 0.9311345219612122, | |
| "learning_rate": 3.6760284436452954e-05, | |
| "loss": 0.5353, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.26514398882095613, | |
| "grad_norm": 0.32365360856056213, | |
| "learning_rate": 3.674399742013661e-05, | |
| "loss": 0.5493, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.2654697185369524, | |
| "grad_norm": 0.6390203833580017, | |
| "learning_rate": 3.6727710403820286e-05, | |
| "loss": 0.5205, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.26579544825294865, | |
| "grad_norm": 0.6106113195419312, | |
| "learning_rate": 3.671142338750395e-05, | |
| "loss": 0.5161, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.2661211779689449, | |
| "grad_norm": 0.4415883421897888, | |
| "learning_rate": 3.669513637118762e-05, | |
| "loss": 0.5235, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.26644690768494117, | |
| "grad_norm": 0.8828484416007996, | |
| "learning_rate": 3.667884935487128e-05, | |
| "loss": 0.5214, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.26677263740093743, | |
| "grad_norm": 0.8186760544776917, | |
| "learning_rate": 3.666256233855495e-05, | |
| "loss": 0.5435, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.2670983671169337, | |
| "grad_norm": 0.43989554047584534, | |
| "learning_rate": 3.664627532223862e-05, | |
| "loss": 0.5653, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.26742409683292995, | |
| "grad_norm": 1.083422303199768, | |
| "learning_rate": 3.662998830592229e-05, | |
| "loss": 0.5338, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.2677498265489262, | |
| "grad_norm": 0.40522611141204834, | |
| "learning_rate": 3.661370128960596e-05, | |
| "loss": 0.4892, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.26807555626492247, | |
| "grad_norm": 0.7010061740875244, | |
| "learning_rate": 3.659741427328962e-05, | |
| "loss": 0.5372, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.2684012859809188, | |
| "grad_norm": 0.9971382021903992, | |
| "learning_rate": 3.6581127256973284e-05, | |
| "loss": 0.501, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.26872701569691504, | |
| "grad_norm": 0.5222276449203491, | |
| "learning_rate": 3.656484024065696e-05, | |
| "loss": 0.5194, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.2690527454129113, | |
| "grad_norm": 0.724824845790863, | |
| "learning_rate": 3.654855322434062e-05, | |
| "loss": 0.499, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.26937847512890756, | |
| "grad_norm": 0.48272421956062317, | |
| "learning_rate": 3.653226620802429e-05, | |
| "loss": 0.486, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.2697042048449038, | |
| "grad_norm": 0.8187432885169983, | |
| "learning_rate": 3.6515979191707954e-05, | |
| "loss": 0.5634, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.2700299345609001, | |
| "grad_norm": 0.46917855739593506, | |
| "learning_rate": 3.6499692175391626e-05, | |
| "loss": 0.5468, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.27035566427689633, | |
| "grad_norm": 0.5338607430458069, | |
| "learning_rate": 3.648340515907529e-05, | |
| "loss": 0.481, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.2706813939928926, | |
| "grad_norm": 0.5420836806297302, | |
| "learning_rate": 3.646711814275896e-05, | |
| "loss": 0.5391, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.27100712370888885, | |
| "grad_norm": 0.5124307870864868, | |
| "learning_rate": 3.6450831126442624e-05, | |
| "loss": 0.5446, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.2713328534248851, | |
| "grad_norm": 0.5944223403930664, | |
| "learning_rate": 3.643454411012629e-05, | |
| "loss": 0.5759, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.27165858314088137, | |
| "grad_norm": 1.1431384086608887, | |
| "learning_rate": 3.641825709380996e-05, | |
| "loss": 0.5416, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.27198431285687763, | |
| "grad_norm": 0.9613766670227051, | |
| "learning_rate": 3.640197007749363e-05, | |
| "loss": 0.521, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.2723100425728739, | |
| "grad_norm": 0.7477935552597046, | |
| "learning_rate": 3.638568306117729e-05, | |
| "loss": 0.558, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.27263577228887015, | |
| "grad_norm": 0.47112804651260376, | |
| "learning_rate": 3.636939604486096e-05, | |
| "loss": 0.5083, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.2729615020048664, | |
| "grad_norm": 0.5914379954338074, | |
| "learning_rate": 3.6353109028544625e-05, | |
| "loss": 0.5776, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.27328723172086267, | |
| "grad_norm": 0.5500662326812744, | |
| "learning_rate": 3.63368220122283e-05, | |
| "loss": 0.5194, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.2736129614368589, | |
| "grad_norm": 0.41591793298721313, | |
| "learning_rate": 3.6320534995911956e-05, | |
| "loss": 0.5266, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.2739386911528552, | |
| "grad_norm": 1.080356478691101, | |
| "learning_rate": 3.630424797959563e-05, | |
| "loss": 0.4964, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.27426442086885144, | |
| "grad_norm": 0.40892690420150757, | |
| "learning_rate": 3.6287960963279294e-05, | |
| "loss": 0.5163, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.2745901505848477, | |
| "grad_norm": 0.7729841470718384, | |
| "learning_rate": 3.627167394696297e-05, | |
| "loss": 0.5336, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.27491588030084396, | |
| "grad_norm": 0.6264617443084717, | |
| "learning_rate": 3.6255386930646626e-05, | |
| "loss": 0.5762, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.2752416100168402, | |
| "grad_norm": 0.8050372004508972, | |
| "learning_rate": 3.623909991433029e-05, | |
| "loss": 0.4509, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.2755673397328365, | |
| "grad_norm": 0.621804416179657, | |
| "learning_rate": 3.6222812898013964e-05, | |
| "loss": 0.5174, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.27589306944883274, | |
| "grad_norm": 0.5717790126800537, | |
| "learning_rate": 3.620652588169763e-05, | |
| "loss": 0.5431, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.276218799164829, | |
| "grad_norm": 0.394345223903656, | |
| "learning_rate": 3.6190238865381295e-05, | |
| "loss": 0.5294, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.27654452888082526, | |
| "grad_norm": 0.8917814493179321, | |
| "learning_rate": 3.617395184906496e-05, | |
| "loss": 0.4955, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.2768702585968215, | |
| "grad_norm": 0.721481442451477, | |
| "learning_rate": 3.6157664832748634e-05, | |
| "loss": 0.5433, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.2771959883128178, | |
| "grad_norm": 0.6476948857307434, | |
| "learning_rate": 3.61413778164323e-05, | |
| "loss": 0.563, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.27752171802881404, | |
| "grad_norm": 0.38036003708839417, | |
| "learning_rate": 3.6125090800115965e-05, | |
| "loss": 0.516, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.2778474477448103, | |
| "grad_norm": 0.6185033917427063, | |
| "learning_rate": 3.610880378379963e-05, | |
| "loss": 0.5178, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.27817317746080655, | |
| "grad_norm": 0.8313725590705872, | |
| "learning_rate": 3.60925167674833e-05, | |
| "loss": 0.5296, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.2784989071768028, | |
| "grad_norm": 0.5369439721107483, | |
| "learning_rate": 3.607622975116697e-05, | |
| "loss": 0.5803, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.27882463689279907, | |
| "grad_norm": 0.7777513265609741, | |
| "learning_rate": 3.6059942734850635e-05, | |
| "loss": 0.4875, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.27915036660879533, | |
| "grad_norm": 0.5527925491333008, | |
| "learning_rate": 3.60436557185343e-05, | |
| "loss": 0.5141, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.2794760963247916, | |
| "grad_norm": 0.8335199356079102, | |
| "learning_rate": 3.6027368702217966e-05, | |
| "loss": 0.4851, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.27980182604078785, | |
| "grad_norm": 0.7015230059623718, | |
| "learning_rate": 3.601108168590163e-05, | |
| "loss": 0.5395, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.2801275557567841, | |
| "grad_norm": 0.7245033979415894, | |
| "learning_rate": 3.5994794669585305e-05, | |
| "loss": 0.5204, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.2804532854727804, | |
| "grad_norm": 0.8472508192062378, | |
| "learning_rate": 3.5978507653268964e-05, | |
| "loss": 0.5087, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.2807790151887767, | |
| "grad_norm": 0.7517431974411011, | |
| "learning_rate": 3.5962220636952636e-05, | |
| "loss": 0.5176, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.28110474490477294, | |
| "grad_norm": 0.5864343643188477, | |
| "learning_rate": 3.59459336206363e-05, | |
| "loss": 0.5828, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.2814304746207692, | |
| "grad_norm": 0.8981267809867859, | |
| "learning_rate": 3.5929646604319974e-05, | |
| "loss": 0.5309, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.28175620433676546, | |
| "grad_norm": 0.8167164325714111, | |
| "learning_rate": 3.591335958800364e-05, | |
| "loss": 0.5513, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.2820819340527617, | |
| "grad_norm": 0.7764830589294434, | |
| "learning_rate": 3.58970725716873e-05, | |
| "loss": 0.5249, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.282407663768758, | |
| "grad_norm": 0.7545201182365417, | |
| "learning_rate": 3.588078555537097e-05, | |
| "loss": 0.5293, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.28273339348475424, | |
| "grad_norm": 0.6954336166381836, | |
| "learning_rate": 3.586449853905464e-05, | |
| "loss": 0.5532, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.2830591232007505, | |
| "grad_norm": 0.6742025017738342, | |
| "learning_rate": 3.584821152273831e-05, | |
| "loss": 0.5356, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.28338485291674675, | |
| "grad_norm": 0.731679379940033, | |
| "learning_rate": 3.583192450642197e-05, | |
| "loss": 0.5128, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.283710582632743, | |
| "grad_norm": 0.7906468510627747, | |
| "learning_rate": 3.581563749010564e-05, | |
| "loss": 0.5359, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.2840363123487393, | |
| "grad_norm": 0.36753523349761963, | |
| "learning_rate": 3.579935047378931e-05, | |
| "loss": 0.5366, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.28436204206473553, | |
| "grad_norm": 0.6043976545333862, | |
| "learning_rate": 3.578306345747298e-05, | |
| "loss": 0.4995, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.2846877717807318, | |
| "grad_norm": 0.7573038339614868, | |
| "learning_rate": 3.576677644115664e-05, | |
| "loss": 0.5093, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.28501350149672805, | |
| "grad_norm": 0.25290992856025696, | |
| "learning_rate": 3.5750489424840304e-05, | |
| "loss": 0.4948, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.2853392312127243, | |
| "grad_norm": 0.6551434397697449, | |
| "learning_rate": 3.5734202408523977e-05, | |
| "loss": 0.5116, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.28566496092872057, | |
| "grad_norm": 0.6715214252471924, | |
| "learning_rate": 3.571791539220764e-05, | |
| "loss": 0.6104, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.2859906906447168, | |
| "grad_norm": 0.7275449633598328, | |
| "learning_rate": 3.570162837589131e-05, | |
| "loss": 0.506, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.2863164203607131, | |
| "grad_norm": 0.2885235846042633, | |
| "learning_rate": 3.5685341359574974e-05, | |
| "loss": 0.4684, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.28664215007670935, | |
| "grad_norm": 0.9342713356018066, | |
| "learning_rate": 3.5669054343258646e-05, | |
| "loss": 0.5293, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.2869678797927056, | |
| "grad_norm": 1.0423755645751953, | |
| "learning_rate": 3.565276732694231e-05, | |
| "loss": 0.5466, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.28729360950870186, | |
| "grad_norm": 1.0259456634521484, | |
| "learning_rate": 3.563648031062598e-05, | |
| "loss": 0.4885, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.2876193392246981, | |
| "grad_norm": 0.8733958601951599, | |
| "learning_rate": 3.5620193294309643e-05, | |
| "loss": 0.5353, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.2879450689406944, | |
| "grad_norm": 0.33869871497154236, | |
| "learning_rate": 3.560390627799331e-05, | |
| "loss": 0.5465, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.28827079865669064, | |
| "grad_norm": 0.5838894844055176, | |
| "learning_rate": 3.558761926167698e-05, | |
| "loss": 0.555, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.2885965283726869, | |
| "grad_norm": 0.8616543412208557, | |
| "learning_rate": 3.557133224536065e-05, | |
| "loss": 0.5173, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.28892225808868316, | |
| "grad_norm": 0.8486323356628418, | |
| "learning_rate": 3.555504522904431e-05, | |
| "loss": 0.5258, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.2892479878046794, | |
| "grad_norm": 0.6569567918777466, | |
| "learning_rate": 3.553875821272798e-05, | |
| "loss": 0.5097, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.2895737175206757, | |
| "grad_norm": 0.6821163296699524, | |
| "learning_rate": 3.5522471196411645e-05, | |
| "loss": 0.5428, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.28989944723667194, | |
| "grad_norm": 0.6147534251213074, | |
| "learning_rate": 3.550618418009532e-05, | |
| "loss": 0.5544, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.2902251769526682, | |
| "grad_norm": 0.42478904128074646, | |
| "learning_rate": 3.5489897163778976e-05, | |
| "loss": 0.5376, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.29055090666866445, | |
| "grad_norm": 0.5254961252212524, | |
| "learning_rate": 3.547361014746265e-05, | |
| "loss": 0.4964, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.2908766363846607, | |
| "grad_norm": 0.6934669017791748, | |
| "learning_rate": 3.5457323131146314e-05, | |
| "loss": 0.4835, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.291202366100657, | |
| "grad_norm": 0.4250465929508209, | |
| "learning_rate": 3.544103611482999e-05, | |
| "loss": 0.4954, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.29152809581665323, | |
| "grad_norm": 0.6067728996276855, | |
| "learning_rate": 3.5424749098513646e-05, | |
| "loss": 0.4926, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.2918538255326495, | |
| "grad_norm": 0.5424463748931885, | |
| "learning_rate": 3.540846208219731e-05, | |
| "loss": 0.5627, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.2921795552486458, | |
| "grad_norm": 0.5810889005661011, | |
| "learning_rate": 3.5392175065880984e-05, | |
| "loss": 0.4316, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.29250528496464206, | |
| "grad_norm": 0.4583912491798401, | |
| "learning_rate": 3.537588804956465e-05, | |
| "loss": 0.4987, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.2928310146806383, | |
| "grad_norm": 0.4320780634880066, | |
| "learning_rate": 3.5359601033248315e-05, | |
| "loss": 0.5204, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.2931567443966346, | |
| "grad_norm": 0.6955101490020752, | |
| "learning_rate": 3.534331401693198e-05, | |
| "loss": 0.5179, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.29348247411263084, | |
| "grad_norm": 0.512250542640686, | |
| "learning_rate": 3.5327027000615654e-05, | |
| "loss": 0.4909, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.2938082038286271, | |
| "grad_norm": 0.7975231409072876, | |
| "learning_rate": 3.531073998429932e-05, | |
| "loss": 0.4845, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.29413393354462336, | |
| "grad_norm": 0.25338149070739746, | |
| "learning_rate": 3.5294452967982985e-05, | |
| "loss": 0.4963, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.2944596632606196, | |
| "grad_norm": 0.43115437030792236, | |
| "learning_rate": 3.527816595166665e-05, | |
| "loss": 0.5203, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.2947853929766159, | |
| "grad_norm": 0.830754280090332, | |
| "learning_rate": 3.5261878935350317e-05, | |
| "loss": 0.4916, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.29511112269261214, | |
| "grad_norm": 0.8370751738548279, | |
| "learning_rate": 3.524559191903399e-05, | |
| "loss": 0.547, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.2954368524086084, | |
| "grad_norm": 0.7122400403022766, | |
| "learning_rate": 3.5229304902717655e-05, | |
| "loss": 0.5126, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.29576258212460466, | |
| "grad_norm": 0.4084763824939728, | |
| "learning_rate": 3.521301788640132e-05, | |
| "loss": 0.4971, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.2960883118406009, | |
| "grad_norm": 0.8079352974891663, | |
| "learning_rate": 3.5196730870084986e-05, | |
| "loss": 0.4992, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.2964140415565972, | |
| "grad_norm": 0.25352516770362854, | |
| "learning_rate": 3.518044385376865e-05, | |
| "loss": 0.5333, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.29673977127259343, | |
| "grad_norm": 0.5390329957008362, | |
| "learning_rate": 3.5164156837452324e-05, | |
| "loss": 0.5007, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.2970655009885897, | |
| "grad_norm": 0.6617804765701294, | |
| "learning_rate": 3.514786982113599e-05, | |
| "loss": 0.548, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.29739123070458595, | |
| "grad_norm": 0.7202132940292358, | |
| "learning_rate": 3.5131582804819656e-05, | |
| "loss": 0.5417, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.2977169604205822, | |
| "grad_norm": 0.28012895584106445, | |
| "learning_rate": 3.511529578850332e-05, | |
| "loss": 0.4883, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.29804269013657847, | |
| "grad_norm": 0.3527827560901642, | |
| "learning_rate": 3.5099008772186994e-05, | |
| "loss": 0.523, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.29836841985257473, | |
| "grad_norm": 0.7193790078163147, | |
| "learning_rate": 3.508272175587066e-05, | |
| "loss": 0.5148, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.298694149568571, | |
| "grad_norm": 0.9702345728874207, | |
| "learning_rate": 3.506643473955432e-05, | |
| "loss": 0.4781, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.29901987928456725, | |
| "grad_norm": 0.7323670983314514, | |
| "learning_rate": 3.505014772323799e-05, | |
| "loss": 0.5394, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.2993456090005635, | |
| "grad_norm": 0.6757960915565491, | |
| "learning_rate": 3.503386070692166e-05, | |
| "loss": 0.4984, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.29967133871655977, | |
| "grad_norm": 0.7119109630584717, | |
| "learning_rate": 3.501757369060533e-05, | |
| "loss": 0.5502, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.299997068432556, | |
| "grad_norm": 0.6820542216300964, | |
| "learning_rate": 3.500128667428899e-05, | |
| "loss": 0.5498, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.3003227981485523, | |
| "grad_norm": 0.784050703048706, | |
| "learning_rate": 3.498499965797266e-05, | |
| "loss": 0.5445, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.30064852786454854, | |
| "grad_norm": 0.6549366116523743, | |
| "learning_rate": 3.496871264165633e-05, | |
| "loss": 0.5326, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.3009742575805448, | |
| "grad_norm": 0.4872061014175415, | |
| "learning_rate": 3.495242562533999e-05, | |
| "loss": 0.5093, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.30129998729654106, | |
| "grad_norm": 0.3646996319293976, | |
| "learning_rate": 3.493613860902366e-05, | |
| "loss": 0.5476, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.3016257170125373, | |
| "grad_norm": 0.5709706544876099, | |
| "learning_rate": 3.4919851592707324e-05, | |
| "loss": 0.4513, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.3019514467285336, | |
| "grad_norm": 0.6031984090805054, | |
| "learning_rate": 3.4903564576390996e-05, | |
| "loss": 0.5044, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.30227717644452984, | |
| "grad_norm": 0.8381587862968445, | |
| "learning_rate": 3.488727756007466e-05, | |
| "loss": 0.5128, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.3026029061605261, | |
| "grad_norm": 1.0859401226043701, | |
| "learning_rate": 3.487099054375833e-05, | |
| "loss": 0.5328, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.30292863587652236, | |
| "grad_norm": 0.34642109274864197, | |
| "learning_rate": 3.4854703527441994e-05, | |
| "loss": 0.4852, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.3032543655925186, | |
| "grad_norm": 0.6529460549354553, | |
| "learning_rate": 3.483841651112566e-05, | |
| "loss": 0.5032, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.3035800953085149, | |
| "grad_norm": 0.7026881575584412, | |
| "learning_rate": 3.482212949480933e-05, | |
| "loss": 0.6338, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.30390582502451113, | |
| "grad_norm": 0.49741417169570923, | |
| "learning_rate": 3.4805842478493e-05, | |
| "loss": 0.5231, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.30423155474050745, | |
| "grad_norm": 0.6611301898956299, | |
| "learning_rate": 3.478955546217666e-05, | |
| "loss": 0.5189, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.3045572844565037, | |
| "grad_norm": 0.6907228827476501, | |
| "learning_rate": 3.477326844586033e-05, | |
| "loss": 0.5256, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.30488301417249997, | |
| "grad_norm": 0.5975654721260071, | |
| "learning_rate": 3.4756981429544e-05, | |
| "loss": 0.522, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.3052087438884962, | |
| "grad_norm": 0.6043006777763367, | |
| "learning_rate": 3.474069441322767e-05, | |
| "loss": 0.5018, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.3055344736044925, | |
| "grad_norm": 0.5697898864746094, | |
| "learning_rate": 3.4724407396911326e-05, | |
| "loss": 0.5009, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.30586020332048874, | |
| "grad_norm": 0.40364518761634827, | |
| "learning_rate": 3.4708120380595e-05, | |
| "loss": 0.4642, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.306185933036485, | |
| "grad_norm": 0.940877377986908, | |
| "learning_rate": 3.4691833364278664e-05, | |
| "loss": 0.5136, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.30651166275248126, | |
| "grad_norm": 0.7497209310531616, | |
| "learning_rate": 3.467554634796234e-05, | |
| "loss": 0.5261, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.3068373924684775, | |
| "grad_norm": 0.8120318651199341, | |
| "learning_rate": 3.4659259331645996e-05, | |
| "loss": 0.4756, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.3071631221844738, | |
| "grad_norm": 0.6802115440368652, | |
| "learning_rate": 3.464297231532967e-05, | |
| "loss": 0.5257, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.30748885190047004, | |
| "grad_norm": 0.43083488941192627, | |
| "learning_rate": 3.4626685299013334e-05, | |
| "loss": 0.5365, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.3078145816164663, | |
| "grad_norm": 0.6194273233413696, | |
| "learning_rate": 3.4610398282697e-05, | |
| "loss": 0.5157, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.30814031133246256, | |
| "grad_norm": 0.5603410601615906, | |
| "learning_rate": 3.4594111266380666e-05, | |
| "loss": 0.51, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.3084660410484588, | |
| "grad_norm": 1.0651506185531616, | |
| "learning_rate": 3.457782425006433e-05, | |
| "loss": 0.4759, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.3087917707644551, | |
| "grad_norm": 0.7674971222877502, | |
| "learning_rate": 3.4561537233748004e-05, | |
| "loss": 0.467, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.30911750048045133, | |
| "grad_norm": 0.9666951298713684, | |
| "learning_rate": 3.454525021743167e-05, | |
| "loss": 0.5524, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.3094432301964476, | |
| "grad_norm": 0.6148163080215454, | |
| "learning_rate": 3.4528963201115335e-05, | |
| "loss": 0.5345, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.30976895991244385, | |
| "grad_norm": 0.7641096711158752, | |
| "learning_rate": 3.4512676184799e-05, | |
| "loss": 0.4872, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.3100946896284401, | |
| "grad_norm": 0.6152538657188416, | |
| "learning_rate": 3.449638916848267e-05, | |
| "loss": 0.4832, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.31042041934443637, | |
| "grad_norm": 0.7761083841323853, | |
| "learning_rate": 3.448010215216634e-05, | |
| "loss": 0.4761, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.31074614906043263, | |
| "grad_norm": 0.6005348563194275, | |
| "learning_rate": 3.4463815135850005e-05, | |
| "loss": 0.4585, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.3110718787764289, | |
| "grad_norm": 0.7649496793746948, | |
| "learning_rate": 3.444752811953367e-05, | |
| "loss": 0.5283, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.31139760849242515, | |
| "grad_norm": 0.9503573179244995, | |
| "learning_rate": 3.4431241103217336e-05, | |
| "loss": 0.5032, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.3117233382084214, | |
| "grad_norm": 0.8403215408325195, | |
| "learning_rate": 3.441495408690101e-05, | |
| "loss": 0.5172, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.31204906792441767, | |
| "grad_norm": 0.5137957334518433, | |
| "learning_rate": 3.4398667070584675e-05, | |
| "loss": 0.5551, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.3123747976404139, | |
| "grad_norm": 0.6618998646736145, | |
| "learning_rate": 3.438238005426834e-05, | |
| "loss": 0.5237, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.3127005273564102, | |
| "grad_norm": 0.3272695541381836, | |
| "learning_rate": 3.4366093037952006e-05, | |
| "loss": 0.4556, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.31302625707240644, | |
| "grad_norm": 0.7416215538978577, | |
| "learning_rate": 3.434980602163567e-05, | |
| "loss": 0.5039, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.3133519867884027, | |
| "grad_norm": 0.9183087944984436, | |
| "learning_rate": 3.4333519005319344e-05, | |
| "loss": 0.5408, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.31367771650439896, | |
| "grad_norm": 0.3782617151737213, | |
| "learning_rate": 3.431723198900301e-05, | |
| "loss": 0.5113, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 0.3140034462203952, | |
| "grad_norm": 0.6314922571182251, | |
| "learning_rate": 3.4300944972686676e-05, | |
| "loss": 0.4955, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.3143291759363915, | |
| "grad_norm": 0.3009500801563263, | |
| "learning_rate": 3.428465795637034e-05, | |
| "loss": 0.5114, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.31465490565238774, | |
| "grad_norm": 0.8378229737281799, | |
| "learning_rate": 3.4268370940054014e-05, | |
| "loss": 0.5287, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.314980635368384, | |
| "grad_norm": 0.7249593138694763, | |
| "learning_rate": 3.425208392373768e-05, | |
| "loss": 0.5209, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 0.31530636508438026, | |
| "grad_norm": 0.45489412546157837, | |
| "learning_rate": 3.423579690742134e-05, | |
| "loss": 0.5745, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.3156320948003765, | |
| "grad_norm": 0.6379255056381226, | |
| "learning_rate": 3.421950989110501e-05, | |
| "loss": 0.5199, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 0.31595782451637283, | |
| "grad_norm": 0.8550392389297485, | |
| "learning_rate": 3.420322287478868e-05, | |
| "loss": 0.5374, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.3162835542323691, | |
| "grad_norm": 0.5571677684783936, | |
| "learning_rate": 3.418693585847235e-05, | |
| "loss": 0.5057, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 0.31660928394836535, | |
| "grad_norm": 0.48302140831947327, | |
| "learning_rate": 3.417064884215601e-05, | |
| "loss": 0.5496, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.3169350136643616, | |
| "grad_norm": 0.7864711284637451, | |
| "learning_rate": 3.415436182583968e-05, | |
| "loss": 0.5132, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 0.31726074338035787, | |
| "grad_norm": 0.5517250299453735, | |
| "learning_rate": 3.413807480952335e-05, | |
| "loss": 0.4826, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.3175864730963541, | |
| "grad_norm": 0.7834230065345764, | |
| "learning_rate": 3.412178779320701e-05, | |
| "loss": 0.5186, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.3179122028123504, | |
| "grad_norm": 0.938097357749939, | |
| "learning_rate": 3.410550077689068e-05, | |
| "loss": 0.4817, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.31823793252834665, | |
| "grad_norm": 0.25078582763671875, | |
| "learning_rate": 3.4089213760574344e-05, | |
| "loss": 0.4996, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 0.3185636622443429, | |
| "grad_norm": 0.7896013259887695, | |
| "learning_rate": 3.4072926744258016e-05, | |
| "loss": 0.5163, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.31888939196033916, | |
| "grad_norm": 0.6857266426086426, | |
| "learning_rate": 3.405663972794168e-05, | |
| "loss": 0.4952, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 0.3192151216763354, | |
| "grad_norm": 0.5710707306861877, | |
| "learning_rate": 3.404035271162535e-05, | |
| "loss": 0.5273, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.3195408513923317, | |
| "grad_norm": 0.5274339914321899, | |
| "learning_rate": 3.4024065695309014e-05, | |
| "loss": 0.5385, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 0.31986658110832794, | |
| "grad_norm": 0.27135804295539856, | |
| "learning_rate": 3.400777867899268e-05, | |
| "loss": 0.5042, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.3201923108243242, | |
| "grad_norm": 0.6852828860282898, | |
| "learning_rate": 3.399149166267635e-05, | |
| "loss": 0.5214, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 0.32051804054032046, | |
| "grad_norm": 0.5614081621170044, | |
| "learning_rate": 3.397520464636002e-05, | |
| "loss": 0.5023, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.3208437702563167, | |
| "grad_norm": 0.7719017863273621, | |
| "learning_rate": 3.395891763004368e-05, | |
| "loss": 0.4919, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.321169499972313, | |
| "grad_norm": 0.8100476264953613, | |
| "learning_rate": 3.394263061372735e-05, | |
| "loss": 0.4607, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.32149522968830924, | |
| "grad_norm": 0.6814531087875366, | |
| "learning_rate": 3.392634359741102e-05, | |
| "loss": 0.5457, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 0.3218209594043055, | |
| "grad_norm": 1.0356829166412354, | |
| "learning_rate": 3.391005658109469e-05, | |
| "loss": 0.4844, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.32214668912030175, | |
| "grad_norm": 0.8719603419303894, | |
| "learning_rate": 3.3893769564778346e-05, | |
| "loss": 0.5182, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 0.322472418836298, | |
| "grad_norm": 0.6145396828651428, | |
| "learning_rate": 3.387748254846202e-05, | |
| "loss": 0.4732, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.3227981485522943, | |
| "grad_norm": 1.005679726600647, | |
| "learning_rate": 3.3861195532145684e-05, | |
| "loss": 0.5182, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 0.32312387826829053, | |
| "grad_norm": 0.29751360416412354, | |
| "learning_rate": 3.384490851582936e-05, | |
| "loss": 0.4823, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.3234496079842868, | |
| "grad_norm": 0.7968891263008118, | |
| "learning_rate": 3.3828621499513016e-05, | |
| "loss": 0.5235, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 0.32377533770028305, | |
| "grad_norm": 0.7049364447593689, | |
| "learning_rate": 3.381233448319669e-05, | |
| "loss": 0.5392, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.3241010674162793, | |
| "grad_norm": 0.6265050172805786, | |
| "learning_rate": 3.3796047466880354e-05, | |
| "loss": 0.5119, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.32442679713227557, | |
| "grad_norm": 0.6732152104377747, | |
| "learning_rate": 3.377976045056402e-05, | |
| "loss": 0.4837, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.3247525268482718, | |
| "grad_norm": 0.25657424330711365, | |
| "learning_rate": 3.3763473434247686e-05, | |
| "loss": 0.5199, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 0.3250782565642681, | |
| "grad_norm": 0.4994146227836609, | |
| "learning_rate": 3.374718641793135e-05, | |
| "loss": 0.4894, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.32540398628026435, | |
| "grad_norm": 0.7468940615653992, | |
| "learning_rate": 3.3730899401615024e-05, | |
| "loss": 0.5409, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 0.3257297159962606, | |
| "grad_norm": 0.17829063534736633, | |
| "learning_rate": 3.371461238529869e-05, | |
| "loss": 0.5111, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.32605544571225686, | |
| "grad_norm": 0.6492403745651245, | |
| "learning_rate": 3.369832536898236e-05, | |
| "loss": 0.5085, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 0.3263811754282531, | |
| "grad_norm": 0.41203296184539795, | |
| "learning_rate": 3.368203835266602e-05, | |
| "loss": 0.4674, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 0.3267069051442494, | |
| "grad_norm": 0.6258901953697205, | |
| "learning_rate": 3.366575133634969e-05, | |
| "loss": 0.4797, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 0.32703263486024564, | |
| "grad_norm": 0.5243533849716187, | |
| "learning_rate": 3.364946432003336e-05, | |
| "loss": 0.4851, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.3273583645762419, | |
| "grad_norm": 0.7344015836715698, | |
| "learning_rate": 3.3633177303717025e-05, | |
| "loss": 0.4964, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.32768409429223816, | |
| "grad_norm": 1.1914827823638916, | |
| "learning_rate": 3.361689028740069e-05, | |
| "loss": 0.4923, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 0.3280098240082345, | |
| "grad_norm": 0.7036446928977966, | |
| "learning_rate": 3.3600603271084356e-05, | |
| "loss": 0.5234, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 0.32833555372423073, | |
| "grad_norm": 0.8239650726318359, | |
| "learning_rate": 3.358431625476803e-05, | |
| "loss": 0.4715, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.328661283440227, | |
| "grad_norm": 0.6158246397972107, | |
| "learning_rate": 3.3568029238451695e-05, | |
| "loss": 0.488, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 0.32898701315622325, | |
| "grad_norm": 0.708604633808136, | |
| "learning_rate": 3.355174222213536e-05, | |
| "loss": 0.4674, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.3293127428722195, | |
| "grad_norm": 0.5420898199081421, | |
| "learning_rate": 3.3535455205819026e-05, | |
| "loss": 0.4741, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 0.32963847258821577, | |
| "grad_norm": 0.49769943952560425, | |
| "learning_rate": 3.351916818950269e-05, | |
| "loss": 0.4638, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.32996420230421203, | |
| "grad_norm": 0.7099531888961792, | |
| "learning_rate": 3.3502881173186364e-05, | |
| "loss": 0.5236, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 0.3302899320202083, | |
| "grad_norm": 0.712815523147583, | |
| "learning_rate": 3.348659415687003e-05, | |
| "loss": 0.5268, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 0.33061566173620455, | |
| "grad_norm": 0.8762120008468628, | |
| "learning_rate": 3.3470307140553696e-05, | |
| "loss": 0.5045, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.3309413914522008, | |
| "grad_norm": 0.7411269545555115, | |
| "learning_rate": 3.345402012423736e-05, | |
| "loss": 0.5017, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.33126712116819707, | |
| "grad_norm": 0.7993664145469666, | |
| "learning_rate": 3.343773310792103e-05, | |
| "loss": 0.4866, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 0.3315928508841933, | |
| "grad_norm": 0.9997897148132324, | |
| "learning_rate": 3.34214460916047e-05, | |
| "loss": 0.5033, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 0.3319185806001896, | |
| "grad_norm": 0.3995771110057831, | |
| "learning_rate": 3.340515907528836e-05, | |
| "loss": 0.5037, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 0.33224431031618584, | |
| "grad_norm": 0.4990951418876648, | |
| "learning_rate": 3.338887205897203e-05, | |
| "loss": 0.5353, | |
| "step": 102000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 307003, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.515680604094464e+18, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |