{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.997242140099283,
  "eval_steps": 500,
  "global_step": 339,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
|
{ |
|
"epoch": 0.00882515168229454, |
|
"grad_norm": 0.40674829483032227, |
|
"learning_rate": 9.999785297426788e-05, |
|
"loss": 0.2055, |
|
"num_input_tokens_seen": 203120, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01765030336458908, |
|
"grad_norm": 0.4242195785045624, |
|
"learning_rate": 9.999141208146028e-05, |
|
"loss": 0.1902, |
|
"num_input_tokens_seen": 406048, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.026475455046883617, |
|
"grad_norm": 0.3813261389732361, |
|
"learning_rate": 9.998067787472772e-05, |
|
"loss": 0.1421, |
|
"num_input_tokens_seen": 614736, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03530060672917816, |
|
"grad_norm": 0.28003761172294617, |
|
"learning_rate": 9.996565127593488e-05, |
|
"loss": 0.1102, |
|
"num_input_tokens_seen": 816416, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0441257584114727, |
|
"grad_norm": 0.25300610065460205, |
|
"learning_rate": 9.994633357558158e-05, |
|
"loss": 0.0801, |
|
"num_input_tokens_seen": 1024272, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.052950910093767234, |
|
"grad_norm": 0.2328871786594391, |
|
"learning_rate": 9.99227264326918e-05, |
|
"loss": 0.0574, |
|
"num_input_tokens_seen": 1228192, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.06177606177606178, |
|
"grad_norm": 0.17362241446971893, |
|
"learning_rate": 9.989483187467127e-05, |
|
"loss": 0.0401, |
|
"num_input_tokens_seen": 1434992, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.07060121345835632, |
|
"grad_norm": 0.09250874817371368, |
|
"learning_rate": 9.986265229713331e-05, |
|
"loss": 0.0295, |
|
"num_input_tokens_seen": 1646560, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.07942636514065085, |
|
"grad_norm": 0.08936059474945068, |
|
"learning_rate": 9.982619046369321e-05, |
|
"loss": 0.0262, |
|
"num_input_tokens_seen": 1838624, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0882515168229454, |
|
"grad_norm": 0.08603595942258835, |
|
"learning_rate": 9.978544950573074e-05, |
|
"loss": 0.0263, |
|
"num_input_tokens_seen": 2053488, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09707666850523994, |
|
"grad_norm": 0.07848804444074631, |
|
"learning_rate": 9.974043292212128e-05, |
|
"loss": 0.022, |
|
"num_input_tokens_seen": 2253680, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.10590182018753447, |
|
"grad_norm": 0.06246768683195114, |
|
"learning_rate": 9.96911445789354e-05, |
|
"loss": 0.0202, |
|
"num_input_tokens_seen": 2442000, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.11472697186982901, |
|
"grad_norm": 0.048259809613227844, |
|
"learning_rate": 9.963758870910671e-05, |
|
"loss": 0.0202, |
|
"num_input_tokens_seen": 2655920, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.12355212355212356, |
|
"grad_norm": 0.03917853534221649, |
|
"learning_rate": 9.957976991206846e-05, |
|
"loss": 0.0178, |
|
"num_input_tokens_seen": 2874064, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.13237727523441808, |
|
"grad_norm": 0.040510393679142, |
|
"learning_rate": 9.951769315335844e-05, |
|
"loss": 0.0158, |
|
"num_input_tokens_seen": 3071744, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.14120242691671264, |
|
"grad_norm": 0.035558607429265976, |
|
"learning_rate": 9.945136376419259e-05, |
|
"loss": 0.0159, |
|
"num_input_tokens_seen": 3277904, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.15002757859900717, |
|
"grad_norm": 0.034995947033166885, |
|
"learning_rate": 9.938078744100712e-05, |
|
"loss": 0.0147, |
|
"num_input_tokens_seen": 3493136, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.1588527302813017, |
|
"grad_norm": 0.03230876475572586, |
|
"learning_rate": 9.930597024496931e-05, |
|
"loss": 0.0138, |
|
"num_input_tokens_seen": 3704288, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.16767788196359626, |
|
"grad_norm": 0.028281500563025475, |
|
"learning_rate": 9.922691860145696e-05, |
|
"loss": 0.0128, |
|
"num_input_tokens_seen": 3904352, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1765030336458908, |
|
"grad_norm": 0.026264235377311707, |
|
"learning_rate": 9.914363929950659e-05, |
|
"loss": 0.0124, |
|
"num_input_tokens_seen": 4113888, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18532818532818532, |
|
"grad_norm": 0.023232094943523407, |
|
"learning_rate": 9.905613949123036e-05, |
|
"loss": 0.0116, |
|
"num_input_tokens_seen": 4323504, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.19415333701047988, |
|
"grad_norm": 0.02393435873091221, |
|
"learning_rate": 9.896442669120187e-05, |
|
"loss": 0.0109, |
|
"num_input_tokens_seen": 4523008, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.2029784886927744, |
|
"grad_norm": 0.024421676993370056, |
|
"learning_rate": 9.886850877581079e-05, |
|
"loss": 0.0106, |
|
"num_input_tokens_seen": 4732864, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.21180364037506894, |
|
"grad_norm": 0.022869078442454338, |
|
"learning_rate": 9.876839398258641e-05, |
|
"loss": 0.0099, |
|
"num_input_tokens_seen": 4941936, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.2206287920573635, |
|
"grad_norm": 0.025933578610420227, |
|
"learning_rate": 9.866409090949022e-05, |
|
"loss": 0.0109, |
|
"num_input_tokens_seen": 5143584, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.22945394373965802, |
|
"grad_norm": 0.02043001353740692, |
|
"learning_rate": 9.855560851417752e-05, |
|
"loss": 0.0084, |
|
"num_input_tokens_seen": 5351024, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.23827909542195255, |
|
"grad_norm": 0.02140035293996334, |
|
"learning_rate": 9.844295611322804e-05, |
|
"loss": 0.0081, |
|
"num_input_tokens_seen": 5563760, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.2471042471042471, |
|
"grad_norm": 0.019948888570070267, |
|
"learning_rate": 9.832614338134595e-05, |
|
"loss": 0.0078, |
|
"num_input_tokens_seen": 5772416, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.25592939878654164, |
|
"grad_norm": 0.021153336390852928, |
|
"learning_rate": 9.820518035052889e-05, |
|
"loss": 0.0081, |
|
"num_input_tokens_seen": 5974464, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.26475455046883617, |
|
"grad_norm": 0.02002059668302536, |
|
"learning_rate": 9.808007740920646e-05, |
|
"loss": 0.0087, |
|
"num_input_tokens_seen": 6193520, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2735797021511307, |
|
"grad_norm": 0.029256833717226982, |
|
"learning_rate": 9.795084530134801e-05, |
|
"loss": 0.0079, |
|
"num_input_tokens_seen": 6399792, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.2824048538334253, |
|
"grad_norm": 0.02395695447921753, |
|
"learning_rate": 9.781749512553999e-05, |
|
"loss": 0.0086, |
|
"num_input_tokens_seen": 6603584, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2912300055157198, |
|
"grad_norm": 0.02185678854584694, |
|
"learning_rate": 9.768003833403278e-05, |
|
"loss": 0.0079, |
|
"num_input_tokens_seen": 6810656, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.30005515719801434, |
|
"grad_norm": 0.02072463184595108, |
|
"learning_rate": 9.753848673175707e-05, |
|
"loss": 0.0069, |
|
"num_input_tokens_seen": 7001792, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.3088803088803089, |
|
"grad_norm": 0.018024709075689316, |
|
"learning_rate": 9.739285247531018e-05, |
|
"loss": 0.0064, |
|
"num_input_tokens_seen": 7205952, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3177054605626034, |
|
"grad_norm": 0.019729286432266235, |
|
"learning_rate": 9.724314807191195e-05, |
|
"loss": 0.006, |
|
"num_input_tokens_seen": 7406304, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 0.01830880530178547, |
|
"learning_rate": 9.708938637833065e-05, |
|
"loss": 0.0067, |
|
"num_input_tokens_seen": 7629568, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3353557639271925, |
|
"grad_norm": 0.021113887429237366, |
|
"learning_rate": 9.693158059977878e-05, |
|
"loss": 0.0063, |
|
"num_input_tokens_seen": 7845200, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.34418091560948705, |
|
"grad_norm": 0.015138108283281326, |
|
"learning_rate": 9.676974428877901e-05, |
|
"loss": 0.0058, |
|
"num_input_tokens_seen": 8061840, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.3530060672917816, |
|
"grad_norm": 0.017043087631464005, |
|
"learning_rate": 9.660389134400033e-05, |
|
"loss": 0.0061, |
|
"num_input_tokens_seen": 8279664, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3618312189740761, |
|
"grad_norm": 0.01955767348408699, |
|
"learning_rate": 9.643403600906433e-05, |
|
"loss": 0.0055, |
|
"num_input_tokens_seen": 8475376, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.37065637065637064, |
|
"grad_norm": 0.014688636176288128, |
|
"learning_rate": 9.626019287132203e-05, |
|
"loss": 0.005, |
|
"num_input_tokens_seen": 8691760, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3794815223386652, |
|
"grad_norm": 0.01973150670528412, |
|
"learning_rate": 9.608237686060099e-05, |
|
"loss": 0.006, |
|
"num_input_tokens_seen": 8884736, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.38830667402095975, |
|
"grad_norm": 0.01489401888102293, |
|
"learning_rate": 9.590060324792327e-05, |
|
"loss": 0.0048, |
|
"num_input_tokens_seen": 9084064, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.3971318257032543, |
|
"grad_norm": 0.015995647758245468, |
|
"learning_rate": 9.571488764419381e-05, |
|
"loss": 0.0047, |
|
"num_input_tokens_seen": 9302144, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4059569773855488, |
|
"grad_norm": 0.01859475113451481, |
|
"learning_rate": 9.552524599885981e-05, |
|
"loss": 0.0053, |
|
"num_input_tokens_seen": 9517456, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.41478212906784334, |
|
"grad_norm": 0.018746482208371162, |
|
"learning_rate": 9.533169459854098e-05, |
|
"loss": 0.0044, |
|
"num_input_tokens_seen": 9710768, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.42360728075013787, |
|
"grad_norm": 0.017155013978481293, |
|
"learning_rate": 9.513425006563079e-05, |
|
"loss": 0.0043, |
|
"num_input_tokens_seen": 9914064, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.43243243243243246, |
|
"grad_norm": 0.015938682481646538, |
|
"learning_rate": 9.493292935686895e-05, |
|
"loss": 0.0041, |
|
"num_input_tokens_seen": 10120208, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.441257584114727, |
|
"grad_norm": 0.017114240676164627, |
|
"learning_rate": 9.472774976188515e-05, |
|
"loss": 0.0044, |
|
"num_input_tokens_seen": 10346304, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4500827357970215, |
|
"grad_norm": 0.014332287944853306, |
|
"learning_rate": 9.451872890171419e-05, |
|
"loss": 0.004, |
|
"num_input_tokens_seen": 10547984, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.45890788747931605, |
|
"grad_norm": 0.017018554732203484, |
|
"learning_rate": 9.43058847272827e-05, |
|
"loss": 0.0045, |
|
"num_input_tokens_seen": 10754288, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.4677330391616106, |
|
"grad_norm": 0.013670100830495358, |
|
"learning_rate": 9.408923551786743e-05, |
|
"loss": 0.0028, |
|
"num_input_tokens_seen": 10942704, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.4765581908439051, |
|
"grad_norm": 0.016749229282140732, |
|
"learning_rate": 9.386879987952549e-05, |
|
"loss": 0.0034, |
|
"num_input_tokens_seen": 11150864, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.4853833425261997, |
|
"grad_norm": 0.01554529182612896, |
|
"learning_rate": 9.364459674349641e-05, |
|
"loss": 0.0042, |
|
"num_input_tokens_seen": 11367728, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.4942084942084942, |
|
"grad_norm": 0.015070905908942223, |
|
"learning_rate": 9.341664536457626e-05, |
|
"loss": 0.0028, |
|
"num_input_tokens_seen": 11575536, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.5030336458907887, |
|
"grad_norm": 0.016440849751234055, |
|
"learning_rate": 9.31849653194641e-05, |
|
"loss": 0.0035, |
|
"num_input_tokens_seen": 11781328, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.5118587975730833, |
|
"grad_norm": 0.014468475244939327, |
|
"learning_rate": 9.294957650508065e-05, |
|
"loss": 0.0029, |
|
"num_input_tokens_seen": 11981232, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.5206839492553779, |
|
"grad_norm": 0.014588565565645695, |
|
"learning_rate": 9.27104991368596e-05, |
|
"loss": 0.0028, |
|
"num_input_tokens_seen": 12187296, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.5295091009376723, |
|
"grad_norm": 0.0141281234100461, |
|
"learning_rate": 9.246775374701139e-05, |
|
"loss": 0.0027, |
|
"num_input_tokens_seen": 12385632, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.5383342526199669, |
|
"grad_norm": 0.013463583774864674, |
|
"learning_rate": 9.222136118275995e-05, |
|
"loss": 0.0022, |
|
"num_input_tokens_seen": 12588928, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.5471594043022614, |
|
"grad_norm": 0.014033553190529346, |
|
"learning_rate": 9.197134260455233e-05, |
|
"loss": 0.0027, |
|
"num_input_tokens_seen": 12825616, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.555984555984556, |
|
"grad_norm": 0.013906535692512989, |
|
"learning_rate": 9.171771948424137e-05, |
|
"loss": 0.0025, |
|
"num_input_tokens_seen": 13044976, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.5648097076668506, |
|
"grad_norm": 0.012418747879564762, |
|
"learning_rate": 9.146051360324166e-05, |
|
"loss": 0.0025, |
|
"num_input_tokens_seen": 13255280, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.573634859349145, |
|
"grad_norm": 0.015126565471291542, |
|
"learning_rate": 9.119974705065901e-05, |
|
"loss": 0.0022, |
|
"num_input_tokens_seen": 13463456, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.5824600110314396, |
|
"grad_norm": 0.013123284094035625, |
|
"learning_rate": 9.093544222139337e-05, |
|
"loss": 0.0023, |
|
"num_input_tokens_seen": 13667744, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.5912851627137341, |
|
"grad_norm": 0.014246366918087006, |
|
"learning_rate": 9.066762181421552e-05, |
|
"loss": 0.0024, |
|
"num_input_tokens_seen": 13874240, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.6001103143960287, |
|
"grad_norm": 0.011402356438338757, |
|
"learning_rate": 9.039630882981768e-05, |
|
"loss": 0.0015, |
|
"num_input_tokens_seen": 14081392, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.6089354660783233, |
|
"grad_norm": 0.014725148677825928, |
|
"learning_rate": 9.012152656883823e-05, |
|
"loss": 0.0033, |
|
"num_input_tokens_seen": 14300896, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.6177606177606177, |
|
"grad_norm": 0.014837515540421009, |
|
"learning_rate": 8.984329862986056e-05, |
|
"loss": 0.0021, |
|
"num_input_tokens_seen": 14523968, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.6265857694429123, |
|
"grad_norm": 0.014493652619421482, |
|
"learning_rate": 8.956164890738643e-05, |
|
"loss": 0.0013, |
|
"num_input_tokens_seen": 14728960, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.6354109211252068, |
|
"grad_norm": 0.011806878261268139, |
|
"learning_rate": 8.927660158978392e-05, |
|
"loss": 0.0016, |
|
"num_input_tokens_seen": 14912480, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.6442360728075014, |
|
"grad_norm": 0.01818985864520073, |
|
"learning_rate": 8.898818115721008e-05, |
|
"loss": 0.0019, |
|
"num_input_tokens_seen": 15114608, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 0.015412255190312862, |
|
"learning_rate": 8.86964123795085e-05, |
|
"loss": 0.0017, |
|
"num_input_tokens_seen": 15326112, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.6618863761720905, |
|
"grad_norm": 0.013063928112387657, |
|
"learning_rate": 8.84013203140821e-05, |
|
"loss": 0.0015, |
|
"num_input_tokens_seen": 15545248, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.670711527854385, |
|
"grad_norm": 0.016336796805262566, |
|
"learning_rate": 8.810293030374126e-05, |
|
"loss": 0.0017, |
|
"num_input_tokens_seen": 15751872, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.6795366795366795, |
|
"grad_norm": 0.010313590988516808, |
|
"learning_rate": 8.780126797452713e-05, |
|
"loss": 0.001, |
|
"num_input_tokens_seen": 15957872, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.6883618312189741, |
|
"grad_norm": 0.015468253754079342, |
|
"learning_rate": 8.749635923351107e-05, |
|
"loss": 0.0018, |
|
"num_input_tokens_seen": 16162640, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.6971869829012686, |
|
"grad_norm": 0.01543041318655014, |
|
"learning_rate": 8.71882302665696e-05, |
|
"loss": 0.001, |
|
"num_input_tokens_seen": 16352368, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.7060121345835632, |
|
"grad_norm": 0.01957864873111248, |
|
"learning_rate": 8.687690753613554e-05, |
|
"loss": 0.0014, |
|
"num_input_tokens_seen": 16563920, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.7148372862658577, |
|
"grad_norm": 0.012508533895015717, |
|
"learning_rate": 8.656241777892543e-05, |
|
"loss": 0.001, |
|
"num_input_tokens_seen": 16759024, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.7236624379481522, |
|
"grad_norm": 0.012273616157472134, |
|
"learning_rate": 8.624478800364332e-05, |
|
"loss": 0.0013, |
|
"num_input_tokens_seen": 16973728, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.7324875896304468, |
|
"grad_norm": 0.01503776852041483, |
|
"learning_rate": 8.592404548866123e-05, |
|
"loss": 0.0012, |
|
"num_input_tokens_seen": 17162752, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.7413127413127413, |
|
"grad_norm": 0.014227951876819134, |
|
"learning_rate": 8.560021777967649e-05, |
|
"loss": 0.0013, |
|
"num_input_tokens_seen": 17364064, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.7501378929950359, |
|
"grad_norm": 0.01252016518265009, |
|
"learning_rate": 8.527333268734606e-05, |
|
"loss": 0.0011, |
|
"num_input_tokens_seen": 17564576, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.7589630446773304, |
|
"grad_norm": 0.011520475149154663, |
|
"learning_rate": 8.494341828489812e-05, |
|
"loss": 0.0037, |
|
"num_input_tokens_seen": 17778752, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.7677881963596249, |
|
"grad_norm": 0.010531144216656685, |
|
"learning_rate": 8.461050290572114e-05, |
|
"loss": 0.0007, |
|
"num_input_tokens_seen": 17982448, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.7766133480419195, |
|
"grad_norm": 0.010875461623072624, |
|
"learning_rate": 8.427461514093056e-05, |
|
"loss": 0.0008, |
|
"num_input_tokens_seen": 18180608, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.785438499724214, |
|
"grad_norm": 0.007611530367285013, |
|
"learning_rate": 8.393578383691329e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 18384496, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.7942636514065086, |
|
"grad_norm": 0.010159923695027828, |
|
"learning_rate": 8.359403809285053e-05, |
|
"loss": 0.001, |
|
"num_input_tokens_seen": 18587744, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.803088803088803, |
|
"grad_norm": 0.011715343222022057, |
|
"learning_rate": 8.324940725821852e-05, |
|
"loss": 0.001, |
|
"num_input_tokens_seen": 18791056, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.8119139547710976, |
|
"grad_norm": 0.012972251512110233, |
|
"learning_rate": 8.290192093026805e-05, |
|
"loss": 0.0008, |
|
"num_input_tokens_seen": 18985008, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.8207391064533922, |
|
"grad_norm": 0.0135871022939682, |
|
"learning_rate": 8.255160895148263e-05, |
|
"loss": 0.0014, |
|
"num_input_tokens_seen": 19193888, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.8295642581356867, |
|
"grad_norm": 0.011914449743926525, |
|
"learning_rate": 8.219850140701557e-05, |
|
"loss": 0.001, |
|
"num_input_tokens_seen": 19399552, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.8383894098179813, |
|
"grad_norm": 0.009591113775968552, |
|
"learning_rate": 8.184262862210624e-05, |
|
"loss": 0.0007, |
|
"num_input_tokens_seen": 19605120, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.8472145615002757, |
|
"grad_norm": 0.009942690841853619, |
|
"learning_rate": 8.148402115947571e-05, |
|
"loss": 0.0008, |
|
"num_input_tokens_seen": 19802480, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.8560397131825703, |
|
"grad_norm": 0.012667879462242126, |
|
"learning_rate": 8.112270981670196e-05, |
|
"loss": 0.0011, |
|
"num_input_tokens_seen": 20009520, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.8648648648648649, |
|
"grad_norm": 0.010983509942889214, |
|
"learning_rate": 8.075872562357501e-05, |
|
"loss": 0.0009, |
|
"num_input_tokens_seen": 20235888, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.8736900165471594, |
|
"grad_norm": 0.011479397304356098, |
|
"learning_rate": 8.039209983943201e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 20433600, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.882515168229454, |
|
"grad_norm": 0.012184002436697483, |
|
"learning_rate": 8.002286395047267e-05, |
|
"loss": 0.0009, |
|
"num_input_tokens_seen": 20631664, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.8913403199117484, |
|
"grad_norm": 0.009395604953169823, |
|
"learning_rate": 7.965104966705518e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 20833056, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.900165471594043, |
|
"grad_norm": 0.013585143722593784, |
|
"learning_rate": 7.927668892097289e-05, |
|
"loss": 0.0008, |
|
"num_input_tokens_seen": 21051104, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.9089906232763376, |
|
"grad_norm": 0.008882119320333004, |
|
"learning_rate": 7.889981386271201e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 21246080, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.9178157749586321, |
|
"grad_norm": 0.010433576069772243, |
|
"learning_rate": 7.852045685869045e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 21439696, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.9266409266409267, |
|
"grad_norm": 0.01474383007735014, |
|
"learning_rate": 7.813865048847819e-05, |
|
"loss": 0.0008, |
|
"num_input_tokens_seen": 21648432, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.9354660783232212, |
|
"grad_norm": 0.011113091371953487, |
|
"learning_rate": 7.775442754199928e-05, |
|
"loss": 0.0007, |
|
"num_input_tokens_seen": 21864368, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.9442912300055157, |
|
"grad_norm": 0.009181715548038483, |
|
"learning_rate": 7.736782101671587e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 22061968, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.9531163816878102, |
|
"grad_norm": 0.0140100521966815, |
|
"learning_rate": 7.697886411479423e-05, |
|
"loss": 0.0012, |
|
"num_input_tokens_seen": 22278128, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.9619415333701048, |
|
"grad_norm": 0.007349591236561537, |
|
"learning_rate": 7.658759024025349e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 22469056, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.9707666850523994, |
|
"grad_norm": 0.01252900529652834, |
|
"learning_rate": 7.619403299609668e-05, |
|
"loss": 0.0008, |
|
"num_input_tokens_seen": 22662128, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.012083148583769798, |
|
"learning_rate": 7.579822618142505e-05, |
|
"loss": 0.0007, |
|
"num_input_tokens_seen": 22883216, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.9884169884169884, |
|
"grad_norm": 0.010517132468521595, |
|
"learning_rate": 7.540020378853523e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 23085888, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.9972421400992829, |
|
"grad_norm": 0.01143716461956501, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0007, |
|
"num_input_tokens_seen": 23307520, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.0088251516822946, |
|
"grad_norm": 0.0287212785333395, |
|
"learning_rate": 7.459764918573264e-05, |
|
"loss": 0.0014, |
|
"num_input_tokens_seen": 23564192, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.0176503033645892, |
|
"grad_norm": 0.010353313758969307, |
|
"learning_rate": 7.419318590003523e-05, |
|
"loss": 0.0007, |
|
"num_input_tokens_seen": 23768816, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.0264754550468835, |
|
"grad_norm": 0.013796573504805565, |
|
"learning_rate": 7.378664487863103e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 23974096, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.0353006067291781, |
|
"grad_norm": 0.006352484691888094, |
|
"learning_rate": 7.33780610356814e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 24172256, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.0441257584114727, |
|
"grad_norm": 0.007957457564771175, |
|
"learning_rate": 7.296746946078736e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 24362208, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.0529509100937673, |
|
"grad_norm": 0.0068214968778193, |
|
"learning_rate": 7.255490541597594e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 24562224, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.0617760617760619, |
|
"grad_norm": 0.00877879373729229, |
|
"learning_rate": 7.214040433267198e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 24776528, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0706012134583562, |
|
"grad_norm": 0.007200079504400492, |
|
"learning_rate": 7.172400180865513e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 24985008, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.0794263651406508, |
|
"grad_norm": 0.010829208418726921, |
|
"learning_rate": 7.130573360500276e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 25200720, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0882515168229454, |
|
"grad_norm": 0.010170291177928448, |
|
"learning_rate": 7.088563564301873e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 25413568, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.09707666850524, |
|
"grad_norm": 0.007032219786196947, |
|
"learning_rate": 7.046374400114842e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 25608576, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.1059018201875346, |
|
"grad_norm": 0.00843306165188551, |
|
"learning_rate": 7.004009491188022e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 25818400, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.114726971869829, |
|
"grad_norm": 0.00947788916528225, |
|
"learning_rate": 6.961472475863405e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 26037424, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.1235521235521235, |
|
"grad_norm": 0.009593469090759754, |
|
"learning_rate": 6.918767007263646e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 26250480, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.1323772752344181, |
|
"grad_norm": 0.012611499056220055, |
|
"learning_rate": 6.875896752978344e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 26458592, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.1412024269167127, |
|
"grad_norm": 0.005860932637006044, |
|
"learning_rate": 6.832865394749065e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 26680256, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.150027578599007, |
|
"grad_norm": 0.008905632421374321, |
|
"learning_rate": 6.789676628153143e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 26887424, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.1588527302813016, |
|
"grad_norm": 0.00839240662753582, |
|
"learning_rate": 6.746334162286307e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 27112736, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.1676778819635962, |
|
"grad_norm": 0.010829194448888302, |
|
"learning_rate": 6.702841719444141e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 27320064, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.1765030336458908, |
|
"grad_norm": 0.005576102528721094, |
|
"learning_rate": 6.659203034802397e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 27520544, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.1853281853281854, |
|
"grad_norm": 0.008609413169324398, |
|
"learning_rate": 6.615421856096231e-05, |
|
"loss": 0.0009, |
|
"num_input_tokens_seen": 27737920, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.19415333701048, |
|
"grad_norm": 0.013195198960602283, |
|
"learning_rate": 6.571501943298334e-05, |
|
"loss": 0.0014, |
|
"num_input_tokens_seen": 27947552, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.2029784886927744, |
|
"grad_norm": 0.008647961542010307, |
|
"learning_rate": 6.527447068296026e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 28143808, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.211803640375069, |
|
"grad_norm": 0.006975845899432898, |
|
"learning_rate": 6.483261014567311e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 28349312, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.2206287920573635, |
|
"grad_norm": 0.013750969432294369, |
|
"learning_rate": 6.438947576855968e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 28560096, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.229453943739658, |
|
"grad_norm": 0.009799162857234478, |
|
"learning_rate": 6.394510560845637e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 28764544, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.2382790954219525, |
|
"grad_norm": 0.00819414108991623, |
|
"learning_rate": 6.349953782832991e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 28949360, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.247104247104247, |
|
"grad_norm": 0.008884673938155174, |
|
"learning_rate": 6.305281069399989e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 29148112, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.2559293987865416, |
|
"grad_norm": 0.009248818270862103, |
|
"learning_rate": 6.26049625708524e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 29370624, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.2647545504688362, |
|
"grad_norm": 0.008902438916265965, |
|
"learning_rate": 6.215603192054522e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 29572464, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.2735797021511308, |
|
"grad_norm": 0.012439709156751633, |
|
"learning_rate": 6.17060572977047e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 29771152, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.2824048538334254, |
|
"grad_norm": 0.013059360906481743, |
|
"learning_rate": 6.125507734661458e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 29954960, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.2912300055157198, |
|
"grad_norm": 0.011295526288449764, |
|
"learning_rate": 6.080313079789723e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 30165568, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.3000551571980143, |
|
"grad_norm": 0.01000818982720375, |
|
"learning_rate": 6.035025646518746e-05, |
|
"loss": 0.0005, |
|
"num_input_tokens_seen": 30372160, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.308880308880309, |
|
"grad_norm": 0.010914387181401253, |
|
"learning_rate": 5.989649324179911e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 30572752, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.3177054605626033, |
|
"grad_norm": 0.009289560839533806, |
|
"learning_rate": 5.944188009738483e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 30780496, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.3265306122448979, |
|
"grad_norm": 0.015559184364974499, |
|
"learning_rate": 5.8986456074589404e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 30975120, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.3353557639271925, |
|
"grad_norm": 0.00643413420766592, |
|
"learning_rate": 5.853026028569667e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 31174000, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.344180915609487, |
|
"grad_norm": 0.0077626509591937065, |
|
"learning_rate": 5.807333190927053e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 31387088, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.3530060672917816, |
|
"grad_norm": 0.0083751380443573, |
|
"learning_rate": 5.761571018679025e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 31576400, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.3618312189740762, |
|
"grad_norm": 0.007961435243487358, |
|
"learning_rate": 5.715743441928041e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 31784320, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.3706563706563706, |
|
"grad_norm": 0.006737589370459318, |
|
"learning_rate": 5.669854396393559e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 31987520, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.3794815223386652, |
|
"grad_norm": 0.014642222784459591, |
|
"learning_rate": 5.6239078230740436e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 32187456, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.3883066740209598, |
|
"grad_norm": 0.006064648274332285, |
|
"learning_rate": 5.5779076679085054e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 32384528, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.3971318257032543, |
|
"grad_norm": 0.009461612440645695, |
|
"learning_rate": 5.531857881437612e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 32593040, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.4059569773855487, |
|
"grad_norm": 0.007511747535318136, |
|
"learning_rate": 5.48576241846443e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 32797952, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.4147821290678433, |
|
"grad_norm": 0.02702983096241951, |
|
"learning_rate": 5.4396252377147615e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 33008800, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.4236072807501379, |
|
"grad_norm": 0.008439299650490284, |
|
"learning_rate": 5.3934503014971793e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 33208352, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.4324324324324325, |
|
"grad_norm": 0.0037907836958765984, |
|
"learning_rate": 5.347241575362729e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 33410208, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.441257584114727, |
|
"grad_norm": 0.008237862028181553, |
|
"learning_rate": 5.30100302776438e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 33631888, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.4500827357970216, |
|
"grad_norm": 0.009860441088676453, |
|
"learning_rate": 5.254738629716186e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 33825152, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.458907887479316, |
|
"grad_norm": 0.007564296945929527, |
|
"learning_rate": 5.208452354452274e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 34020352, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.4677330391616106, |
|
"grad_norm": 0.019607344642281532, |
|
"learning_rate": 5.162148177085604e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 34226288, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.4765581908439052, |
|
"grad_norm": 0.007924061268568039, |
|
"learning_rate": 5.115830074266591e-05, |
|
"loss": 0.0016, |
|
"num_input_tokens_seen": 34426672, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.4853833425261997, |
|
"grad_norm": 0.006358864717185497, |
|
"learning_rate": 5.0695020238415756e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 34636944, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.494208494208494, |
|
"grad_norm": 0.010681587271392345, |
|
"learning_rate": 5.0231680045112176e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 34839456, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.5030336458907887, |
|
"grad_norm": 0.01033815648406744, |
|
"learning_rate": 4.976831995488784e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 35031600, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.5118587975730833, |
|
"grad_norm": 0.016812577843666077, |
|
"learning_rate": 4.9304979761584256e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 35227728, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.5206839492553779, |
|
"grad_norm": 0.008957776241004467, |
|
"learning_rate": 4.884169925733409e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 35436528, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.5295091009376725, |
|
"grad_norm": 0.006675931625068188, |
|
"learning_rate": 4.837851822914397e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 35628624, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.538334252619967, |
|
"grad_norm": 0.006146900821477175, |
|
"learning_rate": 4.791547645547726e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 35827376, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.5471594043022614, |
|
"grad_norm": 0.012180755846202374, |
|
"learning_rate": 4.745261370283817e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 36056560, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.555984555984556, |
|
"grad_norm": 0.00920344889163971, |
|
"learning_rate": 4.698996972235622e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 36267568, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.5648097076668506, |
|
"grad_norm": 0.010103096254169941, |
|
"learning_rate": 4.652758424637271e-05, |
|
"loss": 0.0027, |
|
"num_input_tokens_seen": 36473008, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.573634859349145, |
|
"grad_norm": 0.012086655013263226, |
|
"learning_rate": 4.606549698502823e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 36670944, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.5824600110314395, |
|
"grad_norm": 0.0054108137264847755, |
|
"learning_rate": 4.56037476228524e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 36882256, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.591285162713734, |
|
"grad_norm": 0.014871139079332352, |
|
"learning_rate": 4.5142375815355706e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 37091392, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.6001103143960287, |
|
"grad_norm": 0.005915229208767414, |
|
"learning_rate": 4.468142118562389e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 37309680, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.6089354660783233, |
|
"grad_norm": 0.006937643978744745, |
|
"learning_rate": 4.4220923320914964e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 37517952, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.6177606177606179, |
|
"grad_norm": 0.00866376981139183, |
|
"learning_rate": 4.376092176925958e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 37732160, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.6265857694429124, |
|
"grad_norm": 0.007841500453650951, |
|
"learning_rate": 4.330145603606441e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 37940368, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.6354109211252068, |
|
"grad_norm": 0.008568421937525272, |
|
"learning_rate": 4.2842565580719595e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 38135024, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.6442360728075014, |
|
"grad_norm": 0.011796732433140278, |
|
"learning_rate": 4.238428981320975e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 38336176, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.6530612244897958, |
|
"grad_norm": 0.00755694042891264, |
|
"learning_rate": 4.192666809072948e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 38548880, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.6618863761720903, |
|
"grad_norm": 0.01243317686021328, |
|
"learning_rate": 4.146973971430333e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 38755920, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.670711527854385, |
|
"grad_norm": 0.006207725498825312, |
|
"learning_rate": 4.101354392541061e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 38973328, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.6795366795366795, |
|
"grad_norm": 0.008532355539500713, |
|
"learning_rate": 4.0558119902615174e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 39193232, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.688361831218974, |
|
"grad_norm": 0.008602111600339413, |
|
"learning_rate": 4.010350675820091e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 39406608, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.6971869829012687, |
|
"grad_norm": 0.008903734385967255, |
|
"learning_rate": 3.964974353481254e-05, |
|
"loss": 0.0004, |
|
"num_input_tokens_seen": 39620160, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.7060121345835633, |
|
"grad_norm": 0.005871508736163378, |
|
"learning_rate": 3.919686920210277e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 39815952, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.7148372862658579, |
|
"grad_norm": 0.008220325224101543, |
|
"learning_rate": 3.874492265338544e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 40015408, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.7236624379481522, |
|
"grad_norm": 0.00940727163106203, |
|
"learning_rate": 3.829394270229531e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 40215328, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.7324875896304468, |
|
"grad_norm": 0.005745697300881147, |
|
"learning_rate": 3.784396807945477e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 40414384, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.7413127413127412, |
|
"grad_norm": 0.009524352848529816, |
|
"learning_rate": 3.7395037429147615e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 40620656, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.7501378929950357, |
|
"grad_norm": 0.00809427909553051, |
|
"learning_rate": 3.694718930600012e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 40847008, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.7589630446773303, |
|
"grad_norm": 0.0051635075360536575, |
|
"learning_rate": 3.65004621716701e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 41036368, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.767788196359625, |
|
"grad_norm": 0.006504002492874861, |
|
"learning_rate": 3.6054894391543646e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 41252976, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.7766133480419195, |
|
"grad_norm": 0.009855791926383972, |
|
"learning_rate": 3.561052423144032e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 41465104, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.785438499724214, |
|
"grad_norm": 0.004304118454456329, |
|
"learning_rate": 3.5167389854326905e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 41670800, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.7942636514065087, |
|
"grad_norm": 0.014682441018521786, |
|
"learning_rate": 3.4725529317039754e-05, |
|
"loss": 0.0013, |
|
"num_input_tokens_seen": 41883536, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.803088803088803, |
|
"grad_norm": 0.0061918287537992, |
|
"learning_rate": 3.428498056701665e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 42083360, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.8119139547710976, |
|
"grad_norm": 0.009490927681326866, |
|
"learning_rate": 3.38457814390377e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 42283120, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.8207391064533922, |
|
"grad_norm": 0.008434086106717587, |
|
"learning_rate": 3.340796965197604e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 42499088, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.8295642581356866, |
|
"grad_norm": 0.004052174277603626, |
|
"learning_rate": 3.297158280555862e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 42692976, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.8383894098179812, |
|
"grad_norm": 0.007411065977066755, |
|
"learning_rate": 3.2536658377136935e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 42907216, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.8472145615002757, |
|
"grad_norm": 0.006996455602347851, |
|
"learning_rate": 3.210323371846857e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 43112448, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.8560397131825703, |
|
"grad_norm": 0.006998082622885704, |
|
"learning_rate": 3.167134605250938e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 43340096, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.864864864864865, |
|
"grad_norm": 0.006418649572879076, |
|
"learning_rate": 3.124103247021657e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 43539664, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.8736900165471595, |
|
"grad_norm": 0.009151714853942394, |
|
"learning_rate": 3.081232992736355e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 43727664, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.882515168229454, |
|
"grad_norm": 0.004692760296165943, |
|
"learning_rate": 3.0385275241365962e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 43953584, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.8913403199117484, |
|
"grad_norm": 0.006455820985138416, |
|
"learning_rate": 2.9959905088119776e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 44157504, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.900165471594043, |
|
"grad_norm": 0.006325691007077694, |
|
"learning_rate": 2.9536255998851613e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 44350448, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.9089906232763376, |
|
"grad_norm": 0.006784004159271717, |
|
"learning_rate": 2.9114364356981272e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 44561472, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.917815774958632, |
|
"grad_norm": 0.008874817751348019, |
|
"learning_rate": 2.8694266394997238e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 44769936, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.9266409266409266, |
|
"grad_norm": 0.006964050233364105, |
|
"learning_rate": 2.8275998191344888e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 44979344, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.9354660783232212, |
|
"grad_norm": 0.014264012686908245, |
|
"learning_rate": 2.7859595667328026e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 45196944, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.9442912300055157, |
|
"grad_norm": 0.005279663018882275, |
|
"learning_rate": 2.7445094584024067e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 45406832, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.9531163816878103, |
|
"grad_norm": 0.0171637125313282, |
|
"learning_rate": 2.7032530539212658e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 45603120, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.961941533370105, |
|
"grad_norm": 0.007687513716518879, |
|
"learning_rate": 2.6621938964318595e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 45805184, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.9707666850523995, |
|
"grad_norm": 0.0034611017908900976, |
|
"learning_rate": 2.621335512136899e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 46001184, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.9795918367346939, |
|
"grad_norm": 0.004358428996056318, |
|
"learning_rate": 2.5806814099964772e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 46206288, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.9884169884169884, |
|
"grad_norm": 0.008765267208218575, |
|
"learning_rate": 2.540235081426736e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 46427344, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.9972421400992828, |
|
"grad_norm": 0.006889387033879757, |
|
"learning_rate": 2.500000000000001e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 46627344, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 2.0088251516822946, |
|
"grad_norm": 0.043494511395692825, |
|
"learning_rate": 2.459979621146477e-05, |
|
"loss": 0.0011, |
|
"num_input_tokens_seen": 46901504, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 2.017650303364589, |
|
"grad_norm": 0.007718184031546116, |
|
"learning_rate": 2.4201773818574956e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 47104400, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 2.0264754550468838, |
|
"grad_norm": 0.003912526648491621, |
|
"learning_rate": 2.3805967003903333e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 47314176, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 2.0353006067291783, |
|
"grad_norm": 0.010783454403281212, |
|
"learning_rate": 2.3412409759746528e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 47525264, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.0441257584114725, |
|
"grad_norm": 0.0026623259764164686, |
|
"learning_rate": 2.302113588520578e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 47724528, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 2.052950910093767, |
|
"grad_norm": 0.00557671207934618, |
|
"learning_rate": 2.2632178983284153e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 47932624, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 2.0617760617760617, |
|
"grad_norm": 0.003710981458425522, |
|
"learning_rate": 2.2245572458000712e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 48148608, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 2.0706012134583562, |
|
"grad_norm": 0.009742701426148415, |
|
"learning_rate": 2.1861349511521815e-05, |
|
"loss": 0.0025, |
|
"num_input_tokens_seen": 48373632, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 2.079426365140651, |
|
"grad_norm": 0.009755464270710945, |
|
"learning_rate": 2.147954314130955e-05, |
|
"loss": 0.0013, |
|
"num_input_tokens_seen": 48586512, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.0882515168229454, |
|
"grad_norm": 0.002706202445551753, |
|
"learning_rate": 2.1100186137288e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 48793568, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 2.09707666850524, |
|
"grad_norm": 0.005180325359106064, |
|
"learning_rate": 2.072331107902713e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 49006224, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 2.1059018201875346, |
|
"grad_norm": 0.005968959536403418, |
|
"learning_rate": 2.0348950332944834e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 49217632, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 2.114726971869829, |
|
"grad_norm": 0.0063306307420134544, |
|
"learning_rate": 1.9977136049527345e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 49426624, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 2.1235521235521237, |
|
"grad_norm": 0.005157762672752142, |
|
"learning_rate": 1.960790016056801e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 49623376, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.132377275234418, |
|
"grad_norm": 0.005218483041971922, |
|
"learning_rate": 1.9241274376425e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 49828144, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 2.1412024269167125, |
|
"grad_norm": 0.00744604179635644, |
|
"learning_rate": 1.8877290183298057e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 50018448, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 2.150027578599007, |
|
"grad_norm": 0.005399591755121946, |
|
"learning_rate": 1.8515978840524302e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 50218176, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 2.1588527302813016, |
|
"grad_norm": 0.005761398002505302, |
|
"learning_rate": 1.815737137789377e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 50424896, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.1676778819635962, |
|
"grad_norm": 0.006964447908103466, |
|
"learning_rate": 1.7801498592984446e-05, |
|
"loss": 0.0006, |
|
"num_input_tokens_seen": 50635088, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.176503033645891, |
|
"grad_norm": 0.002962745726108551, |
|
"learning_rate": 1.7448391048517376e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 50849552, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 2.1853281853281854, |
|
"grad_norm": 0.005332667380571365, |
|
"learning_rate": 1.7098079069731958e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 51037776, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 2.19415333701048, |
|
"grad_norm": 0.006928949151188135, |
|
"learning_rate": 1.6750592741781497e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 51242672, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 2.2029784886927746, |
|
"grad_norm": 0.004213888198137283, |
|
"learning_rate": 1.640596190714947e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 51437008, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 2.211803640375069, |
|
"grad_norm": 0.010446918196976185, |
|
"learning_rate": 1.6064216163086716e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 51641264, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.2206287920573633, |
|
"grad_norm": 0.004029524512588978, |
|
"learning_rate": 1.5725384859069455e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 51842592, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 2.229453943739658, |
|
"grad_norm": 0.006790219806134701, |
|
"learning_rate": 1.538949709427886e-05, |
|
"loss": 0.0012, |
|
"num_input_tokens_seen": 52047456, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 2.2382790954219525, |
|
"grad_norm": 0.003987099044024944, |
|
"learning_rate": 1.5056581715101886e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 52242208, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 2.247104247104247, |
|
"grad_norm": 0.008930574171245098, |
|
"learning_rate": 1.472666731265394e-05, |
|
"loss": 0.0003, |
|
"num_input_tokens_seen": 52436800, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 2.2559293987865416, |
|
"grad_norm": 0.004108684603124857, |
|
"learning_rate": 1.4399782220323515e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 52624752, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.2647545504688362, |
|
"grad_norm": 0.00732703972607851, |
|
"learning_rate": 1.4075954511338785e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 52836384, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 2.273579702151131, |
|
"grad_norm": 0.006608397234231234, |
|
"learning_rate": 1.3755211996356687e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 53059296, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 2.2824048538334254, |
|
"grad_norm": 0.002376733347773552, |
|
"learning_rate": 1.3437582221074573e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 53267440, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 2.29123000551572, |
|
"grad_norm": 0.004921163432300091, |
|
"learning_rate": 1.3123092463864456e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 53501008, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 2.300055157198014, |
|
"grad_norm": 0.0034377635456621647, |
|
"learning_rate": 1.2811769733430406e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 53700432, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.3088803088803087, |
|
"grad_norm": 0.006821690127253532, |
|
"learning_rate": 1.250364076648894e-05, |
|
"loss": 0.0002, |
|
"num_input_tokens_seen": 53919616, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 2.3177054605626033, |
|
"grad_norm": 0.004776927176862955, |
|
"learning_rate": 1.2198732025472876e-05, |
|
"loss": 0.0001, |
|
"num_input_tokens_seen": 54130528, |
|
"step": 262 |
|
}, |
|
    {
      "epoch": 2.326530612244898,
      "grad_norm": 0.004824692849069834,
      "learning_rate": 1.1897069696258755e-05,
      "loss": 0.0002,
      "num_input_tokens_seen": 54350560,
      "step": 263
    },
    {
      "epoch": 2.3353557639271925,
      "grad_norm": 0.005174586083739996,
      "learning_rate": 1.1598679685917901e-05,
      "loss": 0.0001,
      "num_input_tokens_seen": 54542224,
      "step": 264
    },
    {
      "epoch": 2.344180915609487,
      "grad_norm": 0.012352543883025646,
      "learning_rate": 1.1303587620491513e-05,
      "loss": 0.0002,
      "num_input_tokens_seen": 54745136,
      "step": 265
    },
    {
      "epoch": 2.3530060672917816,
      "grad_norm": 0.005056153051555157,
      "learning_rate": 1.1011818842789928e-05,
      "loss": 0.0001,
      "num_input_tokens_seen": 54957584,
      "step": 266
    },
    {
      "epoch": 2.361831218974076,
      "grad_norm": 0.010525842197239399,
      "learning_rate": 1.0723398410216084e-05,
      "loss": 0.0001,
      "num_input_tokens_seen": 55162496,
      "step": 267
    },
    {
      "epoch": 2.370656370656371,
      "grad_norm": 0.0092442212626338,
      "learning_rate": 1.0438351092613569e-05,
      "loss": 0.0002,
      "num_input_tokens_seen": 55376544,
      "step": 268
    },
    {
      "epoch": 2.3794815223386654,
      "grad_norm": 0.00699999462813139,
      "learning_rate": 1.0156701370139454e-05,
      "loss": 0.0001,
      "num_input_tokens_seen": 55583072,
      "step": 269
    },
    {
      "epoch": 2.38830667402096,
      "grad_norm": 0.007677710149437189,
      "learning_rate": 9.878473431161767e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 55801200,
      "step": 270
    },
    {
      "epoch": 2.397131825703254,
      "grad_norm": 0.003174175275489688,
      "learning_rate": 9.603691170182317e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 55998080,
      "step": 271
    },
    {
      "epoch": 2.4059569773855487,
      "grad_norm": 0.005871200002729893,
      "learning_rate": 9.33237818578449e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 56200448,
      "step": 272
    },
    {
      "epoch": 2.4147821290678433,
      "grad_norm": 0.00371691957116127,
      "learning_rate": 9.064557778606631e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 56400416,
      "step": 273
    },
    {
      "epoch": 2.423607280750138,
      "grad_norm": 0.007599337492138147,
      "learning_rate": 8.800252949340998e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 56606128,
      "step": 274
    },
    {
      "epoch": 2.4324324324324325,
      "grad_norm": 0.0015243644593283534,
      "learning_rate": 8.539486396758356e-06,
      "loss": 0.0,
      "num_input_tokens_seen": 56797824,
      "step": 275
    },
    {
      "epoch": 2.441257584114727,
      "grad_norm": 0.0030196798034012318,
      "learning_rate": 8.28228051575864e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 57006384,
      "step": 276
    },
    {
      "epoch": 2.4500827357970216,
      "grad_norm": 0.005347589962184429,
      "learning_rate": 8.02865739544767e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 57207824,
      "step": 277
    },
    {
      "epoch": 2.458907887479316,
      "grad_norm": 0.005150883924216032,
      "learning_rate": 7.778638817240042e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 57415152,
      "step": 278
    },
    {
      "epoch": 2.467733039161611,
      "grad_norm": 0.006857512053102255,
      "learning_rate": 7.532246252988617e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 57628096,
      "step": 279
    },
    {
      "epoch": 2.476558190843905,
      "grad_norm": 0.005364645272493362,
      "learning_rate": 7.289500863140414e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 57824064,
      "step": 280
    },
    {
      "epoch": 2.4853833425261995,
      "grad_norm": 0.007198365870863199,
      "learning_rate": 7.05042349491935e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 58042720,
      "step": 281
    },
    {
      "epoch": 2.494208494208494,
      "grad_norm": 0.005014900583773851,
      "learning_rate": 6.815034680535915e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 58255408,
      "step": 282
    },
    {
      "epoch": 2.5030336458907887,
      "grad_norm": 0.008873779326677322,
      "learning_rate": 6.5833546354237556e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 58464800,
      "step": 283
    },
    {
      "epoch": 2.5118587975730833,
      "grad_norm": 0.0044725253246724606,
      "learning_rate": 6.355403256503595e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 58672496,
      "step": 284
    },
    {
      "epoch": 2.520683949255378,
      "grad_norm": 0.0047348616644740105,
      "learning_rate": 6.1312001204745115e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 58898256,
      "step": 285
    },
    {
      "epoch": 2.5295091009376725,
      "grad_norm": 0.00710884016007185,
      "learning_rate": 5.910764482132575e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 59107152,
      "step": 286
    },
    {
      "epoch": 2.538334252619967,
      "grad_norm": 0.007686229422688484,
      "learning_rate": 5.6941152727173265e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 59307664,
      "step": 287
    },
    {
      "epoch": 2.5471594043022616,
      "grad_norm": 0.014555118046700954,
      "learning_rate": 5.481271098285817e-06,
      "loss": 0.0003,
      "num_input_tokens_seen": 59514736,
      "step": 288
    },
    {
      "epoch": 2.5559845559845558,
      "grad_norm": 0.0028200196102261543,
      "learning_rate": 5.272250238114856e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 59712512,
      "step": 289
    },
    {
      "epoch": 2.564809707666851,
      "grad_norm": 0.004194322973489761,
      "learning_rate": 5.067070643131055e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 59910000,
      "step": 290
    },
    {
      "epoch": 2.573634859349145,
      "grad_norm": 0.006987538188695908,
      "learning_rate": 4.865749934369223e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 60116400,
      "step": 291
    },
    {
      "epoch": 2.5824600110314395,
      "grad_norm": 0.003778768004849553,
      "learning_rate": 4.668305401459022e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 60320368,
      "step": 292
    },
    {
      "epoch": 2.591285162713734,
      "grad_norm": 0.003472360782325268,
      "learning_rate": 4.474754001140191e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 60536528,
      "step": 293
    },
    {
      "epoch": 2.6001103143960287,
      "grad_norm": 0.009052475914359093,
      "learning_rate": 4.285112355806192e-06,
      "loss": 0.001,
      "num_input_tokens_seen": 60743120,
      "step": 294
    },
    {
      "epoch": 2.6089354660783233,
      "grad_norm": 0.0060082292184233665,
      "learning_rate": 4.099396752076745e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 60942704,
      "step": 295
    },
    {
      "epoch": 2.617760617760618,
      "grad_norm": 0.0075798071920871735,
      "learning_rate": 3.917623139399018e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 61140128,
      "step": 296
    },
    {
      "epoch": 2.6265857694429124,
      "grad_norm": 0.0055752964690327644,
      "learning_rate": 3.7398071286779857e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 61334224,
      "step": 297
    },
    {
      "epoch": 2.6354109211252066,
      "grad_norm": 0.007863204926252365,
      "learning_rate": 3.5659639909356723e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 61543280,
      "step": 298
    },
    {
      "epoch": 2.6442360728075016,
      "grad_norm": 0.006538075394928455,
      "learning_rate": 3.3961086559996803e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 61750720,
      "step": 299
    },
    {
      "epoch": 2.6530612244897958,
      "grad_norm": 0.002779777627438307,
      "learning_rate": 3.230255711220992e-06,
      "loss": 0.0,
      "num_input_tokens_seen": 61945952,
      "step": 300
    },
    {
      "epoch": 2.6618863761720903,
      "grad_norm": 0.004271807614713907,
      "learning_rate": 3.0684194002212287e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 62155632,
      "step": 301
    },
    {
      "epoch": 2.670711527854385,
      "grad_norm": 0.00638817623257637,
      "learning_rate": 2.910613621669356e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 62353216,
      "step": 302
    },
    {
      "epoch": 2.6795366795366795,
      "grad_norm": 0.00442032516002655,
      "learning_rate": 2.7568519280880558e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 62544128,
      "step": 303
    },
    {
      "epoch": 2.688361831218974,
      "grad_norm": 0.008686737157404423,
      "learning_rate": 2.607147524689829e-06,
      "loss": 0.0004,
      "num_input_tokens_seen": 62752688,
      "step": 304
    },
    {
      "epoch": 2.6971869829012687,
      "grad_norm": 0.0059651597402989864,
      "learning_rate": 2.4615132682429374e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 62963296,
      "step": 305
    },
    {
      "epoch": 2.7060121345835633,
      "grad_norm": 0.0056177834048867226,
      "learning_rate": 2.3199616659672354e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 63161904,
      "step": 306
    },
    {
      "epoch": 2.714837286265858,
      "grad_norm": 0.0029979923274368048,
      "learning_rate": 2.182504874460006e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 63365744,
      "step": 307
    },
    {
      "epoch": 2.7236624379481524,
      "grad_norm": 0.004314000252634287,
      "learning_rate": 2.049154698651989e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 63571808,
      "step": 308
    },
    {
      "epoch": 2.7324875896304466,
      "grad_norm": 0.006837273947894573,
      "learning_rate": 1.919922590793549e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 63768960,
      "step": 309
    },
    {
      "epoch": 2.741312741312741,
      "grad_norm": 0.0037646403070539236,
      "learning_rate": 1.7948196494711188e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 63979648,
      "step": 310
    },
    {
      "epoch": 2.7501378929950357,
      "grad_norm": 0.0031723175197839737,
      "learning_rate": 1.6738566186540627e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 64189712,
      "step": 311
    },
    {
      "epoch": 2.7589630446773303,
      "grad_norm": 0.005477920174598694,
      "learning_rate": 1.5570438867719694e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 64400624,
      "step": 312
    },
    {
      "epoch": 2.767788196359625,
      "grad_norm": 0.006315939594060183,
      "learning_rate": 1.4443914858224938e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 64626320,
      "step": 313
    },
    {
      "epoch": 2.7766133480419195,
      "grad_norm": 0.004753002431243658,
      "learning_rate": 1.3359090905097848e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 64826480,
      "step": 314
    },
    {
      "epoch": 2.785438499724214,
      "grad_norm": 0.00812880601733923,
      "learning_rate": 1.2316060174136002e-06,
      "loss": 0.0003,
      "num_input_tokens_seen": 65031984,
      "step": 315
    },
    {
      "epoch": 2.7942636514065087,
      "grad_norm": 0.0029212606605142355,
      "learning_rate": 1.1314912241892183e-06,
      "loss": 0.0001,
      "num_input_tokens_seen": 65239456,
      "step": 316
    },
    {
      "epoch": 2.8030888030888033,
      "grad_norm": 0.006850802339613438,
      "learning_rate": 1.0355733087981378e-06,
      "loss": 0.0002,
      "num_input_tokens_seen": 65433888,
      "step": 317
    },
    {
      "epoch": 2.8119139547710974,
      "grad_norm": 0.0020711093675345182,
      "learning_rate": 9.43860508769645e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 65638288,
      "step": 318
    },
    {
      "epoch": 2.8207391064533924,
      "grad_norm": 0.004868640564382076,
      "learning_rate": 8.563607004934193e-07,
      "loss": 0.0002,
      "num_input_tokens_seen": 65855952,
      "step": 319
    },
    {
      "epoch": 2.8295642581356866,
      "grad_norm": 0.006297328509390354,
      "learning_rate": 7.730813985430407e-07,
      "loss": 0.0002,
      "num_input_tokens_seen": 66070192,
      "step": 320
    },
    {
      "epoch": 2.838389409817981,
      "grad_norm": 0.0036759376525878906,
      "learning_rate": 6.940297550306896e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 66283808,
      "step": 321
    },
    {
      "epoch": 2.8472145615002757,
      "grad_norm": 0.0120092136785388,
      "learning_rate": 6.192125589928821e-07,
      "loss": 0.0002,
      "num_input_tokens_seen": 66507776,
      "step": 322
    },
    {
      "epoch": 2.8560397131825703,
      "grad_norm": 0.005414010491222143,
      "learning_rate": 5.486362358074094e-07,
      "loss": 0.0002,
      "num_input_tokens_seen": 66708320,
      "step": 323
    },
    {
      "epoch": 2.864864864864865,
      "grad_norm": 0.007992051541805267,
      "learning_rate": 4.823068466415615e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 66910032,
      "step": 324
    },
    {
      "epoch": 2.8736900165471595,
      "grad_norm": 0.006493248511105776,
      "learning_rate": 4.202300879315446e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 67112784,
      "step": 325
    },
    {
      "epoch": 2.882515168229454,
      "grad_norm": 0.004381334874778986,
      "learning_rate": 3.624112908932942e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 67306464,
      "step": 326
    },
    {
      "epoch": 2.8913403199117482,
      "grad_norm": 0.00577085604891181,
      "learning_rate": 3.088554210646133e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 67504720,
      "step": 327
    },
    {
      "epoch": 2.9001654715940433,
      "grad_norm": 0.003793071024119854,
      "learning_rate": 2.595670778787196e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 67694048,
      "step": 328
    },
    {
      "epoch": 2.9089906232763374,
      "grad_norm": 0.00835067592561245,
      "learning_rate": 2.1455049426926666e-07,
      "loss": 0.0002,
      "num_input_tokens_seen": 67895008,
      "step": 329
    },
    {
      "epoch": 2.917815774958632,
      "grad_norm": 0.005372443702071905,
      "learning_rate": 1.7380953630678488e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 68093168,
      "step": 330
    },
    {
      "epoch": 2.9266409266409266,
      "grad_norm": 0.010219305753707886,
      "learning_rate": 1.373477028666803e-07,
      "loss": 0.0002,
      "num_input_tokens_seen": 68305568,
      "step": 331
    },
    {
      "epoch": 2.935466078323221,
      "grad_norm": 0.0038206197787076235,
      "learning_rate": 1.0516812532873621e-07,
      "loss": 0.0001,
      "num_input_tokens_seen": 68506384,
      "step": 332
    },
    {
      "epoch": 2.9442912300055157,
      "grad_norm": 0.007432411424815655,
      "learning_rate": 7.727356730820035e-08,
      "loss": 0.0002,
      "num_input_tokens_seen": 68716160,
      "step": 333
    },
    {
      "epoch": 2.9531163816878103,
      "grad_norm": 0.004036502446979284,
      "learning_rate": 5.3666424418413744e-08,
      "loss": 0.0001,
      "num_input_tokens_seen": 68918048,
      "step": 334
    },
    {
      "epoch": 2.961941533370105,
      "grad_norm": 0.0045955548994243145,
      "learning_rate": 3.4348724065119685e-08,
      "loss": 0.0001,
      "num_input_tokens_seen": 69129152,
      "step": 335
    },
    {
      "epoch": 2.9707666850523995,
      "grad_norm": 0.012164157815277576,
      "learning_rate": 1.9322125272297488e-08,
      "loss": 0.0003,
      "num_input_tokens_seen": 69328576,
      "step": 336
    },
    {
      "epoch": 2.979591836734694,
      "grad_norm": 0.0029640356078743935,
      "learning_rate": 8.587918539726402e-09,
      "loss": 0.0001,
      "num_input_tokens_seen": 69537232,
      "step": 337
    },
    {
      "epoch": 2.988416988416988,
      "grad_norm": 0.005239939782768488,
      "learning_rate": 2.1470257321298813e-09,
      "loss": 0.0001,
      "num_input_tokens_seen": 69761008,
      "step": 338
    },
    {
      "epoch": 2.997242140099283,
      "grad_norm": 0.0060053626075387,
      "learning_rate": 0.0,
      "loss": 0.0001,
      "num_input_tokens_seen": 69953200,
      "step": 339
    },
    {
      "epoch": 2.997242140099283,
      "num_input_tokens_seen": 69953200,
      "step": 339,
      "total_flos": 2.976146663409713e+18,
      "train_loss": 0.004280612113766934,
      "train_runtime": 8852.4475,
      "train_samples_per_second": 4.914,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 1,
  "max_steps": 339,
  "num_input_tokens_seen": 69953200,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.976146663409713e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}