{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 3906,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "embedding_loss": 0.1733,
      "epoch": 0.0002560163850486431,
      "grad_norm": 1.3411569595336914,
      "learning_rate": 0.0,
      "step": 1
    },
    {
      "embedding_loss": 0.1425,
      "epoch": 0.012800819252432157,
      "grad_norm": 1.5893903970718384,
      "learning_rate": 2.5063938618925833e-06,
      "step": 50
    },
    {
      "embedding_loss": 0.0954,
      "epoch": 0.025601638504864313,
      "grad_norm": 1.3638615608215332,
      "learning_rate": 5.0639386189258325e-06,
      "step": 100
    },
    {
      "embedding_loss": 0.0483,
      "epoch": 0.03840245775729647,
      "grad_norm": 1.0745915174484253,
      "learning_rate": 7.62148337595908e-06,
      "step": 150
    },
    {
      "embedding_loss": 0.0297,
      "epoch": 0.051203277009728626,
      "grad_norm": 0.8514853715896606,
      "learning_rate": 1.0179028132992328e-05,
      "step": 200
    },
    {
      "embedding_loss": 0.0189,
      "epoch": 0.06400409626216078,
      "grad_norm": 0.7511357069015503,
      "learning_rate": 1.2736572890025576e-05,
      "step": 250
    },
    {
      "embedding_loss": 0.0122,
      "epoch": 0.07680491551459294,
      "grad_norm": 0.525897204875946,
      "learning_rate": 1.5294117647058822e-05,
      "step": 300
    },
    {
      "embedding_loss": 0.0099,
      "epoch": 0.08960573476702509,
      "grad_norm": 0.5095636248588562,
      "learning_rate": 1.7851662404092073e-05,
      "step": 350
    },
    {
      "embedding_loss": 0.0085,
      "epoch": 0.10240655401945725,
      "grad_norm": 1.5715042352676392,
      "learning_rate": 1.9954480796586062e-05,
      "step": 400
    },
    {
      "embedding_loss": 0.0067,
      "epoch": 0.1152073732718894,
      "grad_norm": 0.27569064497947693,
      "learning_rate": 1.9669985775248933e-05,
      "step": 450
    },
    {
      "embedding_loss": 0.0059,
      "epoch": 0.12800819252432155,
      "grad_norm": 0.09678972512483597,
      "learning_rate": 1.9385490753911807e-05,
      "step": 500
    },
    {
      "embedding_loss": 0.0076,
      "epoch": 0.1408090117767537,
      "grad_norm": 0.15479978919029236,
      "learning_rate": 1.910099573257468e-05,
      "step": 550
    },
    {
      "embedding_loss": 0.0061,
      "epoch": 0.15360983102918588,
      "grad_norm": 0.4873831272125244,
      "learning_rate": 1.8816500711237555e-05,
      "step": 600
    },
    {
      "embedding_loss": 0.0049,
      "epoch": 0.16641065028161803,
      "grad_norm": 0.09139434248209,
      "learning_rate": 1.8532005689900426e-05,
      "step": 650
    },
    {
      "embedding_loss": 0.003,
      "epoch": 0.17921146953405018,
      "grad_norm": 0.05168338865041733,
      "learning_rate": 1.8247510668563304e-05,
      "step": 700
    },
    {
      "embedding_loss": 0.0023,
      "epoch": 0.19201228878648233,
      "grad_norm": 0.05378415808081627,
      "learning_rate": 1.7963015647226174e-05,
      "step": 750
    },
    {
      "embedding_loss": 0.002,
      "epoch": 0.2048131080389145,
      "grad_norm": 0.026590052992105484,
      "learning_rate": 1.7678520625889048e-05,
      "step": 800
    },
    {
      "embedding_loss": 0.0037,
      "epoch": 0.21761392729134665,
      "grad_norm": 0.03566845878958702,
      "learning_rate": 1.739402560455192e-05,
      "step": 850
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.2304147465437788,
      "grad_norm": 0.061990801244974136,
      "learning_rate": 1.7109530583214796e-05,
      "step": 900
    },
    {
      "embedding_loss": 0.0015,
      "epoch": 0.24321556579621095,
      "grad_norm": 0.027544036507606506,
      "learning_rate": 1.682503556187767e-05,
      "step": 950
    },
    {
      "embedding_loss": 0.0017,
      "epoch": 0.2560163850486431,
      "grad_norm": 0.04399234429001808,
      "learning_rate": 1.654054054054054e-05,
      "step": 1000
    },
    {
      "embedding_loss": 0.0014,
      "epoch": 0.26881720430107525,
      "grad_norm": 0.01828560046851635,
      "learning_rate": 1.6256045519203415e-05,
      "step": 1050
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.2816180235535074,
      "grad_norm": 0.018775783479213715,
      "learning_rate": 1.597155049786629e-05,
      "step": 1100
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.2944188428059396,
      "grad_norm": 0.01576610654592514,
      "learning_rate": 1.5687055476529163e-05,
      "step": 1150
    },
    {
      "embedding_loss": 0.001,
      "epoch": 0.30721966205837176,
      "grad_norm": 0.14026065170764923,
      "learning_rate": 1.5402560455192034e-05,
      "step": 1200
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.3200204813108039,
      "grad_norm": 0.01965928263962269,
      "learning_rate": 1.511806543385491e-05,
      "step": 1250
    },
    {
      "embedding_loss": 0.0002,
      "epoch": 0.33282130056323606,
      "grad_norm": 0.014394218102097511,
      "learning_rate": 1.4833570412517782e-05,
      "step": 1300
    },
    {
      "embedding_loss": 0.0012,
      "epoch": 0.3456221198156682,
      "grad_norm": 0.018288280814886093,
      "learning_rate": 1.4549075391180656e-05,
      "step": 1350
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.35842293906810035,
      "grad_norm": 0.017536135390400887,
      "learning_rate": 1.4264580369843529e-05,
      "step": 1400
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.3712237583205325,
      "grad_norm": 0.012779198586940765,
      "learning_rate": 1.3980085348506403e-05,
      "step": 1450
    },
    {
      "embedding_loss": 0.0005,
      "epoch": 0.38402457757296465,
      "grad_norm": 0.19666947424411774,
      "learning_rate": 1.3695590327169275e-05,
      "step": 1500
    },
    {
      "embedding_loss": 0.0002,
      "epoch": 0.3968253968253968,
      "grad_norm": 0.008438820950686932,
      "learning_rate": 1.341109530583215e-05,
      "step": 1550
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.409626216077829,
      "grad_norm": 0.009661088697612286,
      "learning_rate": 1.3126600284495022e-05,
      "step": 1600
    },
    {
      "embedding_loss": 0.0009,
      "epoch": 0.42242703533026116,
      "grad_norm": 0.02346787601709366,
      "learning_rate": 1.2842105263157896e-05,
      "step": 1650
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.4352278545826933,
      "grad_norm": 0.0062308646738529205,
      "learning_rate": 1.255761024182077e-05,
      "step": 1700
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.44802867383512546,
      "grad_norm": 0.013097619637846947,
      "learning_rate": 1.2273115220483642e-05,
      "step": 1750
    },
    {
      "embedding_loss": 0.0001,
      "epoch": 0.4608294930875576,
      "grad_norm": 0.008484157733619213,
      "learning_rate": 1.1988620199146516e-05,
      "step": 1800
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.47363031233998976,
      "grad_norm": 0.010278033092617989,
      "learning_rate": 1.1704125177809389e-05,
      "step": 1850
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.4864311315924219,
      "grad_norm": 0.012740347534418106,
      "learning_rate": 1.1419630156472263e-05,
      "step": 1900
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.49923195084485406,
      "grad_norm": 0.022875774651765823,
      "learning_rate": 1.1135135135135135e-05,
      "step": 1950
    },
    {
      "embedding_loss": 0.0001,
      "epoch": 0.5120327700972862,
      "grad_norm": 0.007295957300812006,
      "learning_rate": 1.085064011379801e-05,
      "step": 2000
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.5248335893497184,
      "grad_norm": 0.008994905278086662,
      "learning_rate": 1.0566145092460882e-05,
      "step": 2050
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.5376344086021505,
      "grad_norm": 0.01373240165412426,
      "learning_rate": 1.0281650071123756e-05,
      "step": 2100
    },
    {
      "embedding_loss": 0.0012,
      "epoch": 0.5504352278545827,
      "grad_norm": 0.017664149403572083,
      "learning_rate": 9.99715504978663e-06,
      "step": 2150
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.5632360471070148,
      "grad_norm": 0.026641126722097397,
      "learning_rate": 9.712660028449504e-06,
      "step": 2200
    },
    {
      "embedding_loss": 0.0001,
      "epoch": 0.576036866359447,
      "grad_norm": 0.015163728035986423,
      "learning_rate": 9.428165007112376e-06,
      "step": 2250
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.5888376856118792,
      "grad_norm": 0.014809815213084221,
      "learning_rate": 9.14366998577525e-06,
      "step": 2300
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.6016385048643114,
      "grad_norm": 0.007227804511785507,
      "learning_rate": 8.859174964438123e-06,
      "step": 2350
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.6144393241167435,
      "grad_norm": 0.010935621336102486,
      "learning_rate": 8.574679943100997e-06,
      "step": 2400
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.6272401433691757,
      "grad_norm": 0.0073186722584068775,
      "learning_rate": 8.29018492176387e-06,
      "step": 2450
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.6400409626216078,
      "grad_norm": 0.006618503015488386,
      "learning_rate": 8.005689900426743e-06,
      "step": 2500
    },
    {
      "embedding_loss": 0.0001,
      "epoch": 0.65284178187404,
      "grad_norm": 0.008351747877895832,
      "learning_rate": 7.721194879089616e-06,
      "step": 2550
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.6656426011264721,
      "grad_norm": 0.008504342287778854,
      "learning_rate": 7.43669985775249e-06,
      "step": 2600
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.6784434203789043,
      "grad_norm": 0.015397731214761734,
      "learning_rate": 7.152204836415363e-06,
      "step": 2650
    },
    {
      "embedding_loss": 0.001,
      "epoch": 0.6912442396313364,
      "grad_norm": 0.004343625158071518,
      "learning_rate": 6.867709815078236e-06,
      "step": 2700
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.7040450588837686,
      "grad_norm": 0.010210598818957806,
      "learning_rate": 6.58321479374111e-06,
      "step": 2750
    },
    {
      "embedding_loss": 0.0008,
      "epoch": 0.7168458781362007,
      "grad_norm": 0.021265419200062752,
      "learning_rate": 6.2987197724039836e-06,
      "step": 2800
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.7296466973886329,
      "grad_norm": 0.006350652314722538,
      "learning_rate": 6.014224751066858e-06,
      "step": 2850
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.742447516641065,
      "grad_norm": 0.042495131492614746,
      "learning_rate": 5.729729729729731e-06,
      "step": 2900
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.7552483358934972,
      "grad_norm": 0.003976090345531702,
      "learning_rate": 5.445234708392604e-06,
      "step": 2950
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.7680491551459293,
      "grad_norm": 0.008590229786932468,
      "learning_rate": 5.160739687055477e-06,
      "step": 3000
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.7808499743983615,
      "grad_norm": 0.006639127153903246,
      "learning_rate": 4.8762446657183506e-06,
      "step": 3050
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.7936507936507936,
      "grad_norm": 0.006947138346731663,
      "learning_rate": 4.591749644381224e-06,
      "step": 3100
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.8064516129032258,
      "grad_norm": 0.00519227422773838,
      "learning_rate": 4.307254623044097e-06,
      "step": 3150
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.819252432155658,
      "grad_norm": 0.013285805471241474,
      "learning_rate": 4.02275960170697e-06,
      "step": 3200
    },
    {
      "embedding_loss": 0.0001,
      "epoch": 0.8320532514080902,
      "grad_norm": 0.010509872809052467,
      "learning_rate": 3.7382645803698435e-06,
      "step": 3250
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.8448540706605223,
      "grad_norm": 0.0032197178807109594,
      "learning_rate": 3.4537695590327167e-06,
      "step": 3300
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.8576548899129545,
      "grad_norm": 0.007785377558320761,
      "learning_rate": 3.169274537695591e-06,
      "step": 3350
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.8704557091653866,
      "grad_norm": 0.0052650910802185535,
      "learning_rate": 2.884779516358464e-06,
      "step": 3400
    },
    {
      "embedding_loss": 0.0009,
      "epoch": 0.8832565284178188,
      "grad_norm": 0.020432407036423683,
      "learning_rate": 2.6002844950213373e-06,
      "step": 3450
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.8960573476702509,
      "grad_norm": 0.009411387145519257,
      "learning_rate": 2.3157894736842105e-06,
      "step": 3500
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.9088581669226831,
      "grad_norm": 0.007589740678668022,
      "learning_rate": 2.031294452347084e-06,
      "step": 3550
    },
    {
      "embedding_loss": 0.0004,
      "epoch": 0.9216589861751152,
      "grad_norm": 0.12826202809810638,
      "learning_rate": 1.7467994310099576e-06,
      "step": 3600
    },
    {
      "embedding_loss": 0.0001,
      "epoch": 0.9344598054275474,
      "grad_norm": 0.012935018166899681,
      "learning_rate": 1.4623044096728308e-06,
      "step": 3650
    },
    {
      "embedding_loss": 0.0001,
      "epoch": 0.9472606246799795,
      "grad_norm": 0.008784984238445759,
      "learning_rate": 1.1778093883357043e-06,
      "step": 3700
    },
    {
      "embedding_loss": 0.0007,
      "epoch": 0.9600614439324117,
      "grad_norm": 0.004482260439544916,
      "learning_rate": 8.933143669985776e-07,
      "step": 3750
    },
    {
      "embedding_loss": 0.0006,
      "epoch": 0.9728622631848438,
      "grad_norm": 0.004357804544270039,
      "learning_rate": 6.08819345661451e-07,
      "step": 3800
    },
    {
      "embedding_loss": 0.0005,
      "epoch": 0.985663082437276,
      "grad_norm": 0.014645060524344444,
      "learning_rate": 3.2432432432432436e-07,
      "step": 3850
    },
    {
      "embedding_loss": 0.0003,
      "epoch": 0.9984639016897081,
      "grad_norm": 0.005299082491546869,
      "learning_rate": 3.9829302987197725e-08,
      "step": 3900
    }
  ],
  "logging_steps": 50,
  "max_steps": 3906,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}