nomic-embed-text-v1.5 / trainer_state.json
weizhou03's picture
Upload folder using huggingface_hub
40db897 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 3906,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"embedding_loss": 0.1733,
"epoch": 0.0002560163850486431,
"grad_norm": 1.3411569595336914,
"learning_rate": 0.0,
"step": 1
},
{
"embedding_loss": 0.1425,
"epoch": 0.012800819252432157,
"grad_norm": 1.5893903970718384,
"learning_rate": 2.5063938618925833e-06,
"step": 50
},
{
"embedding_loss": 0.0954,
"epoch": 0.025601638504864313,
"grad_norm": 1.3638615608215332,
"learning_rate": 5.0639386189258325e-06,
"step": 100
},
{
"embedding_loss": 0.0483,
"epoch": 0.03840245775729647,
"grad_norm": 1.0745915174484253,
"learning_rate": 7.62148337595908e-06,
"step": 150
},
{
"embedding_loss": 0.0297,
"epoch": 0.051203277009728626,
"grad_norm": 0.8514853715896606,
"learning_rate": 1.0179028132992328e-05,
"step": 200
},
{
"embedding_loss": 0.0189,
"epoch": 0.06400409626216078,
"grad_norm": 0.7511357069015503,
"learning_rate": 1.2736572890025576e-05,
"step": 250
},
{
"embedding_loss": 0.0122,
"epoch": 0.07680491551459294,
"grad_norm": 0.525897204875946,
"learning_rate": 1.5294117647058822e-05,
"step": 300
},
{
"embedding_loss": 0.0099,
"epoch": 0.08960573476702509,
"grad_norm": 0.5095636248588562,
"learning_rate": 1.7851662404092073e-05,
"step": 350
},
{
"embedding_loss": 0.0085,
"epoch": 0.10240655401945725,
"grad_norm": 1.5715042352676392,
"learning_rate": 1.9954480796586062e-05,
"step": 400
},
{
"embedding_loss": 0.0067,
"epoch": 0.1152073732718894,
"grad_norm": 0.27569064497947693,
"learning_rate": 1.9669985775248933e-05,
"step": 450
},
{
"embedding_loss": 0.0059,
"epoch": 0.12800819252432155,
"grad_norm": 0.09678972512483597,
"learning_rate": 1.9385490753911807e-05,
"step": 500
},
{
"embedding_loss": 0.0076,
"epoch": 0.1408090117767537,
"grad_norm": 0.15479978919029236,
"learning_rate": 1.910099573257468e-05,
"step": 550
},
{
"embedding_loss": 0.0061,
"epoch": 0.15360983102918588,
"grad_norm": 0.4873831272125244,
"learning_rate": 1.8816500711237555e-05,
"step": 600
},
{
"embedding_loss": 0.0049,
"epoch": 0.16641065028161803,
"grad_norm": 0.09139434248209,
"learning_rate": 1.8532005689900426e-05,
"step": 650
},
{
"embedding_loss": 0.003,
"epoch": 0.17921146953405018,
"grad_norm": 0.05168338865041733,
"learning_rate": 1.8247510668563304e-05,
"step": 700
},
{
"embedding_loss": 0.0023,
"epoch": 0.19201228878648233,
"grad_norm": 0.05378415808081627,
"learning_rate": 1.7963015647226174e-05,
"step": 750
},
{
"embedding_loss": 0.002,
"epoch": 0.2048131080389145,
"grad_norm": 0.026590052992105484,
"learning_rate": 1.7678520625889048e-05,
"step": 800
},
{
"embedding_loss": 0.0037,
"epoch": 0.21761392729134665,
"grad_norm": 0.03566845878958702,
"learning_rate": 1.739402560455192e-05,
"step": 850
},
{
"embedding_loss": 0.0007,
"epoch": 0.2304147465437788,
"grad_norm": 0.061990801244974136,
"learning_rate": 1.7109530583214796e-05,
"step": 900
},
{
"embedding_loss": 0.0015,
"epoch": 0.24321556579621095,
"grad_norm": 0.027544036507606506,
"learning_rate": 1.682503556187767e-05,
"step": 950
},
{
"embedding_loss": 0.0017,
"epoch": 0.2560163850486431,
"grad_norm": 0.04399234429001808,
"learning_rate": 1.654054054054054e-05,
"step": 1000
},
{
"embedding_loss": 0.0014,
"epoch": 0.26881720430107525,
"grad_norm": 0.01828560046851635,
"learning_rate": 1.6256045519203415e-05,
"step": 1050
},
{
"embedding_loss": 0.0003,
"epoch": 0.2816180235535074,
"grad_norm": 0.018775783479213715,
"learning_rate": 1.597155049786629e-05,
"step": 1100
},
{
"embedding_loss": 0.0006,
"epoch": 0.2944188428059396,
"grad_norm": 0.01576610654592514,
"learning_rate": 1.5687055476529163e-05,
"step": 1150
},
{
"embedding_loss": 0.001,
"epoch": 0.30721966205837176,
"grad_norm": 0.14026065170764923,
"learning_rate": 1.5402560455192034e-05,
"step": 1200
},
{
"embedding_loss": 0.0007,
"epoch": 0.3200204813108039,
"grad_norm": 0.01965928263962269,
"learning_rate": 1.511806543385491e-05,
"step": 1250
},
{
"embedding_loss": 0.0002,
"epoch": 0.33282130056323606,
"grad_norm": 0.014394218102097511,
"learning_rate": 1.4833570412517782e-05,
"step": 1300
},
{
"embedding_loss": 0.0012,
"epoch": 0.3456221198156682,
"grad_norm": 0.018288280814886093,
"learning_rate": 1.4549075391180656e-05,
"step": 1350
},
{
"embedding_loss": 0.0006,
"epoch": 0.35842293906810035,
"grad_norm": 0.017536135390400887,
"learning_rate": 1.4264580369843529e-05,
"step": 1400
},
{
"embedding_loss": 0.0003,
"epoch": 0.3712237583205325,
"grad_norm": 0.012779198586940765,
"learning_rate": 1.3980085348506403e-05,
"step": 1450
},
{
"embedding_loss": 0.0005,
"epoch": 0.38402457757296465,
"grad_norm": 0.19666947424411774,
"learning_rate": 1.3695590327169275e-05,
"step": 1500
},
{
"embedding_loss": 0.0002,
"epoch": 0.3968253968253968,
"grad_norm": 0.008438820950686932,
"learning_rate": 1.341109530583215e-05,
"step": 1550
},
{
"embedding_loss": 0.0004,
"epoch": 0.409626216077829,
"grad_norm": 0.009661088697612286,
"learning_rate": 1.3126600284495022e-05,
"step": 1600
},
{
"embedding_loss": 0.0009,
"epoch": 0.42242703533026116,
"grad_norm": 0.02346787601709366,
"learning_rate": 1.2842105263157896e-05,
"step": 1650
},
{
"embedding_loss": 0.0007,
"epoch": 0.4352278545826933,
"grad_norm": 0.0062308646738529205,
"learning_rate": 1.255761024182077e-05,
"step": 1700
},
{
"embedding_loss": 0.0003,
"epoch": 0.44802867383512546,
"grad_norm": 0.013097619637846947,
"learning_rate": 1.2273115220483642e-05,
"step": 1750
},
{
"embedding_loss": 0.0001,
"epoch": 0.4608294930875576,
"grad_norm": 0.008484157733619213,
"learning_rate": 1.1988620199146516e-05,
"step": 1800
},
{
"embedding_loss": 0.0006,
"epoch": 0.47363031233998976,
"grad_norm": 0.010278033092617989,
"learning_rate": 1.1704125177809389e-05,
"step": 1850
},
{
"embedding_loss": 0.0004,
"epoch": 0.4864311315924219,
"grad_norm": 0.012740347534418106,
"learning_rate": 1.1419630156472263e-05,
"step": 1900
},
{
"embedding_loss": 0.0004,
"epoch": 0.49923195084485406,
"grad_norm": 0.022875774651765823,
"learning_rate": 1.1135135135135135e-05,
"step": 1950
},
{
"embedding_loss": 0.0001,
"epoch": 0.5120327700972862,
"grad_norm": 0.007295957300812006,
"learning_rate": 1.085064011379801e-05,
"step": 2000
},
{
"embedding_loss": 0.0004,
"epoch": 0.5248335893497184,
"grad_norm": 0.008994905278086662,
"learning_rate": 1.0566145092460882e-05,
"step": 2050
},
{
"embedding_loss": 0.0003,
"epoch": 0.5376344086021505,
"grad_norm": 0.01373240165412426,
"learning_rate": 1.0281650071123756e-05,
"step": 2100
},
{
"embedding_loss": 0.0012,
"epoch": 0.5504352278545827,
"grad_norm": 0.017664149403572083,
"learning_rate": 9.99715504978663e-06,
"step": 2150
},
{
"embedding_loss": 0.0006,
"epoch": 0.5632360471070148,
"grad_norm": 0.026641126722097397,
"learning_rate": 9.712660028449504e-06,
"step": 2200
},
{
"embedding_loss": 0.0001,
"epoch": 0.576036866359447,
"grad_norm": 0.015163728035986423,
"learning_rate": 9.428165007112376e-06,
"step": 2250
},
{
"embedding_loss": 0.0003,
"epoch": 0.5888376856118792,
"grad_norm": 0.014809815213084221,
"learning_rate": 9.14366998577525e-06,
"step": 2300
},
{
"embedding_loss": 0.0007,
"epoch": 0.6016385048643114,
"grad_norm": 0.007227804511785507,
"learning_rate": 8.859174964438123e-06,
"step": 2350
},
{
"embedding_loss": 0.0003,
"epoch": 0.6144393241167435,
"grad_norm": 0.010935621336102486,
"learning_rate": 8.574679943100997e-06,
"step": 2400
},
{
"embedding_loss": 0.0004,
"epoch": 0.6272401433691757,
"grad_norm": 0.0073186722584068775,
"learning_rate": 8.29018492176387e-06,
"step": 2450
},
{
"embedding_loss": 0.0006,
"epoch": 0.6400409626216078,
"grad_norm": 0.006618503015488386,
"learning_rate": 8.005689900426743e-06,
"step": 2500
},
{
"embedding_loss": 0.0001,
"epoch": 0.65284178187404,
"grad_norm": 0.008351747877895832,
"learning_rate": 7.721194879089616e-06,
"step": 2550
},
{
"embedding_loss": 0.0003,
"epoch": 0.6656426011264721,
"grad_norm": 0.008504342287778854,
"learning_rate": 7.43669985775249e-06,
"step": 2600
},
{
"embedding_loss": 0.0006,
"epoch": 0.6784434203789043,
"grad_norm": 0.015397731214761734,
"learning_rate": 7.152204836415363e-06,
"step": 2650
},
{
"embedding_loss": 0.001,
"epoch": 0.6912442396313364,
"grad_norm": 0.004343625158071518,
"learning_rate": 6.867709815078236e-06,
"step": 2700
},
{
"embedding_loss": 0.0004,
"epoch": 0.7040450588837686,
"grad_norm": 0.010210598818957806,
"learning_rate": 6.58321479374111e-06,
"step": 2750
},
{
"embedding_loss": 0.0008,
"epoch": 0.7168458781362007,
"grad_norm": 0.021265419200062752,
"learning_rate": 6.2987197724039836e-06,
"step": 2800
},
{
"embedding_loss": 0.0003,
"epoch": 0.7296466973886329,
"grad_norm": 0.006350652314722538,
"learning_rate": 6.014224751066858e-06,
"step": 2850
},
{
"embedding_loss": 0.0007,
"epoch": 0.742447516641065,
"grad_norm": 0.042495131492614746,
"learning_rate": 5.729729729729731e-06,
"step": 2900
},
{
"embedding_loss": 0.0007,
"epoch": 0.7552483358934972,
"grad_norm": 0.003976090345531702,
"learning_rate": 5.445234708392604e-06,
"step": 2950
},
{
"embedding_loss": 0.0007,
"epoch": 0.7680491551459293,
"grad_norm": 0.008590229786932468,
"learning_rate": 5.160739687055477e-06,
"step": 3000
},
{
"embedding_loss": 0.0003,
"epoch": 0.7808499743983615,
"grad_norm": 0.006639127153903246,
"learning_rate": 4.8762446657183506e-06,
"step": 3050
},
{
"embedding_loss": 0.0003,
"epoch": 0.7936507936507936,
"grad_norm": 0.006947138346731663,
"learning_rate": 4.591749644381224e-06,
"step": 3100
},
{
"embedding_loss": 0.0003,
"epoch": 0.8064516129032258,
"grad_norm": 0.00519227422773838,
"learning_rate": 4.307254623044097e-06,
"step": 3150
},
{
"embedding_loss": 0.0007,
"epoch": 0.819252432155658,
"grad_norm": 0.013285805471241474,
"learning_rate": 4.02275960170697e-06,
"step": 3200
},
{
"embedding_loss": 0.0001,
"epoch": 0.8320532514080902,
"grad_norm": 0.010509872809052467,
"learning_rate": 3.7382645803698435e-06,
"step": 3250
},
{
"embedding_loss": 0.0006,
"epoch": 0.8448540706605223,
"grad_norm": 0.0032197178807109594,
"learning_rate": 3.4537695590327167e-06,
"step": 3300
},
{
"embedding_loss": 0.0007,
"epoch": 0.8576548899129545,
"grad_norm": 0.007785377558320761,
"learning_rate": 3.169274537695591e-06,
"step": 3350
},
{
"embedding_loss": 0.0003,
"epoch": 0.8704557091653866,
"grad_norm": 0.0052650910802185535,
"learning_rate": 2.884779516358464e-06,
"step": 3400
},
{
"embedding_loss": 0.0009,
"epoch": 0.8832565284178188,
"grad_norm": 0.020432407036423683,
"learning_rate": 2.6002844950213373e-06,
"step": 3450
},
{
"embedding_loss": 0.0003,
"epoch": 0.8960573476702509,
"grad_norm": 0.009411387145519257,
"learning_rate": 2.3157894736842105e-06,
"step": 3500
},
{
"embedding_loss": 0.0004,
"epoch": 0.9088581669226831,
"grad_norm": 0.007589740678668022,
"learning_rate": 2.031294452347084e-06,
"step": 3550
},
{
"embedding_loss": 0.0004,
"epoch": 0.9216589861751152,
"grad_norm": 0.12826202809810638,
"learning_rate": 1.7467994310099576e-06,
"step": 3600
},
{
"embedding_loss": 0.0001,
"epoch": 0.9344598054275474,
"grad_norm": 0.012935018166899681,
"learning_rate": 1.4623044096728308e-06,
"step": 3650
},
{
"embedding_loss": 0.0001,
"epoch": 0.9472606246799795,
"grad_norm": 0.008784984238445759,
"learning_rate": 1.1778093883357043e-06,
"step": 3700
},
{
"embedding_loss": 0.0007,
"epoch": 0.9600614439324117,
"grad_norm": 0.004482260439544916,
"learning_rate": 8.933143669985776e-07,
"step": 3750
},
{
"embedding_loss": 0.0006,
"epoch": 0.9728622631848438,
"grad_norm": 0.004357804544270039,
"learning_rate": 6.08819345661451e-07,
"step": 3800
},
{
"embedding_loss": 0.0005,
"epoch": 0.985663082437276,
"grad_norm": 0.014645060524344444,
"learning_rate": 3.2432432432432436e-07,
"step": 3850
},
{
"embedding_loss": 0.0003,
"epoch": 0.9984639016897081,
"grad_norm": 0.005299082491546869,
"learning_rate": 3.9829302987197725e-08,
"step": 3900
}
],
"logging_steps": 50,
"max_steps": 3906,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}