{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.08072653884964683, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008072653884964682, "grad_norm": 0.04381619393825531, "learning_rate": 4.999451708687114e-06, "logits/chosen": 14.719314575195312, "logits/rejected": 15.156938552856445, "logps/chosen": -0.2856016755104065, "logps/rejected": -0.31895095109939575, "loss": 0.9242, "rewards/accuracies": 0.4375, "rewards/chosen": -0.42840251326560974, "rewards/margins": 0.050023891031742096, "rewards/rejected": -0.47842639684677124, "step": 10 }, { "epoch": 0.016145307769929364, "grad_norm": 0.05155143886804581, "learning_rate": 4.997807075247147e-06, "logits/chosen": 14.559402465820312, "logits/rejected": 15.32939338684082, "logps/chosen": -0.2736968398094177, "logps/rejected": -0.3458033502101898, "loss": 0.9127, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.4105452597141266, "rewards/margins": 0.10815979540348053, "rewards/rejected": -0.5187050104141235, "step": 20 }, { "epoch": 0.024217961654894045, "grad_norm": 0.05071854218840599, "learning_rate": 4.9950668210706795e-06, "logits/chosen": 14.653738021850586, "logits/rejected": 15.168347358703613, "logps/chosen": -0.2985997200012207, "logps/rejected": -0.34624338150024414, "loss": 0.9141, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.44789963960647583, "rewards/margins": 0.07146544009447098, "rewards/rejected": -0.5193650722503662, "step": 30 }, { "epoch": 0.03229061553985873, "grad_norm": 0.052318744361400604, "learning_rate": 4.9912321481237616e-06, "logits/chosen": 14.621539115905762, "logits/rejected": 15.138806343078613, "logps/chosen": -0.27971988916397095, "logps/rejected": -0.360626757144928, "loss": 0.9313, "rewards/accuracies": 0.5625, "rewards/chosen": -0.4195798337459564, "rewards/margins": 0.12136033922433853, "rewards/rejected": -0.5409401655197144, "step": 40 }, { "epoch": 0.04036326942482341, "grad_norm": 0.06900553405284882, "learning_rate": 4.986304738420684e-06, "logits/chosen": 14.308789253234863, "logits/rejected": 14.605737686157227, "logps/chosen": -0.2685723304748535, "logps/rejected": -0.323064386844635, "loss": 0.9076, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.40285855531692505, "rewards/margins": 0.08173803985118866, "rewards/rejected": -0.4845965802669525, "step": 50 }, { "epoch": 0.04036326942482341, "eval_logits/chosen": 14.528907775878906, "eval_logits/rejected": 15.016877174377441, "eval_logps/chosen": -0.2801212966442108, "eval_logps/rejected": -0.34862396121025085, "eval_loss": 0.9108895063400269, "eval_rewards/accuracies": 0.5544554591178894, "eval_rewards/chosen": -0.4201819598674774, "eval_rewards/margins": 0.10275395959615707, "eval_rewards/rejected": -0.5229359865188599, "eval_runtime": 30.01, "eval_samples_per_second": 26.691, "eval_steps_per_second": 3.366, "step": 50 }, { "epoch": 0.04843592330978809, "grad_norm": 0.32321593165397644, "learning_rate": 4.980286753286196e-06, "logits/chosen": 14.644981384277344, "logits/rejected": 15.177103996276855, "logps/chosen": -0.26382654905319214, "logps/rejected": -0.33932510018348694, "loss": 0.9204, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.3957397937774658, "rewards/margins": 0.1132478266954422, "rewards/rejected": -0.5089876055717468, "step": 60 }, { "epoch": 0.056508577194752774, "grad_norm": 0.07268164306879044, "learning_rate": 4.973180832407471e-06, "logits/chosen": 14.562113761901855, "logits/rejected": 15.092450141906738, "logps/chosen": -0.2856511175632477, "logps/rejected": -0.34295767545700073, "loss": 0.915, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.42847663164138794, "rewards/margins": 0.08595988899469376, "rewards/rejected": -0.5144366025924683, "step": 70 }, { "epoch": 0.06458123107971746, "grad_norm": 0.06727313250303268, "learning_rate": 4.964990092676263e-06, "logits/chosen": 13.979713439941406, "logits/rejected": 14.924532890319824, "logps/chosen": -0.27184560894966125, "logps/rejected": -0.3679867386817932, "loss": 0.9223, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.4077683985233307, "rewards/margins": 0.14421164989471436, "rewards/rejected": -0.5519800186157227, "step": 80 }, { "epoch": 0.07265388496468214, "grad_norm": 0.06138753890991211, "learning_rate": 4.9557181268217225e-06, "logits/chosen": 14.366241455078125, "logits/rejected": 14.924840927124023, "logps/chosen": -0.2656143009662628, "logps/rejected": -0.3583180606365204, "loss": 0.9117, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.39842137694358826, "rewards/margins": 0.13905569911003113, "rewards/rejected": -0.5374771356582642, "step": 90 }, { "epoch": 0.08072653884964683, "grad_norm": 0.14299456775188446, "learning_rate": 4.9453690018345144e-06, "logits/chosen": 14.401769638061523, "logits/rejected": 14.532609939575195, "logps/chosen": -0.2966740131378174, "logps/rejected": -0.3347373604774475, "loss": 0.9162, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4450109899044037, "rewards/margins": 0.05709508806467056, "rewards/rejected": -0.5021060705184937, "step": 100 }, { "epoch": 0.08072653884964683, "eval_logits/chosen": 14.096770286560059, "eval_logits/rejected": 14.638699531555176, "eval_logps/chosen": -0.2713560461997986, "eval_logps/rejected": -0.35128629207611084, "eval_loss": 0.900999128818512, "eval_rewards/accuracies": 0.5643564462661743, "eval_rewards/chosen": -0.4070340394973755, "eval_rewards/margins": 0.11989541351795197, "eval_rewards/rejected": -0.5269294381141663, "eval_runtime": 29.986, "eval_samples_per_second": 26.712, "eval_steps_per_second": 3.368, "step": 100 } ], "logging_steps": 10, "max_steps": 1500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.41040928372949e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }