|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 8304,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18063583815028902,
      "grad_norm": 7.173945903778076,
      "learning_rate": 9.397880539499037e-06,
      "loss": 6.2596,
      "step": 500
    },
    {
      "epoch": 0.36127167630057805,
      "grad_norm": 7.090274333953857,
      "learning_rate": 8.795761078998074e-06,
      "loss": 6.1274,
      "step": 1000
    },
    {
      "epoch": 0.541907514450867,
      "grad_norm": 6.882017612457275,
      "learning_rate": 8.19364161849711e-06,
      "loss": 6.0157,
      "step": 1500
    },
    {
      "epoch": 0.7225433526011561,
      "grad_norm": 6.622304916381836,
      "learning_rate": 7.591522157996147e-06,
      "loss": 5.9661,
      "step": 2000
    },
    {
      "epoch": 0.903179190751445,
      "grad_norm": 6.847517967224121,
      "learning_rate": 6.989402697495183e-06,
      "loss": 5.9061,
      "step": 2500
    },
    {
      "epoch": 1.083815028901734,
      "grad_norm": 6.756404399871826,
      "learning_rate": 6.387283236994221e-06,
      "loss": 5.8773,
      "step": 3000
    },
    {
      "epoch": 1.2644508670520231,
      "grad_norm": 6.702535629272461,
      "learning_rate": 5.785163776493257e-06,
      "loss": 5.8402,
      "step": 3500
    },
    {
      "epoch": 1.4450867052023122,
      "grad_norm": 6.539181709289551,
      "learning_rate": 5.1830443159922924e-06,
      "loss": 5.8124,
      "step": 4000
    },
    {
      "epoch": 1.6257225433526012,
      "grad_norm": 6.716819763183594,
      "learning_rate": 4.58092485549133e-06,
      "loss": 5.7907,
      "step": 4500
    },
    {
      "epoch": 1.80635838150289,
      "grad_norm": 6.560629367828369,
      "learning_rate": 3.978805394990367e-06,
      "loss": 5.769,
      "step": 5000
    },
    {
      "epoch": 1.9869942196531793,
      "grad_norm": 6.585288047790527,
      "learning_rate": 3.376685934489403e-06,
      "loss": 5.746,
      "step": 5500
    },
    {
      "epoch": 2.167630057803468,
      "grad_norm": 6.649946689605713,
      "learning_rate": 2.774566473988439e-06,
      "loss": 5.7399,
      "step": 6000
    },
    {
      "epoch": 2.348265895953757,
      "grad_norm": 6.707363605499268,
      "learning_rate": 2.1724470134874763e-06,
      "loss": 5.7306,
      "step": 6500
    },
    {
      "epoch": 2.5289017341040463,
      "grad_norm": 6.509890556335449,
      "learning_rate": 1.5703275529865126e-06,
      "loss": 5.7217,
      "step": 7000
    },
    {
      "epoch": 2.709537572254335,
      "grad_norm": 6.645078659057617,
      "learning_rate": 9.682080924855492e-07,
      "loss": 5.7249,
      "step": 7500
    },
    {
      "epoch": 2.8901734104046244,
      "grad_norm": 6.534315586090088,
      "learning_rate": 3.660886319845858e-07,
      "loss": 5.7126,
      "step": 8000
    },
    {
      "epoch": 3.0,
      "step": 8304,
      "total_flos": 1.7254148744798807e+18,
      "train_loss": 5.854142925872509,
      "train_runtime": 17646.7524,
      "train_samples_per_second": 15.055,
      "train_steps_per_second": 0.471
    }
  ],
  "logging_steps": 500,
  "max_steps": 8304,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7254148744798807e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}