aseratus1 commited on
Commit
2fd1647
·
verified ·
1 Parent(s): 4e93268

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:200a88f61a7e88a93fb00c5dc2da4ba537a81cd94444730c2f73356bcb9fa588
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c5ece6319df12b42a1321ab23cc0d598cb195389f44ad963eb2a55611fa1bb
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75023e07d60e4e00600120d9257e00fe03185b56bfc95b72efdb36dd2222933b
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38d317eefa2b55406d7080b244f8c8276671d3ad992293aa7b453dbb94ceedb8
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddf91ecd669f186faeccc2ddc419b2e245a4aca4accac80b04b3de8a0ea96372
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8432027dbbc8dab44e5086e964f9cdb0667714c28870c25714f6895b3ab20859
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6769de87f07ffd3568aa81b9a1fe6ecade894861382192b5a67c276923ba90ef
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e81835a72dc0c964cd591ffc9309a23b625c25e3c788c837ad74942ba4e6af0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4398421049118042,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1350",
4
- "epoch": 0.5118483412322274,
5
  "eval_steps": 150,
6
- "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1032,6 +1032,119 @@
1032
  "eval_samples_per_second": 21.042,
1033
  "eval_steps_per_second": 5.263,
1034
  "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1035
  }
1036
  ],
1037
  "logging_steps": 10,
@@ -1060,7 +1173,7 @@
1060
  "attributes": {}
1061
  }
1062
  },
1063
- "total_flos": 1.197948345721553e+18,
1064
  "train_batch_size": 8,
1065
  "trial_name": null,
1066
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.43063661456108093,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1500",
4
+ "epoch": 0.5687203791469194,
5
  "eval_steps": 150,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1032
  "eval_samples_per_second": 21.042,
1033
  "eval_steps_per_second": 5.263,
1034
  "step": 1350
1035
+ },
1036
+ {
1037
+ "epoch": 0.5156398104265403,
1038
+ "grad_norm": 0.7083640694618225,
1039
+ "learning_rate": 4.9028935042379894e-05,
1040
+ "loss": 0.7199,
1041
+ "step": 1360
1042
+ },
1043
+ {
1044
+ "epoch": 0.5194312796208531,
1045
+ "grad_norm": 0.7591987252235413,
1046
+ "learning_rate": 4.842218221162174e-05,
1047
+ "loss": 0.4614,
1048
+ "step": 1370
1049
+ },
1050
+ {
1051
+ "epoch": 0.5232227488151658,
1052
+ "grad_norm": 0.6956468820571899,
1053
+ "learning_rate": 4.78156618807346e-05,
1054
+ "loss": 0.4092,
1055
+ "step": 1380
1056
+ },
1057
+ {
1058
+ "epoch": 0.5270142180094787,
1059
+ "grad_norm": 0.6942465901374817,
1060
+ "learning_rate": 4.720946342372596e-05,
1061
+ "loss": 0.3517,
1062
+ "step": 1390
1063
+ },
1064
+ {
1065
+ "epoch": 0.5308056872037915,
1066
+ "grad_norm": 0.6259123086929321,
1067
+ "learning_rate": 4.660367616717337e-05,
1068
+ "loss": 0.258,
1069
+ "step": 1400
1070
+ },
1071
+ {
1072
+ "epoch": 0.5345971563981042,
1073
+ "grad_norm": 0.7933112978935242,
1074
+ "learning_rate": 4.599838937706183e-05,
1075
+ "loss": 0.6823,
1076
+ "step": 1410
1077
+ },
1078
+ {
1079
+ "epoch": 0.5383886255924171,
1080
+ "grad_norm": 0.8216676115989685,
1081
+ "learning_rate": 4.5393692245629936e-05,
1082
+ "loss": 0.4629,
1083
+ "step": 1420
1084
+ },
1085
+ {
1086
+ "epoch": 0.5421800947867299,
1087
+ "grad_norm": 0.6931918263435364,
1088
+ "learning_rate": 4.478967387822697e-05,
1089
+ "loss": 0.402,
1090
+ "step": 1430
1091
+ },
1092
+ {
1093
+ "epoch": 0.5459715639810426,
1094
+ "grad_norm": 0.6871898174285889,
1095
+ "learning_rate": 4.418642328018265e-05,
1096
+ "loss": 0.3666,
1097
+ "step": 1440
1098
+ },
1099
+ {
1100
+ "epoch": 0.5497630331753555,
1101
+ "grad_norm": 0.7060806751251221,
1102
+ "learning_rate": 4.3584029343691805e-05,
1103
+ "loss": 0.2629,
1104
+ "step": 1450
1105
+ },
1106
+ {
1107
+ "epoch": 0.5535545023696683,
1108
+ "grad_norm": 0.7377336025238037,
1109
+ "learning_rate": 4.298258083471563e-05,
1110
+ "loss": 0.7025,
1111
+ "step": 1460
1112
+ },
1113
+ {
1114
+ "epoch": 0.557345971563981,
1115
+ "grad_norm": 0.718527615070343,
1116
+ "learning_rate": 4.238216637990152e-05,
1117
+ "loss": 0.4684,
1118
+ "step": 1470
1119
+ },
1120
+ {
1121
+ "epoch": 0.5611374407582939,
1122
+ "grad_norm": 0.7513172626495361,
1123
+ "learning_rate": 4.178287445352348e-05,
1124
+ "loss": 0.4107,
1125
+ "step": 1480
1126
+ },
1127
+ {
1128
+ "epoch": 0.5649289099526066,
1129
+ "grad_norm": 0.6840953826904297,
1130
+ "learning_rate": 4.118479336444492e-05,
1131
+ "loss": 0.3441,
1132
+ "step": 1490
1133
+ },
1134
+ {
1135
+ "epoch": 0.5687203791469194,
1136
+ "grad_norm": 0.5901861190795898,
1137
+ "learning_rate": 4.058801124310595e-05,
1138
+ "loss": 0.2717,
1139
+ "step": 1500
1140
+ },
1141
+ {
1142
+ "epoch": 0.5687203791469194,
1143
+ "eval_loss": 0.43063661456108093,
1144
+ "eval_runtime": 211.4062,
1145
+ "eval_samples_per_second": 21.012,
1146
+ "eval_steps_per_second": 5.255,
1147
+ "step": 1500
1148
  }
1149
  ],
1150
  "logging_steps": 10,
 
1173
  "attributes": {}
1174
  }
1175
  },
1176
+ "total_flos": 1.3314517974405612e+18,
1177
  "train_batch_size": 8,
1178
  "trial_name": null,
1179
  "trial_params": null