cat-searcher commited on
Commit
f3d421e
·
verified ·
1 Parent(s): 7f3f51c

Training in progress, epoch 8, checkpoint

Browse files
Files changed (29) hide show
  1. last-checkpoint/global_step1580/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  2. last-checkpoint/global_step1580/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step1580/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step1580/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step1580/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step1580/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step1580/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step1580/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step1580/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  10. last-checkpoint/global_step1580/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step1580/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step1580/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step1580/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step1580/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step1580/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step1580/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/latest +1 -1
  18. last-checkpoint/model-00001-of-00002.safetensors +1 -1
  19. last-checkpoint/model-00002-of-00002.safetensors +1 -1
  20. last-checkpoint/rng_state_0.pth +1 -1
  21. last-checkpoint/rng_state_1.pth +1 -1
  22. last-checkpoint/rng_state_2.pth +1 -1
  23. last-checkpoint/rng_state_3.pth +1 -1
  24. last-checkpoint/rng_state_4.pth +1 -1
  25. last-checkpoint/rng_state_5.pth +1 -1
  26. last-checkpoint/rng_state_6.pth +1 -1
  27. last-checkpoint/rng_state_7.pth +1 -1
  28. last-checkpoint/scheduler.pt +1 -1
  29. last-checkpoint/trainer_state.json +302 -2
last-checkpoint/global_step1580/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35b8d916c014ce248bb98e73a3806d83db0060df242c4851d6304d9ac624587a
3
+ size 2506176112
last-checkpoint/global_step1580/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffec229c0d2dd05f50e617b7f5dbb7e07ac92f8a1f9a7f705ff3d70f90464e39
3
+ size 2506176112
last-checkpoint/global_step1580/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03483348e6552301cb13fc8304e7de58faa7fa806b7ccc77448967ab7c9c140b
3
+ size 2506176112
last-checkpoint/global_step1580/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756c27dc2358f438b45bd626b25026fae77fc421f5018eb282c8f023f0dcc46e
3
+ size 2506176112
last-checkpoint/global_step1580/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd329bfac8f118af8025cdaeaf69703e913ea32a6f301d6af1dfccdb73171352
3
+ size 2506176112
last-checkpoint/global_step1580/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484b5403a8824ef95137ff82bb37f4e4a7236ad81b461ca02535d897bb210d64
3
+ size 2506176112
last-checkpoint/global_step1580/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffa5eb8f4626caf4beb17f20b54f219b9c6a63e53e69c89ba3b916fa5409f27
3
+ size 2506176112
last-checkpoint/global_step1580/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a8e73036cb47e552f1bb09c2098f42541b69bf120c99ec30d00ddf8d53789a
3
+ size 2506176112
last-checkpoint/global_step1580/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68743fb792877033bfb3aedb8d7d498df0e80df4fa97f3b49df7392f14046293
3
+ size 85570
last-checkpoint/global_step1580/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:562ab1107c2e6e61e559e052443b7b8804c2b7284cebe971d0bd6bbf89c38695
3
+ size 85506
last-checkpoint/global_step1580/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d09fd2b95b4260467ebd2b91d2758ff7dac5a6b4d5bc17d41d9e4f4034421dba
3
+ size 85506
last-checkpoint/global_step1580/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d82f1ee7ab57e1ddcc9ef0f135ee190da1b92285b3914a186314ade3393eaab3
3
+ size 85506
last-checkpoint/global_step1580/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d9076aab39103ad4399e01117b7df76cf8bf23a6f4c5764716d87171d24267e
3
+ size 85506
last-checkpoint/global_step1580/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9908edca5b40c6ba0f8ba54a93a80c05c032c21df25828608487d14975abeced
3
+ size 85506
last-checkpoint/global_step1580/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eb8caed2a09d6d17007ab0248eb74db5df6263c83f0a0cad02eecc5f8ceb78c
3
+ size 85506
last-checkpoint/global_step1580/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6a30d04f9a8013c674a78929c0d2dbe739edd246e8526478f31634ef534f97f
3
+ size 85506
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1382
 
1
+ global_step1580
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f02726a44bd4780d17e35e236963b8fce24dc953bf71f40b4952f4922816c337
3
  size 4945242264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d4c8b92632023613bca2cbaab82aff2bfc0f1b7c62aab671b9cfd3d8f06b448
3
  size 4945242264
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b91f43ac308f747e0c6169d0cc727dbf1d843ea3d107cf67c9329f99912a02
3
  size 67121608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12c27030d235bb5b6be6f9fb6111ad0d2904a9ae1dbba3911f671ef6abf0b238
3
  size 67121608
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7d7e02ffb4d440dce7ab4ce0b5617578ec9ce3672acee7434ed6f1153f1ae0c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee195ebde9bf012f945f068f133e7fe22fef5450c496607e3ef11cc2034a186
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5b40ca759e432b2688f021b81291d74a40f56a205e9842119f7e772275eebd3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf0fe1a3315d60b197207c5cb249d0ce4f9ce6d7585e696276d9ffbcb5379893
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdaef955ddd36d6bc1c40584113dd6205483e2aa85b02439b8b27e82e02a8359
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01c5bd6eae04542162b3e94245555bd81312524066bc01d0ebbfc4fd8554240e
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10b14ae5db356e6512538751d6b386c190754e307cc99cd652d5c6dd891e1f82
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45b74942c68b00d657cfce186b0eeb4aa8f52efa04b114803b605fee8de45972
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f26e28be26826eeeed244b77185c67b443ac185175f8d4bf5ba94caa8b271bc5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd66dd2ba958fc9929441817d8154abbd929c0aa9cd66ff3171965bdaaf5d78
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:847cedc1d6ca26f299a132c2ade9754887374acb9d98f26594a85d4c7742d474
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89eeedefdd62514d0130acc330a5c08e9774c95d38c60997905cfd65fc54b710
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcd043d1690ae0ff6991b03322799a0b28f021427b15fd9f1e5ed8b9905d9307
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43ced939100082608f57561a10e1888e69210c80675068db530c5815889910e
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:772190f7e6667c865d25fc72da7bdd1b5d39f46fe03bb5c2d754aee1ad3c99c7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d8d6ee244d99525e7004ae3f02d44ae63082d81fbbab7306f641ac6aeeb736f
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dd882b6f08b8cde72a038748b9c995e480ab99405e1f7e6c7a03592bdad6355
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4222d0b9fadaea1c2825a6be6146f638b45462a966591dbc095e76b291c3b43f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.99746835443038,
5
  "eval_steps": 100,
6
- "global_step": 1382,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2092,6 +2092,306 @@
2092
  "rewards/margins": 0.36379513144493103,
2093
  "rewards/rejected": -0.25348588824272156,
2094
  "step": 1380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2095
  }
2096
  ],
2097
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.0,
5
  "eval_steps": 100,
6
+ "global_step": 1580,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2092
  "rewards/margins": 0.36379513144493103,
2093
  "rewards/rejected": -0.25348588824272156,
2094
  "step": 1380
2095
+ },
2096
+ {
2097
+ "epoch": 7.037974683544304,
2098
+ "grad_norm": 1158283.9951295503,
2099
+ "learning_rate": 3.3782513318708864e-07,
2100
+ "logits/chosen": -4.4635396003723145,
2101
+ "logits/rejected": -4.055373668670654,
2102
+ "logps/chosen": -126.25242614746094,
2103
+ "logps/rejected": -513.0021362304688,
2104
+ "loss": 32182.2562,
2105
+ "rewards/accuracies": 0.9750000238418579,
2106
+ "rewards/chosen": 0.12364669889211655,
2107
+ "rewards/margins": 0.39015716314315796,
2108
+ "rewards/rejected": -0.2665104568004608,
2109
+ "step": 1390
2110
+ },
2111
+ {
2112
+ "epoch": 7.0886075949367084,
2113
+ "grad_norm": 1635336.0000705447,
2114
+ "learning_rate": 3.3625822626136005e-07,
2115
+ "logits/chosen": -3.2711379528045654,
2116
+ "logits/rejected": -2.849708080291748,
2117
+ "logps/chosen": -120.3502426147461,
2118
+ "logps/rejected": -554.61669921875,
2119
+ "loss": 28154.0125,
2120
+ "rewards/accuracies": 1.0,
2121
+ "rewards/chosen": 0.1400633156299591,
2122
+ "rewards/margins": 0.4437219500541687,
2123
+ "rewards/rejected": -0.3036586344242096,
2124
+ "step": 1400
2125
+ },
2126
+ {
2127
+ "epoch": 7.139240506329114,
2128
+ "grad_norm": 1478880.6175367055,
2129
+ "learning_rate": 3.346913193356314e-07,
2130
+ "logits/chosen": -1.498684048652649,
2131
+ "logits/rejected": -1.5719478130340576,
2132
+ "logps/chosen": -97.41731262207031,
2133
+ "logps/rejected": -528.29833984375,
2134
+ "loss": 30443.8531,
2135
+ "rewards/accuracies": 1.0,
2136
+ "rewards/chosen": 0.13250485062599182,
2137
+ "rewards/margins": 0.4276755452156067,
2138
+ "rewards/rejected": -0.29517072439193726,
2139
+ "step": 1410
2140
+ },
2141
+ {
2142
+ "epoch": 7.189873417721519,
2143
+ "grad_norm": 1190966.9261622827,
2144
+ "learning_rate": 3.331244124099029e-07,
2145
+ "logits/chosen": -3.576815366744995,
2146
+ "logits/rejected": -3.1508662700653076,
2147
+ "logps/chosen": -92.4610595703125,
2148
+ "logps/rejected": -499.2225646972656,
2149
+ "loss": 30200.7656,
2150
+ "rewards/accuracies": 0.987500011920929,
2151
+ "rewards/chosen": 0.1342589408159256,
2152
+ "rewards/margins": 0.40714582800865173,
2153
+ "rewards/rejected": -0.2728869318962097,
2154
+ "step": 1420
2155
+ },
2156
+ {
2157
+ "epoch": 7.2405063291139244,
2158
+ "grad_norm": 1654460.4321586012,
2159
+ "learning_rate": 3.3155750548417424e-07,
2160
+ "logits/chosen": -3.6517982482910156,
2161
+ "logits/rejected": -2.912386894226074,
2162
+ "logps/chosen": -113.77073669433594,
2163
+ "logps/rejected": -548.2919921875,
2164
+ "loss": 29291.1719,
2165
+ "rewards/accuracies": 0.9750000238418579,
2166
+ "rewards/chosen": 0.13462531566619873,
2167
+ "rewards/margins": 0.435891717672348,
2168
+ "rewards/rejected": -0.3012663722038269,
2169
+ "step": 1430
2170
+ },
2171
+ {
2172
+ "epoch": 7.291139240506329,
2173
+ "grad_norm": 1547048.8074025025,
2174
+ "learning_rate": 3.2999059855844565e-07,
2175
+ "logits/chosen": -4.762998580932617,
2176
+ "logits/rejected": -4.417517185211182,
2177
+ "logps/chosen": -103.59019470214844,
2178
+ "logps/rejected": -516.0870361328125,
2179
+ "loss": 30597.95,
2180
+ "rewards/accuracies": 0.987500011920929,
2181
+ "rewards/chosen": 0.1253672093153,
2182
+ "rewards/margins": 0.4090943932533264,
2183
+ "rewards/rejected": -0.28372713923454285,
2184
+ "step": 1440
2185
+ },
2186
+ {
2187
+ "epoch": 7.341772151898734,
2188
+ "grad_norm": 1083334.846955902,
2189
+ "learning_rate": 3.28423691632717e-07,
2190
+ "logits/chosen": -4.341902732849121,
2191
+ "logits/rejected": -3.4809889793395996,
2192
+ "logps/chosen": -105.1113052368164,
2193
+ "logps/rejected": -537.7858276367188,
2194
+ "loss": 28933.9125,
2195
+ "rewards/accuracies": 1.0,
2196
+ "rewards/chosen": 0.139817476272583,
2197
+ "rewards/margins": 0.4371423125267029,
2198
+ "rewards/rejected": -0.2973248362541199,
2199
+ "step": 1450
2200
+ },
2201
+ {
2202
+ "epoch": 7.3924050632911396,
2203
+ "grad_norm": 1583721.4157786674,
2204
+ "learning_rate": 3.268567847069884e-07,
2205
+ "logits/chosen": -5.8856353759765625,
2206
+ "logits/rejected": -5.3746867179870605,
2207
+ "logps/chosen": -94.76522827148438,
2208
+ "logps/rejected": -525.3110961914062,
2209
+ "loss": 29575.7844,
2210
+ "rewards/accuracies": 0.9750000238418579,
2211
+ "rewards/chosen": 0.13582661747932434,
2212
+ "rewards/margins": 0.4354213774204254,
2213
+ "rewards/rejected": -0.29959478974342346,
2214
+ "step": 1460
2215
+ },
2216
+ {
2217
+ "epoch": 7.443037974683544,
2218
+ "grad_norm": 1391896.6733071958,
2219
+ "learning_rate": 3.252898777812598e-07,
2220
+ "logits/chosen": -3.2749342918395996,
2221
+ "logits/rejected": -3.6061177253723145,
2222
+ "logps/chosen": -99.21089172363281,
2223
+ "logps/rejected": -534.4422607421875,
2224
+ "loss": 29207.5719,
2225
+ "rewards/accuracies": 1.0,
2226
+ "rewards/chosen": 0.1312985122203827,
2227
+ "rewards/margins": 0.433136522769928,
2228
+ "rewards/rejected": -0.3018379807472229,
2229
+ "step": 1470
2230
+ },
2231
+ {
2232
+ "epoch": 7.493670886075949,
2233
+ "grad_norm": 1294960.5242478126,
2234
+ "learning_rate": 3.237229708555312e-07,
2235
+ "logits/chosen": -2.985567808151245,
2236
+ "logits/rejected": -1.8726612329483032,
2237
+ "logps/chosen": -112.32755279541016,
2238
+ "logps/rejected": -509.37286376953125,
2239
+ "loss": 29187.1594,
2240
+ "rewards/accuracies": 0.9750000238418579,
2241
+ "rewards/chosen": 0.1289207637310028,
2242
+ "rewards/margins": 0.4079267978668213,
2243
+ "rewards/rejected": -0.27900606393814087,
2244
+ "step": 1480
2245
+ },
2246
+ {
2247
+ "epoch": 7.544303797468355,
2248
+ "grad_norm": 1193173.6877739348,
2249
+ "learning_rate": 3.2215606392980255e-07,
2250
+ "logits/chosen": -2.0656161308288574,
2251
+ "logits/rejected": -2.3443799018859863,
2252
+ "logps/chosen": -97.64754486083984,
2253
+ "logps/rejected": -511.40576171875,
2254
+ "loss": 29322.4313,
2255
+ "rewards/accuracies": 0.987500011920929,
2256
+ "rewards/chosen": 0.13589712977409363,
2257
+ "rewards/margins": 0.413860946893692,
2258
+ "rewards/rejected": -0.2779638171195984,
2259
+ "step": 1490
2260
+ },
2261
+ {
2262
+ "epoch": 7.594936708860759,
2263
+ "grad_norm": 1279108.0637389964,
2264
+ "learning_rate": 3.2058915700407396e-07,
2265
+ "logits/chosen": -3.5005557537078857,
2266
+ "logits/rejected": -3.4204413890838623,
2267
+ "logps/chosen": -107.39742279052734,
2268
+ "logps/rejected": -530.2638549804688,
2269
+ "loss": 27542.3625,
2270
+ "rewards/accuracies": 0.987500011920929,
2271
+ "rewards/chosen": 0.13995657861232758,
2272
+ "rewards/margins": 0.42647701501846313,
2273
+ "rewards/rejected": -0.28652042150497437,
2274
+ "step": 1500
2275
+ },
2276
+ {
2277
+ "epoch": 7.6455696202531644,
2278
+ "grad_norm": 2707102.044355496,
2279
+ "learning_rate": 3.190222500783453e-07,
2280
+ "logits/chosen": -4.715664863586426,
2281
+ "logits/rejected": -4.245431900024414,
2282
+ "logps/chosen": -101.01532745361328,
2283
+ "logps/rejected": -561.7377319335938,
2284
+ "loss": 29571.3625,
2285
+ "rewards/accuracies": 1.0,
2286
+ "rewards/chosen": 0.14493677020072937,
2287
+ "rewards/margins": 0.4646069407463074,
2288
+ "rewards/rejected": -0.3196701109409332,
2289
+ "step": 1510
2290
+ },
2291
+ {
2292
+ "epoch": 7.69620253164557,
2293
+ "grad_norm": 1346703.2802720347,
2294
+ "learning_rate": 3.1745534315261674e-07,
2295
+ "logits/chosen": -2.4094414710998535,
2296
+ "logits/rejected": -2.316082715988159,
2297
+ "logps/chosen": -90.64556121826172,
2298
+ "logps/rejected": -524.6895751953125,
2299
+ "loss": 29962.2875,
2300
+ "rewards/accuracies": 1.0,
2301
+ "rewards/chosen": 0.1430484652519226,
2302
+ "rewards/margins": 0.4339544177055359,
2303
+ "rewards/rejected": -0.2909059524536133,
2304
+ "step": 1520
2305
+ },
2306
+ {
2307
+ "epoch": 7.746835443037975,
2308
+ "grad_norm": 1570681.8076612286,
2309
+ "learning_rate": 3.158884362268881e-07,
2310
+ "logits/chosen": -1.977839708328247,
2311
+ "logits/rejected": -1.748456597328186,
2312
+ "logps/chosen": -95.17073822021484,
2313
+ "logps/rejected": -536.3465576171875,
2314
+ "loss": 29005.075,
2315
+ "rewards/accuracies": 0.987500011920929,
2316
+ "rewards/chosen": 0.13247540593147278,
2317
+ "rewards/margins": 0.44195109605789185,
2318
+ "rewards/rejected": -0.3094756603240967,
2319
+ "step": 1530
2320
+ },
2321
+ {
2322
+ "epoch": 7.7974683544303796,
2323
+ "grad_norm": 1321655.562082779,
2324
+ "learning_rate": 3.143215293011595e-07,
2325
+ "logits/chosen": -5.75424861907959,
2326
+ "logits/rejected": -5.283251762390137,
2327
+ "logps/chosen": -109.5367202758789,
2328
+ "logps/rejected": -538.626220703125,
2329
+ "loss": 29057.1688,
2330
+ "rewards/accuracies": 0.9750000238418579,
2331
+ "rewards/chosen": 0.14621947705745697,
2332
+ "rewards/margins": 0.43537068367004395,
2333
+ "rewards/rejected": -0.2891511619091034,
2334
+ "step": 1540
2335
+ },
2336
+ {
2337
+ "epoch": 7.848101265822785,
2338
+ "grad_norm": 1360253.1191038797,
2339
+ "learning_rate": 3.1275462237543087e-07,
2340
+ "logits/chosen": -3.4590229988098145,
2341
+ "logits/rejected": -3.5962212085723877,
2342
+ "logps/chosen": -114.27938079833984,
2343
+ "logps/rejected": -566.5555419921875,
2344
+ "loss": 29716.3094,
2345
+ "rewards/accuracies": 1.0,
2346
+ "rewards/chosen": 0.1347774863243103,
2347
+ "rewards/margins": 0.44886675477027893,
2348
+ "rewards/rejected": -0.314089298248291,
2349
+ "step": 1550
2350
+ },
2351
+ {
2352
+ "epoch": 7.89873417721519,
2353
+ "grad_norm": 1269167.0621019504,
2354
+ "learning_rate": 3.111877154497023e-07,
2355
+ "logits/chosen": -1.0884647369384766,
2356
+ "logits/rejected": -0.7194244265556335,
2357
+ "logps/chosen": -89.07111358642578,
2358
+ "logps/rejected": -494.15789794921875,
2359
+ "loss": 29335.9875,
2360
+ "rewards/accuracies": 0.9750000238418579,
2361
+ "rewards/chosen": 0.1284293383359909,
2362
+ "rewards/margins": 0.4071559011936188,
2363
+ "rewards/rejected": -0.2787265181541443,
2364
+ "step": 1560
2365
+ },
2366
+ {
2367
+ "epoch": 7.949367088607595,
2368
+ "grad_norm": 1453875.4579149496,
2369
+ "learning_rate": 3.0962080852397364e-07,
2370
+ "logits/chosen": -2.750883102416992,
2371
+ "logits/rejected": -3.123683452606201,
2372
+ "logps/chosen": -98.0600357055664,
2373
+ "logps/rejected": -508.206298828125,
2374
+ "loss": 29392.4875,
2375
+ "rewards/accuracies": 0.9624999761581421,
2376
+ "rewards/chosen": 0.13056252896785736,
2377
+ "rewards/margins": 0.4083867073059082,
2378
+ "rewards/rejected": -0.2778242230415344,
2379
+ "step": 1570
2380
+ },
2381
+ {
2382
+ "epoch": 8.0,
2383
+ "grad_norm": 1764041.9454831716,
2384
+ "learning_rate": 3.0805390159824505e-07,
2385
+ "logits/chosen": -3.7020182609558105,
2386
+ "logits/rejected": -2.8675622940063477,
2387
+ "logps/chosen": -112.20640563964844,
2388
+ "logps/rejected": -527.1363525390625,
2389
+ "loss": 30214.225,
2390
+ "rewards/accuracies": 1.0,
2391
+ "rewards/chosen": 0.1370132714509964,
2392
+ "rewards/margins": 0.42148295044898987,
2393
+ "rewards/rejected": -0.2844696640968323,
2394
+ "step": 1580
2395
  }
2396
  ],
2397
  "logging_steps": 10,