Laurie commited on
Commit
46e5f4f
·
1 Parent(s): c46a596

Upload 6 files

Browse files
Files changed (6) hide show
  1. chatglm-lora.pt +3 -0
  2. optimizer.pt +3 -0
  3. rng_state.pth +3 -0
  4. scaler.pt +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +240 -0
chatglm-lora.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f412adacf5a4507ed8024072697d523100b4451717144ccb5343bde1b4ea5da0
3
+ size 14700127
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6adc59fdd9ccb9075a598da7135ad61502dd7f79fb2a0736bda8361b413ed2e5
3
+ size 29393541
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0407b29532a356faab2f98d91dc78105842211ab2c6bec1c2c03ad8e8c4ae58
3
+ size 14575
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f382b9b3dd938f2cd8808d166786de50eef1d19e33eaa997aac976ed214737c0
3
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3365e38a8d8611c30cc6bdee6d1f36171d8dd376c0adfe6243f8ef24d68e8b08
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9141551206970433,
5
+ "global_step": 800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.06,
12
+ "learning_rate": 2.2499999999999998e-05,
13
+ "loss": 5.1881,
14
+ "step": 50
15
+ },
16
+ {
17
+ "epoch": 0.06,
18
+ "eval_loss": 5.518892288208008,
19
+ "eval_runtime": 281.7614,
20
+ "eval_samples_per_second": 16.539,
21
+ "eval_steps_per_second": 16.539,
22
+ "step": 50
23
+ },
24
+ {
25
+ "epoch": 0.11,
26
+ "learning_rate": 4.75e-05,
27
+ "loss": 3.8191,
28
+ "step": 100
29
+ },
30
+ {
31
+ "epoch": 0.11,
32
+ "eval_loss": 2.910094738006592,
33
+ "eval_runtime": 282.3915,
34
+ "eval_samples_per_second": 16.502,
35
+ "eval_steps_per_second": 16.502,
36
+ "step": 100
37
+ },
38
+ {
39
+ "epoch": 0.17,
40
+ "learning_rate": 7.25e-05,
41
+ "loss": 2.5718,
42
+ "step": 150
43
+ },
44
+ {
45
+ "epoch": 0.17,
46
+ "eval_loss": 2.240684986114502,
47
+ "eval_runtime": 283.3137,
48
+ "eval_samples_per_second": 16.448,
49
+ "eval_steps_per_second": 16.448,
50
+ "step": 150
51
+ },
52
+ {
53
+ "epoch": 0.23,
54
+ "learning_rate": 9.750000000000001e-05,
55
+ "loss": 2.3477,
56
+ "step": 200
57
+ },
58
+ {
59
+ "epoch": 0.23,
60
+ "eval_loss": 2.1633121967315674,
61
+ "eval_runtime": 282.161,
62
+ "eval_samples_per_second": 16.515,
63
+ "eval_steps_per_second": 16.515,
64
+ "step": 200
65
+ },
66
+ {
67
+ "epoch": 0.29,
68
+ "learning_rate": 0.0001225,
69
+ "loss": 2.3472,
70
+ "step": 250
71
+ },
72
+ {
73
+ "epoch": 0.29,
74
+ "eval_loss": 2.118901252746582,
75
+ "eval_runtime": 281.7032,
76
+ "eval_samples_per_second": 16.542,
77
+ "eval_steps_per_second": 16.542,
78
+ "step": 250
79
+ },
80
+ {
81
+ "epoch": 0.34,
82
+ "learning_rate": 0.0001475,
83
+ "loss": 2.2188,
84
+ "step": 300
85
+ },
86
+ {
87
+ "epoch": 0.34,
88
+ "eval_loss": 2.1062021255493164,
89
+ "eval_runtime": 282.3706,
90
+ "eval_samples_per_second": 16.503,
91
+ "eval_steps_per_second": 16.503,
92
+ "step": 300
93
+ },
94
+ {
95
+ "epoch": 0.4,
96
+ "learning_rate": 0.0001725,
97
+ "loss": 2.2085,
98
+ "step": 350
99
+ },
100
+ {
101
+ "epoch": 0.4,
102
+ "eval_loss": 2.0873425006866455,
103
+ "eval_runtime": 282.0216,
104
+ "eval_samples_per_second": 16.524,
105
+ "eval_steps_per_second": 16.524,
106
+ "step": 350
107
+ },
108
+ {
109
+ "epoch": 0.46,
110
+ "learning_rate": 0.0001975,
111
+ "loss": 2.1271,
112
+ "step": 400
113
+ },
114
+ {
115
+ "epoch": 0.46,
116
+ "eval_loss": 2.0874459743499756,
117
+ "eval_runtime": 283.0992,
118
+ "eval_samples_per_second": 16.461,
119
+ "eval_steps_per_second": 16.461,
120
+ "step": 400
121
+ },
122
+ {
123
+ "epoch": 0.51,
124
+ "learning_rate": 0.00022250000000000001,
125
+ "loss": 2.1834,
126
+ "step": 450
127
+ },
128
+ {
129
+ "epoch": 0.51,
130
+ "eval_loss": 2.0584352016448975,
131
+ "eval_runtime": 281.7518,
132
+ "eval_samples_per_second": 16.539,
133
+ "eval_steps_per_second": 16.539,
134
+ "step": 450
135
+ },
136
+ {
137
+ "epoch": 0.57,
138
+ "learning_rate": 0.0002475,
139
+ "loss": 2.1927,
140
+ "step": 500
141
+ },
142
+ {
143
+ "epoch": 0.57,
144
+ "eval_loss": 2.050870656967163,
145
+ "eval_runtime": 281.7194,
146
+ "eval_samples_per_second": 16.541,
147
+ "eval_steps_per_second": 16.541,
148
+ "step": 500
149
+ },
150
+ {
151
+ "epoch": 0.63,
152
+ "learning_rate": 0.0002725,
153
+ "loss": 2.1816,
154
+ "step": 550
155
+ },
156
+ {
157
+ "epoch": 0.63,
158
+ "eval_loss": 2.039118766784668,
159
+ "eval_runtime": 282.0613,
160
+ "eval_samples_per_second": 16.521,
161
+ "eval_steps_per_second": 16.521,
162
+ "step": 550
163
+ },
164
+ {
165
+ "epoch": 0.69,
166
+ "learning_rate": 0.00029749999999999997,
167
+ "loss": 2.2131,
168
+ "step": 600
169
+ },
170
+ {
171
+ "epoch": 0.69,
172
+ "eval_loss": 2.0333147048950195,
173
+ "eval_runtime": 282.4675,
174
+ "eval_samples_per_second": 16.497,
175
+ "eval_steps_per_second": 16.497,
176
+ "step": 600
177
+ },
178
+ {
179
+ "epoch": 0.74,
180
+ "learning_rate": 0.00032250000000000003,
181
+ "loss": 2.2322,
182
+ "step": 650
183
+ },
184
+ {
185
+ "epoch": 0.74,
186
+ "eval_loss": 2.0386910438537598,
187
+ "eval_runtime": 282.3106,
188
+ "eval_samples_per_second": 16.507,
189
+ "eval_steps_per_second": 16.507,
190
+ "step": 650
191
+ },
192
+ {
193
+ "epoch": 0.8,
194
+ "learning_rate": 0.0003475,
195
+ "loss": 2.2614,
196
+ "step": 700
197
+ },
198
+ {
199
+ "epoch": 0.8,
200
+ "eval_loss": 2.025144577026367,
201
+ "eval_runtime": 285.8974,
202
+ "eval_samples_per_second": 16.3,
203
+ "eval_steps_per_second": 16.3,
204
+ "step": 700
205
+ },
206
+ {
207
+ "epoch": 0.86,
208
+ "learning_rate": 0.0003725,
209
+ "loss": 2.186,
210
+ "step": 750
211
+ },
212
+ {
213
+ "epoch": 0.86,
214
+ "eval_loss": 2.0244803428649902,
215
+ "eval_runtime": 284.3127,
216
+ "eval_samples_per_second": 16.39,
217
+ "eval_steps_per_second": 16.39,
218
+ "step": 750
219
+ },
220
+ {
221
+ "epoch": 0.91,
222
+ "learning_rate": 0.0003975,
223
+ "loss": 2.1677,
224
+ "step": 800
225
+ },
226
+ {
227
+ "epoch": 0.91,
228
+ "eval_loss": 2.0197227001190186,
229
+ "eval_runtime": 284.0212,
230
+ "eval_samples_per_second": 16.407,
231
+ "eval_steps_per_second": 16.407,
232
+ "step": 800
233
+ }
234
+ ],
235
+ "max_steps": 875,
236
+ "num_train_epochs": 1,
237
+ "total_flos": 4.324339205829427e+16,
238
+ "trial_name": null,
239
+ "trial_params": null
240
+ }