Training in progress, epoch 1
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +37 -131
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4877660776
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b62ad8dfc69b349a5be9f007e5927b9ecb7634d812dc208614008da038d1831
|
3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4932751008
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14056e04f18eb0387c7d2f53179647859ce81dc31638ad36ca440092f9c44b64
|
3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4330865200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d80344a6095339b7749b0681dc4a983e1aa53168bf322f01ecb7ee19c01a62b3
|
3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1089994880
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6842654fc018d89da8a92f0b2b37fec7333381d118cf079bb6a90674d35c5bcb
|
3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
@@ -1,131 +1,37 @@
|
|
1 |
-
{"current_steps": 1, "total_steps":
|
2 |
-
{"current_steps": 2, "total_steps":
|
3 |
-
{"current_steps": 3, "total_steps":
|
4 |
-
{"current_steps": 4, "total_steps":
|
5 |
-
{"current_steps": 5, "total_steps":
|
6 |
-
{"current_steps": 6, "total_steps":
|
7 |
-
{"current_steps": 7, "total_steps":
|
8 |
-
{"current_steps": 8, "total_steps":
|
9 |
-
{"current_steps": 9, "total_steps":
|
10 |
-
{"current_steps": 10, "total_steps":
|
11 |
-
{"current_steps": 11, "total_steps":
|
12 |
-
{"current_steps": 12, "total_steps":
|
13 |
-
{"current_steps": 13, "total_steps":
|
14 |
-
{"current_steps": 14, "total_steps":
|
15 |
-
{"current_steps": 15, "total_steps":
|
16 |
-
{"current_steps": 16, "total_steps":
|
17 |
-
{"current_steps": 17, "total_steps":
|
18 |
-
{"current_steps": 18, "total_steps":
|
19 |
-
{"current_steps": 19, "total_steps":
|
20 |
-
{"current_steps": 20, "total_steps":
|
21 |
-
{"current_steps": 21, "total_steps":
|
22 |
-
{"current_steps": 22, "total_steps":
|
23 |
-
{"current_steps": 23, "total_steps":
|
24 |
-
{"current_steps": 24, "total_steps":
|
25 |
-
{"current_steps": 25, "total_steps":
|
26 |
-
{"current_steps": 26, "total_steps":
|
27 |
-
{"current_steps": 27, "total_steps":
|
28 |
-
{"current_steps": 28, "total_steps":
|
29 |
-
{"current_steps": 29, "total_steps":
|
30 |
-
{"current_steps": 30, "total_steps":
|
31 |
-
{"current_steps": 31, "total_steps":
|
32 |
-
{"current_steps": 32, "total_steps":
|
33 |
-
{"current_steps": 33, "total_steps":
|
34 |
-
{"current_steps": 34, "total_steps":
|
35 |
-
{"current_steps": 35, "total_steps":
|
36 |
-
{"current_steps": 36, "total_steps":
|
37 |
-
{"current_steps": 37, "total_steps":
|
38 |
-
{"current_steps": 38, "total_steps": 224, "loss": 0.4868, "lr": 1.972642905324813e-05, "epoch": 1.1620253164556962, "percentage": 16.96, "elapsed_time": "1:33:59", "remaining_time": "7:40:05"}
|
39 |
-
{"current_steps": 39, "total_steps": 224, "loss": 0.4908, "lr": 1.9688933620140638e-05, "epoch": 1.1924050632911392, "percentage": 17.41, "elapsed_time": "1:36:19", "remaining_time": "7:36:53"}
|
40 |
-
{"current_steps": 40, "total_steps": 224, "loss": 0.4863, "lr": 1.96490713150448e-05, "epoch": 1.2227848101265824, "percentage": 17.86, "elapsed_time": "1:38:44", "remaining_time": "7:34:14"}
|
41 |
-
{"current_steps": 41, "total_steps": 224, "loss": 0.4897, "lr": 1.9606851875768404e-05, "epoch": 1.2531645569620253, "percentage": 18.3, "elapsed_time": "1:41:10", "remaining_time": "7:31:35"}
|
42 |
-
{"current_steps": 42, "total_steps": 224, "loss": 0.4508, "lr": 1.956228561593441e-05, "epoch": 1.2835443037974683, "percentage": 18.75, "elapsed_time": "1:43:39", "remaining_time": "7:29:11"}
|
43 |
-
{"current_steps": 43, "total_steps": 224, "loss": 0.4796, "lr": 1.9515383422461457e-05, "epoch": 1.3139240506329113, "percentage": 19.2, "elapsed_time": "1:46:10", "remaining_time": "7:26:54"}
|
44 |
-
{"current_steps": 44, "total_steps": 224, "loss": 0.4819, "lr": 1.9466156752904344e-05, "epoch": 1.3443037974683545, "percentage": 19.64, "elapsed_time": "1:48:23", "remaining_time": "7:23:23"}
|
45 |
-
{"current_steps": 45, "total_steps": 224, "loss": 0.4696, "lr": 1.9414617632655114e-05, "epoch": 1.3746835443037975, "percentage": 20.09, "elapsed_time": "1:50:48", "remaining_time": "7:20:47"}
|
46 |
-
{"current_steps": 46, "total_steps": 224, "loss": 0.4718, "lr": 1.9360778652005416e-05, "epoch": 1.4050632911392404, "percentage": 20.54, "elapsed_time": "1:53:24", "remaining_time": "7:18:51"}
|
47 |
-
{"current_steps": 47, "total_steps": 224, "loss": 0.4655, "lr": 1.9304652963070868e-05, "epoch": 1.4354430379746836, "percentage": 20.98, "elapsed_time": "1:55:49", "remaining_time": "7:16:12"}
|
48 |
-
{"current_steps": 48, "total_steps": 224, "loss": 0.4713, "lr": 1.9246254276578175e-05, "epoch": 1.4658227848101266, "percentage": 21.43, "elapsed_time": "1:58:20", "remaining_time": "7:13:53"}
|
49 |
-
{"current_steps": 49, "total_steps": 224, "loss": 0.4719, "lr": 1.9185596858515797e-05, "epoch": 1.4962025316455696, "percentage": 21.88, "elapsed_time": "2:00:54", "remaining_time": "7:11:49"}
|
50 |
-
{"current_steps": 50, "total_steps": 224, "loss": 0.4597, "lr": 1.9122695526648968e-05, "epoch": 1.5265822784810128, "percentage": 22.32, "elapsed_time": "2:03:09", "remaining_time": "7:08:33"}
|
51 |
-
{"current_steps": 51, "total_steps": 224, "loss": 0.4786, "lr": 1.905756564689991e-05, "epoch": 1.5569620253164556, "percentage": 22.77, "elapsed_time": "2:05:34", "remaining_time": "7:05:58"}
|
52 |
-
{"current_steps": 52, "total_steps": 224, "loss": 0.4756, "lr": 1.8990223129594146e-05, "epoch": 1.5873417721518988, "percentage": 23.21, "elapsed_time": "2:08:01", "remaining_time": "7:03:29"}
|
53 |
-
{"current_steps": 53, "total_steps": 224, "loss": 0.4681, "lr": 1.8920684425573865e-05, "epoch": 1.6177215189873417, "percentage": 23.66, "elapsed_time": "2:10:42", "remaining_time": "7:01:43"}
|
54 |
-
{"current_steps": 54, "total_steps": 224, "loss": 0.4692, "lr": 1.884896652217917e-05, "epoch": 1.6481012658227847, "percentage": 24.11, "elapsed_time": "2:13:04", "remaining_time": "6:58:55"}
|
55 |
-
{"current_steps": 55, "total_steps": 224, "loss": 0.4776, "lr": 1.877508693909831e-05, "epoch": 1.678481012658228, "percentage": 24.55, "elapsed_time": "2:15:27", "remaining_time": "6:56:13"}
|
56 |
-
{"current_steps": 56, "total_steps": 224, "loss": 0.4581, "lr": 1.8699063724087905e-05, "epoch": 1.7088607594936709, "percentage": 25.0, "elapsed_time": "2:17:46", "remaining_time": "6:53:19"}
|
57 |
-
{"current_steps": 57, "total_steps": 224, "loss": 0.4676, "lr": 1.862091544856407e-05, "epoch": 1.7392405063291139, "percentage": 25.45, "elapsed_time": "2:20:27", "remaining_time": "6:51:30"}
|
58 |
-
{"current_steps": 58, "total_steps": 224, "loss": 0.4675, "lr": 1.854066120306571e-05, "epoch": 1.769620253164557, "percentage": 25.89, "elapsed_time": "2:22:43", "remaining_time": "6:48:29"}
|
59 |
-
{"current_steps": 59, "total_steps": 224, "loss": 0.4676, "lr": 1.8458320592590976e-05, "epoch": 1.8, "percentage": 26.34, "elapsed_time": "2:25:22", "remaining_time": "6:46:32"}
|
60 |
-
{"current_steps": 60, "total_steps": 224, "loss": 0.4576, "lr": 1.837391373180801e-05, "epoch": 1.830379746835443, "percentage": 26.79, "elapsed_time": "2:27:37", "remaining_time": "6:43:29"}
|
61 |
-
{"current_steps": 61, "total_steps": 224, "loss": 0.4538, "lr": 1.8287461240141217e-05, "epoch": 1.8607594936708862, "percentage": 27.23, "elapsed_time": "2:29:55", "remaining_time": "6:40:36"}
|
62 |
-
{"current_steps": 62, "total_steps": 224, "loss": 0.444, "lr": 1.8198984236734246e-05, "epoch": 1.891139240506329, "percentage": 27.68, "elapsed_time": "2:32:16", "remaining_time": "6:37:52"}
|
63 |
-
{"current_steps": 63, "total_steps": 224, "loss": 0.4488, "lr": 1.8108504335290852e-05, "epoch": 1.9215189873417722, "percentage": 28.12, "elapsed_time": "2:34:51", "remaining_time": "6:35:44"}
|
64 |
-
{"current_steps": 64, "total_steps": 224, "loss": 0.4782, "lr": 1.8016043638794975e-05, "epoch": 1.9518987341772152, "percentage": 28.57, "elapsed_time": "2:37:22", "remaining_time": "6:33:27"}
|
65 |
-
{"current_steps": 65, "total_steps": 224, "loss": 0.4688, "lr": 1.7921624734111292e-05, "epoch": 1.9822784810126581, "percentage": 29.02, "elapsed_time": "2:39:52", "remaining_time": "6:31:05"}
|
66 |
-
{"current_steps": 66, "total_steps": 224, "loss": 0.4474, "lr": 1.7825270686467567e-05, "epoch": 2.020253164556962, "percentage": 29.46, "elapsed_time": "2:43:07", "remaining_time": "6:30:30"}
|
67 |
-
{"current_steps": 67, "total_steps": 224, "loss": 0.4178, "lr": 1.7727005033820117e-05, "epoch": 2.050632911392405, "percentage": 29.91, "elapsed_time": "2:45:28", "remaining_time": "6:27:45"}
|
68 |
-
{"current_steps": 68, "total_steps": 224, "loss": 0.4287, "lr": 1.762685178110382e-05, "epoch": 2.081012658227848, "percentage": 30.36, "elapsed_time": "2:47:51", "remaining_time": "6:25:04"}
|
69 |
-
{"current_steps": 69, "total_steps": 224, "loss": 0.3979, "lr": 1.752483539436807e-05, "epoch": 2.1113924050632913, "percentage": 30.8, "elapsed_time": "2:50:15", "remaining_time": "6:22:27"}
|
70 |
-
{"current_steps": 70, "total_steps": 224, "loss": 0.4105, "lr": 1.7420980794800013e-05, "epoch": 2.141772151898734, "percentage": 31.25, "elapsed_time": "2:52:42", "remaining_time": "6:19:57"}
|
71 |
-
{"current_steps": 71, "total_steps": 224, "loss": 0.4061, "lr": 1.731531335263669e-05, "epoch": 2.1721518987341772, "percentage": 31.7, "elapsed_time": "2:55:02", "remaining_time": "6:17:12"}
|
72 |
-
{"current_steps": 72, "total_steps": 224, "loss": 0.4174, "lr": 1.720785888096743e-05, "epoch": 2.2025316455696204, "percentage": 32.14, "elapsed_time": "2:57:30", "remaining_time": "6:14:44"}
|
73 |
-
{"current_steps": 73, "total_steps": 224, "loss": 0.4163, "lr": 1.7098643629428035e-05, "epoch": 2.232911392405063, "percentage": 32.59, "elapsed_time": "3:00:08", "remaining_time": "6:12:38"}
|
74 |
-
{"current_steps": 74, "total_steps": 224, "loss": 0.3913, "lr": 1.698769427778842e-05, "epoch": 2.2632911392405064, "percentage": 33.04, "elapsed_time": "3:02:34", "remaining_time": "6:10:04"}
|
75 |
-
{"current_steps": 75, "total_steps": 224, "loss": 0.4204, "lr": 1.687503792943506e-05, "epoch": 2.293670886075949, "percentage": 33.48, "elapsed_time": "3:05:06", "remaining_time": "6:07:44"}
|
76 |
-
{"current_steps": 76, "total_steps": 224, "loss": 0.3909, "lr": 1.6760702104750046e-05, "epoch": 2.3240506329113924, "percentage": 33.93, "elapsed_time": "3:07:21", "remaining_time": "6:04:50"}
|
77 |
-
{"current_steps": 77, "total_steps": 224, "loss": 0.4006, "lr": 1.664471473438822e-05, "epoch": 2.3544303797468356, "percentage": 34.38, "elapsed_time": "3:09:52", "remaining_time": "6:02:29"}
|
78 |
-
{"current_steps": 78, "total_steps": 224, "loss": 0.4245, "lr": 1.6527104152454096e-05, "epoch": 2.3848101265822783, "percentage": 34.82, "elapsed_time": "3:12:34", "remaining_time": "6:00:27"}
|
79 |
-
{"current_steps": 79, "total_steps": 224, "loss": 0.4257, "lr": 1.6407899089580263e-05, "epoch": 2.4151898734177215, "percentage": 35.27, "elapsed_time": "3:14:54", "remaining_time": "5:57:44"}
|
80 |
-
{"current_steps": 80, "total_steps": 224, "loss": 0.4176, "lr": 1.628712866590885e-05, "epoch": 2.4455696202531647, "percentage": 35.71, "elapsed_time": "3:17:29", "remaining_time": "5:55:28"}
|
81 |
-
{"current_steps": 81, "total_steps": 224, "loss": 0.4061, "lr": 1.6164822383977912e-05, "epoch": 2.4759493670886075, "percentage": 36.16, "elapsed_time": "3:19:55", "remaining_time": "5:52:57"}
|
82 |
-
{"current_steps": 82, "total_steps": 224, "loss": 0.4037, "lr": 1.604101012151436e-05, "epoch": 2.5063291139240507, "percentage": 36.61, "elapsed_time": "3:22:17", "remaining_time": "5:50:17"}
|
83 |
-
{"current_steps": 83, "total_steps": 224, "loss": 0.4074, "lr": 1.5915722124135227e-05, "epoch": 2.536708860759494, "percentage": 37.05, "elapsed_time": "3:24:38", "remaining_time": "5:47:37"}
|
84 |
-
{"current_steps": 84, "total_steps": 224, "loss": 0.4198, "lr": 1.5788988997959115e-05, "epoch": 2.5670886075949366, "percentage": 37.5, "elapsed_time": "3:27:12", "remaining_time": "5:45:20"}
|
85 |
-
{"current_steps": 85, "total_steps": 224, "loss": 0.4224, "lr": 1.5660841702129533e-05, "epoch": 2.59746835443038, "percentage": 37.95, "elapsed_time": "3:29:40", "remaining_time": "5:42:52"}
|
86 |
-
{"current_steps": 86, "total_steps": 224, "loss": 0.3984, "lr": 1.5531311541251995e-05, "epoch": 2.6278481012658226, "percentage": 38.39, "elapsed_time": "3:32:11", "remaining_time": "5:40:29"}
|
87 |
-
{"current_steps": 87, "total_steps": 224, "loss": 0.3933, "lr": 1.540043015774676e-05, "epoch": 2.6582278481012658, "percentage": 38.84, "elapsed_time": "3:34:48", "remaining_time": "5:38:16"}
|
88 |
-
{"current_steps": 88, "total_steps": 224, "loss": 0.4038, "lr": 1.5268229524119007e-05, "epoch": 2.688607594936709, "percentage": 39.29, "elapsed_time": "3:37:13", "remaining_time": "5:35:42"}
|
89 |
-
{"current_steps": 89, "total_steps": 224, "loss": 0.4069, "lr": 1.513474193514842e-05, "epoch": 2.7189873417721517, "percentage": 39.73, "elapsed_time": "3:39:42", "remaining_time": "5:33:16"}
|
90 |
-
{"current_steps": 90, "total_steps": 224, "loss": 0.3885, "lr": 1.5000000000000002e-05, "epoch": 2.749367088607595, "percentage": 40.18, "elapsed_time": "3:42:07", "remaining_time": "5:30:43"}
|
91 |
-
{"current_steps": 91, "total_steps": 224, "loss": 0.4182, "lr": 1.4864036634258112e-05, "epoch": 2.779746835443038, "percentage": 40.62, "elapsed_time": "3:44:37", "remaining_time": "5:28:18"}
|
92 |
-
{"current_steps": 92, "total_steps": 224, "loss": 0.4062, "lr": 1.4726885051885654e-05, "epoch": 2.810126582278481, "percentage": 41.07, "elapsed_time": "3:46:51", "remaining_time": "5:25:29"}
|
93 |
-
{"current_steps": 93, "total_steps": 224, "loss": 0.41, "lr": 1.4588578757110359e-05, "epoch": 2.840506329113924, "percentage": 41.52, "elapsed_time": "3:49:18", "remaining_time": "5:22:59"}
|
94 |
-
{"current_steps": 94, "total_steps": 224, "loss": 0.4064, "lr": 1.4449151536240167e-05, "epoch": 2.8708860759493673, "percentage": 41.96, "elapsed_time": "3:51:47", "remaining_time": "5:20:33"}
|
95 |
-
{"current_steps": 95, "total_steps": 224, "loss": 0.3911, "lr": 1.4308637449409705e-05, "epoch": 2.90126582278481, "percentage": 42.41, "elapsed_time": "3:54:12", "remaining_time": "5:18:02"}
|
96 |
-
{"current_steps": 96, "total_steps": 224, "loss": 0.4108, "lr": 1.4167070822259868e-05, "epoch": 2.9316455696202532, "percentage": 42.86, "elapsed_time": "3:56:46", "remaining_time": "5:15:41"}
|
97 |
-
{"current_steps": 97, "total_steps": 224, "loss": 0.4131, "lr": 1.402448623755254e-05, "epoch": 2.962025316455696, "percentage": 43.3, "elapsed_time": "3:59:19", "remaining_time": "5:13:20"}
|
98 |
-
{"current_steps": 98, "total_steps": 224, "loss": 0.4051, "lr": 1.3880918526722497e-05, "epoch": 2.992405063291139, "percentage": 43.75, "elapsed_time": "4:01:54", "remaining_time": "5:11:01"}
|
99 |
-
{"current_steps": 99, "total_steps": 224, "loss": 0.3711, "lr": 1.3736402761368597e-05, "epoch": 3.030379746835443, "percentage": 44.2, "elapsed_time": "4:05:11", "remaining_time": "5:09:35"}
|
100 |
-
{"current_steps": 100, "total_steps": 224, "loss": 0.3663, "lr": 1.3590974244686248e-05, "epoch": 3.060759493670886, "percentage": 44.64, "elapsed_time": "4:07:29", "remaining_time": "5:06:52"}
|
101 |
-
{"current_steps": 101, "total_steps": 224, "loss": 0.3646, "lr": 1.344466850284333e-05, "epoch": 3.091139240506329, "percentage": 45.09, "elapsed_time": "4:09:50", "remaining_time": "5:04:15"}
|
102 |
-
{"current_steps": 102, "total_steps": 224, "loss": 0.3614, "lr": 1.3297521276301666e-05, "epoch": 3.1215189873417724, "percentage": 45.54, "elapsed_time": "4:12:15", "remaining_time": "5:01:43"}
|
103 |
-
{"current_steps": 103, "total_steps": 224, "loss": 0.3541, "lr": 1.3149568511086104e-05, "epoch": 3.151898734177215, "percentage": 45.98, "elapsed_time": "4:14:47", "remaining_time": "4:59:19"}
|
104 |
-
{"current_steps": 104, "total_steps": 224, "loss": 0.3596, "lr": 1.300084635000341e-05, "epoch": 3.1822784810126583, "percentage": 46.43, "elapsed_time": "4:17:06", "remaining_time": "4:56:39"}
|
105 |
-
{"current_steps": 105, "total_steps": 224, "loss": 0.3651, "lr": 1.2851391123813075e-05, "epoch": 3.212658227848101, "percentage": 46.88, "elapsed_time": "4:19:31", "remaining_time": "4:54:07"}
|
106 |
-
{"current_steps": 106, "total_steps": 224, "loss": 0.3608, "lr": 1.2701239342352223e-05, "epoch": 3.2430379746835443, "percentage": 47.32, "elapsed_time": "4:22:06", "remaining_time": "4:51:47"}
|
107 |
-
{"current_steps": 107, "total_steps": 224, "loss": 0.3608, "lr": 1.2550427685616767e-05, "epoch": 3.2734177215189875, "percentage": 47.77, "elapsed_time": "4:24:34", "remaining_time": "4:49:18"}
|
108 |
-
{"current_steps": 108, "total_steps": 224, "loss": 0.363, "lr": 1.239899299480098e-05, "epoch": 3.3037974683544302, "percentage": 48.21, "elapsed_time": "4:27:10", "remaining_time": "4:46:57"}
|
109 |
-
{"current_steps": 109, "total_steps": 224, "loss": 0.3618, "lr": 1.2246972263297718e-05, "epoch": 3.3341772151898734, "percentage": 48.66, "elapsed_time": "4:29:30", "remaining_time": "4:44:20"}
|
110 |
-
{"current_steps": 110, "total_steps": 224, "loss": 0.3554, "lr": 1.2094402627661447e-05, "epoch": 3.3645569620253166, "percentage": 49.11, "elapsed_time": "4:32:07", "remaining_time": "4:42:00"}
|
111 |
-
{"current_steps": 111, "total_steps": 224, "loss": 0.338, "lr": 1.1941321358536278e-05, "epoch": 3.3949367088607594, "percentage": 49.55, "elapsed_time": "4:34:45", "remaining_time": "4:39:42"}
|
112 |
-
{"current_steps": 112, "total_steps": 224, "loss": 0.3557, "lr": 1.1787765851551296e-05, "epoch": 3.4253164556962026, "percentage": 50.0, "elapsed_time": "4:37:05", "remaining_time": "4:37:05"}
|
113 |
-
{"current_steps": 113, "total_steps": 224, "loss": 0.3499, "lr": 1.1633773618185302e-05, "epoch": 3.4556962025316453, "percentage": 50.45, "elapsed_time": "4:39:32", "remaining_time": "4:34:35"}
|
114 |
-
{"current_steps": 114, "total_steps": 224, "loss": 0.3602, "lr": 1.14793822766033e-05, "epoch": 3.4860759493670885, "percentage": 50.89, "elapsed_time": "4:42:06", "remaining_time": "4:32:12"}
|
115 |
-
{"current_steps": 115, "total_steps": 224, "loss": 0.3468, "lr": 1.132462954246688e-05, "epoch": 3.5164556962025317, "percentage": 51.34, "elapsed_time": "4:44:24", "remaining_time": "4:29:34"}
|
116 |
-
{"current_steps": 116, "total_steps": 224, "loss": 0.3591, "lr": 1.1169553219720828e-05, "epoch": 3.546835443037975, "percentage": 51.79, "elapsed_time": "4:46:44", "remaining_time": "4:26:57"}
|
117 |
-
{"current_steps": 117, "total_steps": 224, "loss": 0.3512, "lr": 1.1014191191358118e-05, "epoch": 3.5772151898734177, "percentage": 52.23, "elapsed_time": "4:49:11", "remaining_time": "4:24:28"}
|
118 |
-
{"current_steps": 118, "total_steps": 224, "loss": 0.3529, "lr": 1.085858141016566e-05, "epoch": 3.607594936708861, "percentage": 52.68, "elapsed_time": "4:51:37", "remaining_time": "4:21:58"}
|
119 |
-
{"current_steps": 119, "total_steps": 224, "loss": 0.3446, "lr": 1.070276188945293e-05, "epoch": 3.6379746835443036, "percentage": 53.12, "elapsed_time": "4:53:48", "remaining_time": "4:19:14"}
|
120 |
-
{"current_steps": 120, "total_steps": 224, "loss": 0.3585, "lr": 1.0546770693765859e-05, "epoch": 3.668354430379747, "percentage": 53.57, "elapsed_time": "4:56:19", "remaining_time": "4:16:48"}
|
121 |
-
{"current_steps": 121, "total_steps": 224, "loss": 0.3554, "lr": 1.0390645929588197e-05, "epoch": 3.69873417721519, "percentage": 54.02, "elapsed_time": "4:58:48", "remaining_time": "4:14:21"}
|
122 |
-
{"current_steps": 122, "total_steps": 224, "loss": 0.3579, "lr": 1.0234425736032607e-05, "epoch": 3.729113924050633, "percentage": 54.46, "elapsed_time": "5:01:13", "remaining_time": "4:11:50"}
|
123 |
-
{"current_steps": 123, "total_steps": 224, "loss": 0.361, "lr": 1.007814827552384e-05, "epoch": 3.759493670886076, "percentage": 54.91, "elapsed_time": "5:03:40", "remaining_time": "4:09:21"}
|
124 |
-
{"current_steps": 124, "total_steps": 224, "loss": 0.3545, "lr": 9.92185172447616e-06, "epoch": 3.7898734177215188, "percentage": 55.36, "elapsed_time": "5:06:00", "remaining_time": "4:06:46"}
|
125 |
-
{"current_steps": 125, "total_steps": 224, "loss": 0.3653, "lr": 9.765574263967397e-06, "epoch": 3.820253164556962, "percentage": 55.8, "elapsed_time": "5:08:18", "remaining_time": "4:04:10"}
|
126 |
-
{"current_steps": 126, "total_steps": 224, "loss": 0.3433, "lr": 9.609354070411807e-06, "epoch": 3.850632911392405, "percentage": 56.25, "elapsed_time": "5:10:33", "remaining_time": "4:01:32"}
|
127 |
-
{"current_steps": 127, "total_steps": 224, "loss": 0.3476, "lr": 9.453229306234143e-06, "epoch": 3.8810126582278484, "percentage": 56.7, "elapsed_time": "5:13:00", "remaining_time": "3:59:03"}
|
128 |
-
{"current_steps": 128, "total_steps": 224, "loss": 0.3673, "lr": 9.297238110547075e-06, "epoch": 3.911392405063291, "percentage": 57.14, "elapsed_time": "5:15:33", "remaining_time": "3:56:40"}
|
129 |
-
{"current_steps": 129, "total_steps": 224, "loss": 0.3683, "lr": 9.14141858983434e-06, "epoch": 3.9417721518987343, "percentage": 57.59, "elapsed_time": "5:18:18", "remaining_time": "3:54:25"}
|
130 |
-
{"current_steps": 130, "total_steps": 224, "loss": 0.3583, "lr": 8.985808808641883e-06, "epoch": 3.972151898734177, "percentage": 58.04, "elapsed_time": "5:20:39", "remaining_time": "3:51:51"}
|
131 |
-
{"current_steps": 131, "total_steps": 224, "loss": 0.3381, "lr": 8.830446780279175e-06, "epoch": 4.010126582278481, "percentage": 58.48, "elapsed_time": "5:24:03", "remaining_time": "3:50:03"}
|
|
|
1 |
+
{"current_steps": 1, "total_steps": 231, "loss": 0.8189, "lr": 8.333333333333333e-07, "epoch": 0.030303030303030304, "percentage": 0.43, "elapsed_time": "0:00:50", "remaining_time": "3:12:11"}
|
2 |
+
{"current_steps": 2, "total_steps": 231, "loss": 0.8207, "lr": 1.6666666666666667e-06, "epoch": 0.06060606060606061, "percentage": 0.87, "elapsed_time": "0:01:24", "remaining_time": "2:40:30"}
|
3 |
+
{"current_steps": 3, "total_steps": 231, "loss": 0.7979, "lr": 2.5e-06, "epoch": 0.09090909090909091, "percentage": 1.3, "elapsed_time": "0:01:56", "remaining_time": "2:27:56"}
|
4 |
+
{"current_steps": 4, "total_steps": 231, "loss": 0.8516, "lr": 3.3333333333333333e-06, "epoch": 0.12121212121212122, "percentage": 1.73, "elapsed_time": "0:02:29", "remaining_time": "2:21:33"}
|
5 |
+
{"current_steps": 5, "total_steps": 231, "loss": 0.7496, "lr": 4.166666666666667e-06, "epoch": 0.15151515151515152, "percentage": 2.16, "elapsed_time": "0:03:03", "remaining_time": "2:18:34"}
|
6 |
+
{"current_steps": 6, "total_steps": 231, "loss": 0.7536, "lr": 5e-06, "epoch": 0.18181818181818182, "percentage": 2.6, "elapsed_time": "0:03:36", "remaining_time": "2:15:23"}
|
7 |
+
{"current_steps": 7, "total_steps": 231, "loss": 0.7444, "lr": 5.833333333333334e-06, "epoch": 0.21212121212121213, "percentage": 3.03, "elapsed_time": "0:04:09", "remaining_time": "2:13:09"}
|
8 |
+
{"current_steps": 8, "total_steps": 231, "loss": 0.7076, "lr": 6.666666666666667e-06, "epoch": 0.24242424242424243, "percentage": 3.46, "elapsed_time": "0:04:41", "remaining_time": "2:10:59"}
|
9 |
+
{"current_steps": 9, "total_steps": 231, "loss": 0.7421, "lr": 7.500000000000001e-06, "epoch": 0.2727272727272727, "percentage": 3.9, "elapsed_time": "0:05:14", "remaining_time": "2:09:13"}
|
10 |
+
{"current_steps": 10, "total_steps": 231, "loss": 0.738, "lr": 8.333333333333334e-06, "epoch": 0.30303030303030304, "percentage": 4.33, "elapsed_time": "0:05:47", "remaining_time": "2:07:52"}
|
11 |
+
{"current_steps": 11, "total_steps": 231, "loss": 0.6751, "lr": 9.166666666666666e-06, "epoch": 0.3333333333333333, "percentage": 4.76, "elapsed_time": "0:06:19", "remaining_time": "2:06:39"}
|
12 |
+
{"current_steps": 12, "total_steps": 231, "loss": 0.6577, "lr": 1e-05, "epoch": 0.36363636363636365, "percentage": 5.19, "elapsed_time": "0:06:54", "remaining_time": "2:05:59"}
|
13 |
+
{"current_steps": 13, "total_steps": 231, "loss": 0.6452, "lr": 1.0833333333333334e-05, "epoch": 0.3939393939393939, "percentage": 5.63, "elapsed_time": "0:07:28", "remaining_time": "2:05:23"}
|
14 |
+
{"current_steps": 14, "total_steps": 231, "loss": 0.6395, "lr": 1.1666666666666668e-05, "epoch": 0.42424242424242425, "percentage": 6.06, "elapsed_time": "0:08:01", "remaining_time": "2:04:22"}
|
15 |
+
{"current_steps": 15, "total_steps": 231, "loss": 0.6299, "lr": 1.25e-05, "epoch": 0.45454545454545453, "percentage": 6.49, "elapsed_time": "0:08:35", "remaining_time": "2:03:44"}
|
16 |
+
{"current_steps": 16, "total_steps": 231, "loss": 0.6146, "lr": 1.3333333333333333e-05, "epoch": 0.48484848484848486, "percentage": 6.93, "elapsed_time": "0:09:09", "remaining_time": "2:03:03"}
|
17 |
+
{"current_steps": 17, "total_steps": 231, "loss": 0.5941, "lr": 1.416666666666667e-05, "epoch": 0.5151515151515151, "percentage": 7.36, "elapsed_time": "0:09:43", "remaining_time": "2:02:31"}
|
18 |
+
{"current_steps": 18, "total_steps": 231, "loss": 0.5994, "lr": 1.5000000000000002e-05, "epoch": 0.5454545454545454, "percentage": 7.79, "elapsed_time": "0:10:16", "remaining_time": "2:01:37"}
|
19 |
+
{"current_steps": 19, "total_steps": 231, "loss": 0.569, "lr": 1.5833333333333333e-05, "epoch": 0.5757575757575758, "percentage": 8.23, "elapsed_time": "0:10:48", "remaining_time": "2:00:40"}
|
20 |
+
{"current_steps": 20, "total_steps": 231, "loss": 0.5855, "lr": 1.6666666666666667e-05, "epoch": 0.6060606060606061, "percentage": 8.66, "elapsed_time": "0:11:22", "remaining_time": "1:59:57"}
|
21 |
+
{"current_steps": 21, "total_steps": 231, "loss": 0.5397, "lr": 1.7500000000000002e-05, "epoch": 0.6363636363636364, "percentage": 9.09, "elapsed_time": "0:11:54", "remaining_time": "1:59:02"}
|
22 |
+
{"current_steps": 22, "total_steps": 231, "loss": 0.5603, "lr": 1.8333333333333333e-05, "epoch": 0.6666666666666666, "percentage": 9.52, "elapsed_time": "0:12:28", "remaining_time": "1:58:34"}
|
23 |
+
{"current_steps": 23, "total_steps": 231, "loss": 0.5443, "lr": 1.916666666666667e-05, "epoch": 0.696969696969697, "percentage": 9.96, "elapsed_time": "0:13:04", "remaining_time": "1:58:10"}
|
24 |
+
{"current_steps": 24, "total_steps": 231, "loss": 0.5537, "lr": 2e-05, "epoch": 0.7272727272727273, "percentage": 10.39, "elapsed_time": "0:13:36", "remaining_time": "1:57:22"}
|
25 |
+
{"current_steps": 25, "total_steps": 231, "loss": 0.5419, "lr": 1.999884834944106e-05, "epoch": 0.7575757575757576, "percentage": 10.82, "elapsed_time": "0:14:10", "remaining_time": "1:56:44"}
|
26 |
+
{"current_steps": 26, "total_steps": 231, "loss": 0.539, "lr": 1.9995393663024054e-05, "epoch": 0.7878787878787878, "percentage": 11.26, "elapsed_time": "0:14:44", "remaining_time": "1:56:10"}
|
27 |
+
{"current_steps": 27, "total_steps": 231, "loss": 0.5373, "lr": 1.9989636736467278e-05, "epoch": 0.8181818181818182, "percentage": 11.69, "elapsed_time": "0:15:16", "remaining_time": "1:55:24"}
|
28 |
+
{"current_steps": 28, "total_steps": 231, "loss": 0.5277, "lr": 1.9981578895764272e-05, "epoch": 0.8484848484848485, "percentage": 12.12, "elapsed_time": "0:15:46", "remaining_time": "1:54:24"}
|
29 |
+
{"current_steps": 29, "total_steps": 231, "loss": 0.53, "lr": 1.9971221996878395e-05, "epoch": 0.8787878787878788, "percentage": 12.55, "elapsed_time": "0:16:19", "remaining_time": "1:53:43"}
|
30 |
+
{"current_steps": 30, "total_steps": 231, "loss": 0.5373, "lr": 1.9958568425315316e-05, "epoch": 0.9090909090909091, "percentage": 12.99, "elapsed_time": "0:16:52", "remaining_time": "1:53:04"}
|
31 |
+
{"current_steps": 31, "total_steps": 231, "loss": 0.5237, "lr": 1.9943621095573588e-05, "epoch": 0.9393939393939394, "percentage": 13.42, "elapsed_time": "0:17:26", "remaining_time": "1:52:29"}
|
32 |
+
{"current_steps": 32, "total_steps": 231, "loss": 0.5194, "lr": 1.9926383450473344e-05, "epoch": 0.9696969696969697, "percentage": 13.85, "elapsed_time": "0:17:59", "remaining_time": "1:51:53"}
|
33 |
+
{"current_steps": 33, "total_steps": 231, "loss": 0.5332, "lr": 1.9906859460363307e-05, "epoch": 1.0, "percentage": 14.29, "elapsed_time": "0:18:33", "remaining_time": "1:51:20"}
|
34 |
+
{"current_steps": 34, "total_steps": 231, "loss": 0.5013, "lr": 1.9885053622206305e-05, "epoch": 1.0303030303030303, "percentage": 14.72, "elapsed_time": "0:21:47", "remaining_time": "2:06:16"}
|
35 |
+
{"current_steps": 35, "total_steps": 231, "loss": 0.519, "lr": 1.986097095854347e-05, "epoch": 1.0606060606060606, "percentage": 15.15, "elapsed_time": "0:22:20", "remaining_time": "2:05:07"}
|
36 |
+
{"current_steps": 36, "total_steps": 231, "loss": 0.489, "lr": 1.9834617016337424e-05, "epoch": 1.0909090909090908, "percentage": 15.58, "elapsed_time": "0:22:53", "remaining_time": "2:03:58"}
|
37 |
+
{"current_steps": 37, "total_steps": 231, "loss": 0.4708, "lr": 1.9805997865694616e-05, "epoch": 1.121212121212121, "percentage": 16.02, "elapsed_time": "0:23:25", "remaining_time": "2:02:48"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 7288
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9359b4147a6ad2943d5c8acbf60972437aa28bc705e2e6d27ea15879e2bd0c80
|
3 |
size 7288
|