Upload folder using huggingface_hub
Browse files- README.md +1 -1
- checkpoint-12/model-00001-of-00003.safetensors +1 -1
- checkpoint-12/model-00002-of-00003.safetensors +1 -1
- checkpoint-12/model-00003-of-00003.safetensors +1 -1
- checkpoint-12/trainer_state.json +4 -4
- checkpoint-12/training_args.bin +1 -1
- checkpoint-16/model-00001-of-00003.safetensors +1 -1
- checkpoint-16/model-00002-of-00003.safetensors +1 -1
- checkpoint-16/model-00003-of-00003.safetensors +1 -1
- checkpoint-16/trainer_state.json +4 -4
- checkpoint-16/training_args.bin +1 -1
- checkpoint-20/model-00001-of-00003.safetensors +1 -1
- checkpoint-20/model-00002-of-00003.safetensors +1 -1
- checkpoint-20/model-00003-of-00003.safetensors +1 -1
- checkpoint-20/trainer_state.json +7 -7
- checkpoint-20/training_args.bin +1 -1
- checkpoint-24/model-00001-of-00003.safetensors +1 -1
- checkpoint-24/model-00002-of-00003.safetensors +1 -1
- checkpoint-24/model-00003-of-00003.safetensors +1 -1
- checkpoint-24/trainer_state.json +7 -7
- checkpoint-24/training_args.bin +1 -1
- checkpoint-4/model-00001-of-00003.safetensors +1 -1
- checkpoint-4/model-00002-of-00003.safetensors +1 -1
- checkpoint-4/model-00003-of-00003.safetensors +1 -1
- checkpoint-4/trainer_state.json +1 -1
- checkpoint-4/training_args.bin +1 -1
- checkpoint-8/model-00001-of-00003.safetensors +1 -1
- checkpoint-8/model-00002-of-00003.safetensors +1 -1
- checkpoint-8/model-00003-of-00003.safetensors +1 -1
- checkpoint-8/trainer_state.json +1 -1
- checkpoint-8/training_args.bin +1 -1
README.md
CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
|
|
27 |
|
28 |
## Training procedure
|
29 |
|
30 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chenwu/huggingface/runs/
|
31 |
|
32 |
|
33 |
This model was trained with SFT.
|
|
|
27 |
|
28 |
## Training procedure
|
29 |
|
30 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chenwu/huggingface/runs/78qqbyni)
|
31 |
|
32 |
|
33 |
This model was trained with SFT.
|
checkpoint-12/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4987202208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45e7e16e3f94be5c66c07c8d95bb2a328726b9e444939504687e430bdc26765f
|
3 |
size 4987202208
|
checkpoint-12/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4980945440
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27e9b55f8602ff6780c1fe176f1bcb084a68b3af5bbdffbb75635fefdf131da7
|
3 |
size 4980945440
|
checkpoint-12/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3852615520
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62d2ead9d2342e8d73051d16f455e149ea9f0e328cd56b73e55bffd38d85cfd5
|
3 |
size 3852615520
|
checkpoint-12/trainer_state.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
-
"grad_norm": 15.
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
@@ -20,10 +20,10 @@
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
-
"grad_norm": 0.
|
24 |
"learning_rate": 0.0001,
|
25 |
-
"loss": 0.
|
26 |
-
"mean_token_accuracy": 0.
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
}
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
+
"grad_norm": 15.946330863810116,
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
+
"grad_norm": 0.42994698419563937,
|
24 |
"learning_rate": 0.0001,
|
25 |
+
"loss": 0.3108,
|
26 |
+
"mean_token_accuracy": 0.9019484138279631,
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
}
|
checkpoint-12/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15f41cb084a1787473827f98e9a8a3c9e4bcc235e71ad54358c0b732c2faa36f
|
3 |
size 8017
|
checkpoint-16/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4987202208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:425dd39d8a4fe9a73322873d6115a4c4698241c83d7e09bc7a5cc607c3047251
|
3 |
size 4987202208
|
checkpoint-16/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4980945440
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63b7641ed96e94ed22def923ab3a3aafcd70009051d029bf5f5aba4e1cbac5f4
|
3 |
size 4980945440
|
checkpoint-16/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3852615520
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:922807eae22661146a4b62d38e61b405875f5f796a76ef92b0bcd27fdd0f8a68
|
3 |
size 3852615520
|
checkpoint-16/trainer_state.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
-
"grad_norm": 15.
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
@@ -20,10 +20,10 @@
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
-
"grad_norm": 0.
|
24 |
"learning_rate": 0.0001,
|
25 |
-
"loss": 0.
|
26 |
-
"mean_token_accuracy": 0.
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
}
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
+
"grad_norm": 15.946330863810116,
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
+
"grad_norm": 0.42994698419563937,
|
24 |
"learning_rate": 0.0001,
|
25 |
+
"loss": 0.3108,
|
26 |
+
"mean_token_accuracy": 0.9019484138279631,
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
}
|
checkpoint-16/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15f41cb084a1787473827f98e9a8a3c9e4bcc235e71ad54358c0b732c2faa36f
|
3 |
size 8017
|
checkpoint-20/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4987202208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e87647c139332a98369a7e5e2c0ad3377d5c8d965321a79b175d68a311e38d6
|
3 |
size 4987202208
|
checkpoint-20/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4980945440
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c0f7b510e683b5d3bfcf78e1ac8750e1102fb57a3f246729d8e664042ea667d
|
3 |
size 4980945440
|
checkpoint-20/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3852615520
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5eb25114b529a557aaf9f1348f3d70065a2de39f4803a4e5ed7a757674d7e3cf
|
3 |
size 3852615520
|
checkpoint-20/trainer_state.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
-
"grad_norm": 15.
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
@@ -20,19 +20,19 @@
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
-
"grad_norm": 0.
|
24 |
"learning_rate": 0.0001,
|
25 |
-
"loss": 0.
|
26 |
-
"mean_token_accuracy": 0.
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
},
|
30 |
{
|
31 |
"epoch": 5.0,
|
32 |
-
"grad_norm": 0.
|
33 |
"learning_rate": 0.0001,
|
34 |
-
"loss": 0.
|
35 |
-
"mean_token_accuracy": 0.
|
36 |
"num_tokens": 4708046.0,
|
37 |
"step": 20
|
38 |
}
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
+
"grad_norm": 15.946330863810116,
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
+
"grad_norm": 0.42994698419563937,
|
24 |
"learning_rate": 0.0001,
|
25 |
+
"loss": 0.3108,
|
26 |
+
"mean_token_accuracy": 0.9019484138279631,
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
},
|
30 |
{
|
31 |
"epoch": 5.0,
|
32 |
+
"grad_norm": 0.7870531010460464,
|
33 |
"learning_rate": 0.0001,
|
34 |
+
"loss": 0.142,
|
35 |
+
"mean_token_accuracy": 0.9460842391718989,
|
36 |
"num_tokens": 4708046.0,
|
37 |
"step": 20
|
38 |
}
|
checkpoint-20/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15f41cb084a1787473827f98e9a8a3c9e4bcc235e71ad54358c0b732c2faa36f
|
3 |
size 8017
|
checkpoint-24/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4987202208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9832ee851609fec9a7137c1b6ac00bed0a1467c0e422e1cb72598e96c7245b72
|
3 |
size 4987202208
|
checkpoint-24/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4980945440
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7abba12b333f3bd44f43890c9f2474af8f55e913b21e914929d77937cd513b30
|
3 |
size 4980945440
|
checkpoint-24/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3852615520
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:712c7e50e7f615c0127f911103b424860642e47232ae3df392153b04ec73e100
|
3 |
size 3852615520
|
checkpoint-24/trainer_state.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
-
"grad_norm": 15.
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
@@ -20,19 +20,19 @@
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
-
"grad_norm": 0.
|
24 |
"learning_rate": 0.0001,
|
25 |
-
"loss": 0.
|
26 |
-
"mean_token_accuracy": 0.
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
},
|
30 |
{
|
31 |
"epoch": 5.0,
|
32 |
-
"grad_norm": 0.
|
33 |
"learning_rate": 0.0001,
|
34 |
-
"loss": 0.
|
35 |
-
"mean_token_accuracy": 0.
|
36 |
"num_tokens": 4708046.0,
|
37 |
"step": 20
|
38 |
}
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
+
"grad_norm": 15.946330863810116,
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
|
|
20 |
},
|
21 |
{
|
22 |
"epoch": 2.6530612244897958,
|
23 |
+
"grad_norm": 0.42994698419563937,
|
24 |
"learning_rate": 0.0001,
|
25 |
+
"loss": 0.3108,
|
26 |
+
"mean_token_accuracy": 0.9019484138279631,
|
27 |
"num_tokens": 2495915.0,
|
28 |
"step": 10
|
29 |
},
|
30 |
{
|
31 |
"epoch": 5.0,
|
32 |
+
"grad_norm": 0.7870531010460464,
|
33 |
"learning_rate": 0.0001,
|
34 |
+
"loss": 0.142,
|
35 |
+
"mean_token_accuracy": 0.9460842391718989,
|
36 |
"num_tokens": 4708046.0,
|
37 |
"step": 20
|
38 |
}
|
checkpoint-24/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15f41cb084a1787473827f98e9a8a3c9e4bcc235e71ad54358c0b732c2faa36f
|
3 |
size 8017
|
checkpoint-4/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4987202208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33419a5df28c03757db3956e079ca93868945c856ac822f9d9f9dd993895f7f5
|
3 |
size 4987202208
|
checkpoint-4/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4980945440
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ed8f9c46df31522f2c14f49f5cfaf34a089e3997b2e32be1c2aa99b406e2ef3
|
3 |
size 4980945440
|
checkpoint-4/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3852615520
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:948530506d504d6426f308efca66fbff78ba94eb9d3c5bb90057e8c99dbf1353
|
3 |
size 3852615520
|
checkpoint-4/trainer_state.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
-
"grad_norm": 15.
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
+
"grad_norm": 15.946330863810116,
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
checkpoint-4/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15f41cb084a1787473827f98e9a8a3c9e4bcc235e71ad54358c0b732c2faa36f
|
3 |
size 8017
|
checkpoint-8/model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4987202208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bec2c7c9ef3eb3e3054bc55a856dff7b1f903640679847a0fced5e717b1c080
|
3 |
size 4987202208
|
checkpoint-8/model-00002-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4980945440
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b230c3e61eb30f85cc9ccdb527bdf73f40b7f96203f27cb1abc8a67b43dcec56
|
3 |
size 4980945440
|
checkpoint-8/model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3852615520
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b9a88f8c37cdb0b970a82fa82ca17919c72c004b6ab3f4287a81ef1b962bf55
|
3 |
size 3852615520
|
checkpoint-8/trainer_state.json
CHANGED
@@ -11,7 +11,7 @@
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
-
"grad_norm": 15.
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
|
|
11 |
"log_history": [
|
12 |
{
|
13 |
"epoch": 0.32653061224489793,
|
14 |
+
"grad_norm": 15.946330863810116,
|
15 |
"learning_rate": 0.0001,
|
16 |
"loss": 0.4052,
|
17 |
"mean_token_accuracy": 0.9151133690029383,
|
checkpoint-8/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 8017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:15f41cb084a1787473827f98e9a8a3c9e4bcc235e71ad54358c0b732c2faa36f
|
3 |
size 8017
|