Upload tiny random pythia-14m model (seed 42)
Browse files
- README.md +7 -7
- config.json +6 -7
- model.safetensors +2 -2
README.md
CHANGED
@@ -16,16 +16,16 @@ This is a tiny random version of the EleutherAI/pythia-14m model, created for te
|
|
16 |
|
17 |
- **Base model**: EleutherAI/pythia-14m
|
18 |
- **Seed**: 42
|
19 |
-
- **Hidden size**:
|
20 |
-
- **Number of layers**:
|
21 |
-
- **Number of attention heads**:
|
22 |
-
- **Vocabulary size**:
|
23 |
-
- **Max position embeddings**:
|
24 |
|
25 |
## Parameters
|
26 |
|
27 |
-
- **Total parameters**: ~
|
28 |
-
- **Trainable parameters**: ~
|
29 |
|
30 |
## Usage
|
31 |
|
|
|
16 |
|
17 |
- **Base model**: EleutherAI/pythia-14m
|
18 |
- **Seed**: 42
|
19 |
+
- **Hidden size**: 128
|
20 |
+
- **Number of layers**: 6
|
21 |
+
- **Number of attention heads**: 4
|
22 |
+
- **Vocabulary size**: 50304
|
23 |
+
- **Max position embeddings**: 2048
|
24 |
|
25 |
## Parameters
|
26 |
|
27 |
+
- **Total parameters**: ~14,067,712
|
28 |
+
- **Trainable parameters**: ~14,067,712
|
29 |
|
30 |
## Usage
|
31 |
|
config.json
CHANGED
@@ -9,15 +9,14 @@
|
|
9 |
"eos_token_id": 0,
|
10 |
"hidden_act": "gelu",
|
11 |
"hidden_dropout": 0.0,
|
12 |
-
"hidden_size":
|
13 |
"initializer_range": 0.02,
|
14 |
-
"intermediate_size":
|
15 |
"layer_norm_eps": 1e-05,
|
16 |
-
"max_position_embeddings":
|
17 |
"model_type": "gpt_neox",
|
18 |
-
"num_attention_heads":
|
19 |
-
"num_hidden_layers":
|
20 |
-
"num_key_value_heads": 2,
|
21 |
"partial_rotary_factor": 0.25,
|
22 |
"rope_scaling": null,
|
23 |
"rope_theta": 10000,
|
@@ -28,5 +27,5 @@
|
|
28 |
"transformers_version": "4.54.0",
|
29 |
"use_cache": true,
|
30 |
"use_parallel_residual": true,
|
31 |
-
"vocab_size":
|
32 |
}
|
|
|
9 |
"eos_token_id": 0,
|
10 |
"hidden_act": "gelu",
|
11 |
"hidden_dropout": 0.0,
|
12 |
+
"hidden_size": 128,
|
13 |
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 512,
|
15 |
"layer_norm_eps": 1e-05,
|
16 |
+
"max_position_embeddings": 2048,
|
17 |
"model_type": "gpt_neox",
|
18 |
+
"num_attention_heads": 4,
|
19 |
+
"num_hidden_layers": 6,
|
|
|
20 |
"partial_rotary_factor": 0.25,
|
21 |
"rope_scaling": null,
|
22 |
"rope_theta": 10000,
|
|
|
27 |
"transformers_version": "4.54.0",
|
28 |
"use_cache": true,
|
29 |
"use_parallel_residual": true,
|
30 |
+
"vocab_size": 50304
|
31 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2fc323f05723792edad751ae5ed494357fd1fcd77dad5f8736b3007076df64b
|
3 |
+
size 56279344
|