Commit febc7c3 (verified) · 1 parent: 589f8eb
skar0 committed

Upload tiny random Llama-3.3 model (seed 42)

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: mit
-base_model: EleutherAI/pythia-14m
+base_model: JackFram/llama-68m
 tags:
 - tiny-model
 - random-weights
@@ -10,22 +10,22 @@ tags:

 # Llama-3.3-Tiny-Instruct

-This is a tiny random version of the EleutherAI/pythia-14m model, created for testing and experimentation purposes.
+This is a tiny random version of the JackFram/llama-68m model, created for testing and experimentation purposes.

 ## Model Details

-- **Base model**: EleutherAI/pythia-14m
+- **Base model**: JackFram/llama-68m
 - **Seed**: 42
-- **Hidden size**: 128
-- **Number of layers**: 6
-- **Number of attention heads**: 4
-- **Vocabulary size**: 50304
+- **Hidden size**: 768
+- **Number of layers**: 2
+- **Number of attention heads**: 12
+- **Vocabulary size**: 32000
 - **Max position embeddings**: 2048

 ## Parameters

-- **Total parameters**: ~14,067,712
-- **Trainable parameters**: ~14,067,712
+- **Total parameters**: ~68,030,208
+- **Trainable parameters**: ~68,030,208

 ## Usage
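The Usage section is truncated in this diff view. As a minimal sketch of how a tiny random checkpoint like this is typically exercised (the repo id `skar0/Llama-3.3-Tiny-Instruct` is an assumption inferred from the page title, not taken from the commit):

```python
# Minimal usage sketch -- repo id is assumed, not part of this diff.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "skar0/Llama-3.3-Tiny-Instruct"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Hello, world", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(outputs[0]))
```

Since the weights are random, the output is gibberish by design; the checkpoint is meant for smoke-testing pipelines, not for real generation.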
config.json CHANGED
@@ -1,31 +1,30 @@
 {
   "architectures": [
-    "GPTNeoXForCausalLM"
+    "LlamaForCausalLM"
   ],
-  "attention_bias": true,
+  "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 0,
-  "classifier_dropout": 0.1,
-  "eos_token_id": 0,
-  "hidden_act": "gelu",
-  "hidden_dropout": 0.0,
-  "hidden_size": 128,
+  "eos_token_id": 2,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size": 512,
-  "layer_norm_eps": 1e-05,
+  "intermediate_size": 3072,
   "max_position_embeddings": 2048,
-  "model_type": "gpt_neox",
-  "num_attention_heads": 4,
-  "num_hidden_layers": 6,
-  "partial_rotary_factor": 0.25,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 2,
+  "num_key_value_heads": 12,
+  "pad_token_id": 1,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-06,
   "rope_scaling": null,
-  "rope_theta": 10000,
-  "rotary_emb_base": 10000,
-  "rotary_pct": 0.25,
+  "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.54.0",
   "use_cache": true,
-  "use_parallel_residual": true,
-  "vocab_size": 50304
+  "vocab_size": 32000
 }
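A quick back-of-the-envelope check: assuming the standard LLaMA parameterization (bias-free projections, untied embeddings, RMSNorm weights only), the new config reproduces the README's parameter count exactly.

```python
# Parameter count implied by the new config.json (standard LLaMA layout assumed).
hidden, layers, inter, vocab = 768, 2, 3072, 32000
n_heads, n_kv, head_dim = 12, 12, 64

attn = 2 * hidden * n_heads * head_dim + 2 * hidden * n_kv * head_dim  # q/o + k/v projections
mlp = 3 * hidden * inter                                               # gate, up, down
per_layer = attn + mlp + 2 * hidden                                    # + two RMSNorms

total = 2 * vocab * hidden + layers * per_layer + hidden  # embeddings + lm_head + final norm
print(total)  # 68030208
```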
generation_config.json CHANGED
@@ -1,6 +1,7 @@
 {
   "_from_model_config": true,
   "bos_token_id": 0,
-  "eos_token_id": 0,
+  "eos_token_id": 2,
+  "pad_token_id": 1,
   "transformers_version": "4.54.0"
 }
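These ids mean `generate()` now stops on `</s>` (id 2) and has a pad id available for batching. A tiny sketch of what this file resolves to at runtime:

```python
# Sketch: the generation defaults this file encodes.
from transformers import GenerationConfig

gen = GenerationConfig(bos_token_id=0, eos_token_id=2, pad_token_id=1)
print(gen.eos_token_id, gen.pad_token_id)  # 2 1
```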
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2fc323f05723792edad751ae5ed494357fd1fcd77dad5f8736b3007076df64b
-size 56279344
+oid sha256:8529f6f74c806571a73ab301220bbe62adfa900bea773c553077fe6e1ea58230
+size 272123144
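The new size is consistent with a float32 checkpoint of the stated parameter count; the small remainder is the safetensors JSON header.

```python
# Size check: 68,030,208 float32 parameters vs. the new LFS object size.
params = 68_030_208
tensor_bytes = params * 4          # float32
print(tensor_bytes)                # 272120832
print(272_123_144 - tensor_bytes)  # 2312 bytes left for the safetensors header
```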
special_tokens_map.json CHANGED
@@ -1,22 +1,22 @@
 {
   "bos_token": {
-    "content": "<|endoftext|>",
+    "content": "<s>",
     "lstrip": false,
-    "normalized": false,
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "eos_token": {
-    "content": "<|endoftext|>",
+    "content": "</s>",
     "lstrip": false,
-    "normalized": false,
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "unk_token": {
-    "content": "<|endoftext|>",
+    "content": "<unk>",
     "lstrip": false,
-    "normalized": false,
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4c0d99d84af59e9126913fafe5210822963e9a3065ee43e6833b358b0c2f825
-size 3564303
+oid sha256:1a75511f846a980e08bc5874db204a56c740113450f1e37744e9bcafee84d785
+size 3619013
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
tokenizer_config.json CHANGED
@@ -1,215 +1,44 @@
 {
-  "add_bos_token": false,
+  "add_bos_token": true,
   "add_eos_token": false,
-  "add_prefix_space": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
-      "content": "<|endoftext|>",
+      "content": "<unk>",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
     "1": {
-      "content": "<|padding|>",
+      "content": "<s>",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "50254": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50255": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50256": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50257": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50258": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50259": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50260": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50261": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50262": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50263": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50264": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50265": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50266": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50267": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50268": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50269": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50270": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50271": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50272": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50273": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50274": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50275": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    },
-    "50276": {
-      "content": " ",
-      "lstrip": false,
-      "normalized": true,
-      "rstrip": false,
-      "single_word": false,
-      "special": false
-    }
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
   },
-  "bos_token": "<|endoftext|>",
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
   "extra_special_tokens": {},
+  "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": null,
-  "tokenizer_class": "GPTNeoXTokenizer",
-  "unk_token": "<|endoftext|>"
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false,
+  "use_fast": true
 }
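Together with special_tokens_map.json, this completes the switch from the GPT-NeoX BPE tokenizer to the SentencePiece-based Llama tokenizer. A minimal sketch to confirm the swap (repo id assumed as above):

```python
# Tokenizer smoke test -- repo id is assumed, not part of this diff.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("skar0/Llama-3.3-Tiny-Instruct")
print(type(tok).__name__)                           # typically LlamaTokenizerFast
print(tok.bos_token, tok.eos_token, tok.unk_token)  # <s> </s> <unk>
print(tok.vocab_size)                               # 32000
```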