skar0 committed on
Commit
3dbe449
·
verified ·
1 Parent(s): ada7afa

Upload tiny random Llama-3.3 model (seed 42)

Browse files
Files changed (3) hide show
  1. README.md +5 -5
  2. config.json +5 -5
  3. model.safetensors +2 -2
README.md CHANGED
@@ -16,16 +16,16 @@ This is a tiny random version of the meta-llama/Llama-3.3-70B-Instruct model, cr
16
 
17
  - **Base model**: meta-llama/Llama-3.3-70B-Instruct
18
  - **Seed**: 42
19
- - **Hidden size**: 768
20
  - **Number of layers**: 12
21
- - **Number of attention heads**: 12
22
  - **Vocabulary size**: 128256
23
- - **Max position embeddings**: 2048
24
 
25
  ## Parameters
26
 
27
- - **Total parameters**: ~272,517,888
28
- - **Trainable parameters**: ~272,517,888
29
 
30
  ## Usage
31
 
 
16
 
17
  - **Base model**: meta-llama/Llama-3.3-70B-Instruct
18
  - **Seed**: 42
19
+ - **Hidden size**: 256
20
  - **Number of layers**: 12
21
+ - **Number of attention heads**: 4
22
  - **Vocabulary size**: 128256
23
+ - **Max position embeddings**: 131072
24
 
25
  ## Parameters
26
 
27
+ - **Total parameters**: ~75,110,656
28
+ - **Trainable parameters**: ~75,110,656
29
 
30
  ## Usage
31
 
config.json CHANGED
@@ -12,15 +12,15 @@
12
  ],
13
  "head_dim": 64,
14
  "hidden_act": "silu",
15
- "hidden_size": 768,
16
  "initializer_range": 0.02,
17
- "intermediate_size": 2048,
18
- "max_position_embeddings": 2048,
19
  "mlp_bias": false,
20
  "model_type": "llama",
21
- "num_attention_heads": 12,
22
  "num_hidden_layers": 12,
23
- "num_key_value_heads": 4,
24
  "pretraining_tp": 1,
25
  "rms_norm_eps": 1e-05,
26
  "rope_scaling": {
 
12
  ],
13
  "head_dim": 64,
14
  "hidden_act": "silu",
15
+ "hidden_size": 256,
16
  "initializer_range": 0.02,
17
+ "intermediate_size": 768,
18
+ "max_position_embeddings": 131072,
19
  "mlp_bias": false,
20
  "model_type": "llama",
21
+ "num_attention_heads": 4,
22
  "num_hidden_layers": 12,
23
+ "num_key_value_heads": 2,
24
  "pretraining_tp": 1,
25
  "rms_norm_eps": 1e-05,
26
  "rope_scaling": {
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51111d7052986501f4f8976ed86e6c838a273ddea1d9d8ce12d92d05c4d75c80
3
- size 1090083944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90be80471e3b86e6f1aaea7c053977f9aa0f720669ca9635b5f97bba661a0f2c
3
+ size 300454912