skar0 committed
Commit 589f8eb · verified · Parent: 32a6826

Upload tiny random Llama-3.3 model (seed 42)

Files changed (3):
  1. README.md +7 -7
  2. config.json +6 -7
  3. model.safetensors +2 -2
README.md CHANGED
@@ -16,16 +16,16 @@ This is a tiny random version of the EleutherAI/pythia-14m model, created for te
 
 - **Base model**: EleutherAI/pythia-14m
 - **Seed**: 42
-- **Hidden size**: 64
-- **Number of layers**: 2
-- **Number of attention heads**: 2
-- **Vocabulary size**: 1000
-- **Max position embeddings**: 512
+- **Hidden size**: 128
+- **Number of layers**: 6
+- **Number of attention heads**: 4
+- **Vocabulary size**: 50304
+- **Max position embeddings**: 2048
 
 ## Parameters
 
-- **Total parameters**: ~195,072
-- **Trainable parameters**: ~195,072
+- **Total parameters**: ~14,067,712
+- **Trainable parameters**: ~14,067,712
 
 ## Usage
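The commit does not include the script that generated these weights. The sketch below shows one plausible way to reproduce a model of this shape with `transformers`, assuming every config field not listed in the diff is left at its `GPTNeoXConfig` default; the output directory name is also hypothetical:

```python
import torch
from transformers import GPTNeoXConfig, GPTNeoXForCausalLM

# Values taken from the config.json diff in this commit; every field
# not listed here is left at the GPTNeoXConfig default (an assumption --
# the actual generation script is not part of the commit).
config = GPTNeoXConfig(
    hidden_size=128,
    intermediate_size=512,
    num_hidden_layers=6,
    num_attention_heads=4,
    vocab_size=50304,
    max_position_embeddings=2048,
)

torch.manual_seed(42)                       # the "seed 42" from the commit message
model = GPTNeoXForCausalLM(config)          # randomly initialised weights
model.save_pretrained("tiny-random-model")  # hypothetical output dir; writes config.json and model.safetensors
```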
 
config.json CHANGED
@@ -9,15 +9,14 @@
   "eos_token_id": 0,
   "hidden_act": "gelu",
   "hidden_dropout": 0.0,
-  "hidden_size": 64,
+  "hidden_size": 128,
   "initializer_range": 0.02,
-  "intermediate_size": 128,
+  "intermediate_size": 512,
   "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 512,
+  "max_position_embeddings": 2048,
   "model_type": "gpt_neox",
-  "num_attention_heads": 2,
-  "num_hidden_layers": 2,
-  "num_key_value_heads": 2,
+  "num_attention_heads": 4,
+  "num_hidden_layers": 6,
   "partial_rotary_factor": 0.25,
   "rope_scaling": null,
   "rope_theta": 10000,
@@ -28,5 +27,5 @@
   "transformers_version": "4.54.0",
   "use_cache": true,
   "use_parallel_residual": true,
-  "vocab_size": 1000
+  "vocab_size": 50304
 }
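The README's new parameter count (~14,067,712) follows directly from these values; a quick sanity check, assuming the standard GPT-NeoX layout with untied input and output embeddings:

```python
# Back-of-the-envelope parameter count for the new config
# (GPT-NeoX layout, untied input/output embeddings):
h, ffn, L, V = 128, 512, 6, 50304

embed_in  = V * h                  # input embeddings: 6,438,912
embed_out = h * V                  # LM head (untied): 6,438,912
per_layer = (
    (h * 3 * h + 3 * h)            # fused QKV projection + bias
    + (h * h + h)                  # attention output projection
    + (h * ffn + ffn)              # MLP up-projection
    + (ffn * h + h)                # MLP down-projection
    + 2 * 2 * h                    # two LayerNorms (weight + bias each)
)                                  # = 198,272 per layer
final_ln = 2 * h                   # final LayerNorm: 256

print(embed_in + embed_out + L * per_layer + final_ln)  # 14067712
```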
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58e8409da8ee6ad38c51e0c68718a4dd271ebaf25472b9ddc014acc952361db5
-size 783264
+oid sha256:d2fc323f05723792edad751ae5ed494357fd1fcd77dad5f8736b3007076df64b
+size 56279344
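The new file size is consistent with the parameter count above: assuming float32 weights, 14,067,712 parameters occupy 14,067,712 × 4 = 56,270,848 bytes, with the remaining ~8.5 KB accounted for by the safetensors header. The old file checks out the same way: 195,072 × 4 = 780,288 of 783,264 bytes.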