Add checkpoint
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- checkpoint_metadata.json +9 -0
- config.yaml +144 -0
- model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors +3 -0
- model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors +3 -0
- model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors +3 -0
checkpoint_metadata.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dp": 64,
|
3 |
+
"metas": {
|
4 |
+
"consumed_train_samples": 11776000,
|
5 |
+
"last_train_step": 11500
|
6 |
+
},
|
7 |
+
"tp": 1,
|
8 |
+
"version": "1.2"
|
9 |
+
}
|
config.yaml
ADDED
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
checkpoints:
|
2 |
+
checkpoint_interval: 500
|
3 |
+
checkpoints_path: /scratch/craffel/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
|
4 |
+
checkpoints_path_is_shared_file_system: false
|
5 |
+
resume_checkpoint_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
|
6 |
+
save_initial_state: true
|
7 |
+
data:
|
8 |
+
dataset:
|
9 |
+
dataloader_type: single
|
10 |
+
dataset_max_tokens: null
|
11 |
+
dataset_weights: null
|
12 |
+
datasets:
|
13 |
+
- bits_per_token: 16
|
14 |
+
filename_pattern: .*\.ds$
|
15 |
+
folder: /scratch/dataset/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-/
|
16 |
+
original_folder: null
|
17 |
+
seed: 6
|
18 |
+
shuffle: true
|
19 |
+
skip_tokens: 0
|
20 |
+
pad_samples_to_global_batch_size: false
|
21 |
+
skip_in_stream: true
|
22 |
+
num_loading_workers: 0
|
23 |
+
seed: 6
|
24 |
+
experiment_logger:
|
25 |
+
tensorboard_logger:
|
26 |
+
push_to_hub_interval: 300
|
27 |
+
repo_id: craffel/commav0p1-ablations
|
28 |
+
repo_public: false
|
29 |
+
tensorboard_dir: /scratch/craffel/tensorboard-craffel-commav0p1-ablations
|
30 |
+
wandb_logger: null
|
31 |
+
general:
|
32 |
+
benchmark_csv_path: null
|
33 |
+
consumed_train_samples: 11776000
|
34 |
+
ignore_sanity_checks: true
|
35 |
+
project: commav0p1-ablations
|
36 |
+
run: commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
|
37 |
+
seed: 42
|
38 |
+
step: 11500
|
39 |
+
kill_switch_path: null
|
40 |
+
lighteval:
|
41 |
+
batch_size: 16
|
42 |
+
checkpoints_path: null
|
43 |
+
generation: null
|
44 |
+
logging:
|
45 |
+
hub_repo_details: null
|
46 |
+
hub_repo_results: null
|
47 |
+
hub_repo_tensorboard: craffel/commav0p1-ablations
|
48 |
+
local_output_path: /scratch/craffel/lighteval/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
|
49 |
+
push_details_to_hub: false
|
50 |
+
push_results_to_hub: false
|
51 |
+
push_results_to_tensorboard: true
|
52 |
+
tensorboard_metric_prefix: e
|
53 |
+
parallelism:
|
54 |
+
dp: 8
|
55 |
+
expert_parallel_size: 1
|
56 |
+
pp: 1
|
57 |
+
pp_engine: 1f1b
|
58 |
+
tp: 1
|
59 |
+
tp_linear_async_communication: false
|
60 |
+
tp_mode: ALL_REDUCE
|
61 |
+
slurm_script_dir: /fsx/craffel/train/eval-scripts
|
62 |
+
slurm_template: /fsx/craffel/run_eval.slurm.jinja
|
63 |
+
tasks:
|
64 |
+
custom_tasks: brrr.lighteval.evaluation_tasks
|
65 |
+
dataset_loading_processes: 8
|
66 |
+
max_samples: 1000
|
67 |
+
multichoice_continuations_start_space: null
|
68 |
+
no_multichoice_continuations_start_space: null
|
69 |
+
num_fewshot_seeds: null
|
70 |
+
tasks: early-signal
|
71 |
+
wandb: null
|
72 |
+
logging:
|
73 |
+
iteration_step_info_interval: 1
|
74 |
+
log_level: info
|
75 |
+
log_level_replica: info
|
76 |
+
model:
|
77 |
+
ddp_bucket_cap_mb: 25
|
78 |
+
dtype: bfloat16
|
79 |
+
init_method:
|
80 |
+
std: 0.02
|
81 |
+
make_vocab_size_divisible_by: 1
|
82 |
+
model_config:
|
83 |
+
bos_token_id: 1
|
84 |
+
eos_token_id: 2
|
85 |
+
hidden_act: silu
|
86 |
+
hidden_size: 2048
|
87 |
+
initializer_range: 0.02
|
88 |
+
intermediate_size: 8192
|
89 |
+
is_llama_config: true
|
90 |
+
max_position_embeddings: 2048
|
91 |
+
num_attention_heads: 32
|
92 |
+
num_hidden_layers: 24
|
93 |
+
num_key_value_heads: 32
|
94 |
+
pad_token_id: null
|
95 |
+
pretraining_tp: 1
|
96 |
+
rms_norm_eps: 1.0e-05
|
97 |
+
rope_scaling: null
|
98 |
+
tie_word_embeddings: true
|
99 |
+
use_cache: true
|
100 |
+
vocab_size: 50272
|
101 |
+
optimizer:
|
102 |
+
accumulate_grad_in_fp32: true
|
103 |
+
adam_beta1: 0.9
|
104 |
+
adam_beta2: 0.95
|
105 |
+
adam_eps: 1.0e-08
|
106 |
+
clip_grad: 1.0
|
107 |
+
learning_rate_scheduler:
|
108 |
+
learning_rate: 0.0003
|
109 |
+
lr_decay_starting_step: null
|
110 |
+
lr_decay_steps: null
|
111 |
+
lr_decay_style: cosine
|
112 |
+
lr_warmup_steps: 500
|
113 |
+
lr_warmup_style: linear
|
114 |
+
min_decay_lr: 3.0e-05
|
115 |
+
torch_adam_is_fused: true
|
116 |
+
weight_decay: 0.1
|
117 |
+
zero_stage: 0
|
118 |
+
parallelism:
|
119 |
+
dp: 64
|
120 |
+
expert_parallel_size: 1
|
121 |
+
pp: 1
|
122 |
+
pp_engine: 1f1b
|
123 |
+
tp: 1
|
124 |
+
tp_linear_async_communication: true
|
125 |
+
tp_mode: REDUCE_SCATTER
|
126 |
+
profiler: null
|
127 |
+
s3_upload:
|
128 |
+
remove_after_upload: true
|
129 |
+
s5cmd_concurrency: 5
|
130 |
+
s5cmd_numworkers: 16
|
131 |
+
s5cmd_path: /fsx/craffel/miniconda3/envs/exp/bin/s5cmd
|
132 |
+
upload_s3_path: s3://comma-v0.1-ablations/checkpoints/commav0p1-ablations-1p82G-commonpile0p1filteredrebalanceddespaced-seed-6-
|
133 |
+
tokenizer:
|
134 |
+
tokenizer_max_length: null
|
135 |
+
tokenizer_name_or_path: gpt2
|
136 |
+
tokenizer_revision: null
|
137 |
+
tokens:
|
138 |
+
batch_accumulation_per_replica: 4
|
139 |
+
limit_test_batches: 0
|
140 |
+
limit_val_batches: 0
|
141 |
+
micro_batch_size: 4
|
142 |
+
sequence_length: 2048
|
143 |
+
train_steps: 166893
|
144 |
+
val_check_interval: 100
|
model/model/decoder/0/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f595f843328e4dbfbfe4bb03702ee99aa945848aa992cdb9f381cafb7752f226
|
3 |
+
size 8388848
|
model/model/decoder/0/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62c6865cb7f19fa47c0357a78a30379fccd92cf9c6029fad264952cfc34b901d
|
3 |
+
size 25166176
|
model/model/decoder/0/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23e2b0d83742b9f670fbb0987fafda40a18ab097160a765307e9c760f29a3ebb
|
3 |
+
size 4192
|
model/model/decoder/0/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:547c3af525a293f758643eb51858e5ed85beacaca4403eb7f3c46d2cb1af180f
|
3 |
+
size 33554672
|
model/model/decoder/0/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ce4ca52dcd754a5b7be9853ab60d55b54b6745005176872cc978765112bed7b
|
3 |
+
size 67109160
|
model/model/decoder/0/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f789059786919335721746905b2fed6983b972cd976adfc65a0b63b5af9e3f9
|
3 |
+
size 4192
|
model/model/decoder/1/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:857b31f40d13617a9afa0119ad2493bb9581252de91d95a0b0cc38baa3c2ed5b
|
3 |
+
size 8388848
|
model/model/decoder/1/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16ea0dbd8bf6a1b0d8268884e18fe51f2d2e008c7a25237e19cf7366cbd9851d
|
3 |
+
size 25166176
|
model/model/decoder/1/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09208cc005d4079fdaf9eb5f60027316757fcd2e952f2b8119f1fbf62e9a9ef7
|
3 |
+
size 4192
|
model/model/decoder/1/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a079c60b58cc02e979f530490d1244f4c12d2cdb575b62734583f4186cb1d04
|
3 |
+
size 33554672
|
model/model/decoder/1/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:345cdcd95a92037a84cdada460c1b549763495770e58e7d17f1ba05f88cc9d3f
|
3 |
+
size 67109160
|
model/model/decoder/1/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e73698f3aab361c9f71a5cdb6e6d20b3c9a264d26e367946d701cb5a59ba943
|
3 |
+
size 4192
|
model/model/decoder/10/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9c786e1cdf095dead8d577e450a916ee186f094c18810b176637309fbf528a6
|
3 |
+
size 8388848
|
model/model/decoder/10/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd249d1d16f0e112e0fbe42f465d2e0f839671fcb3cfc01d300a084e4b5def83
|
3 |
+
size 25166176
|
model/model/decoder/10/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c7511e6536720d0ab495e3f6e9e014228a2b0992b9d905fb261339845cf623c
|
3 |
+
size 4192
|
model/model/decoder/10/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0381c3a4bebbb052d2e94a03bfaf77b366773a504da76bbc76e5fca3a1ca29cc
|
3 |
+
size 33554672
|
model/model/decoder/10/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3543891896f068c6067808c320ff65ddccc851d01fd82ebb6b7ca258e8f6cce
|
3 |
+
size 67109160
|
model/model/decoder/10/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4d35ec64afc5c9c35640ad05a9ed653f1b726d38ccee8bcd1382fb16ec03de8
|
3 |
+
size 4192
|
model/model/decoder/11/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac80ad0b0858a3cd628fbe411bf8e25eac602a5cb143983aa25c540ddfb7471f
|
3 |
+
size 8388848
|
model/model/decoder/11/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:31adda5f22fa301b272589f5077b9ef4c72997e3726a6cc365bd0a1f8a75c17f
|
3 |
+
size 25166176
|
model/model/decoder/11/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cda020b7d376de3e3318e90773d43d07e65a4f0bb25dedb5f7e7f373001912d2
|
3 |
+
size 4192
|
model/model/decoder/11/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:03bb08bc15845088fb71bf35e0fbd47e047a846110a5dd2b38d374fa6de2e9b3
|
3 |
+
size 33554672
|
model/model/decoder/11/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:657d742c73746fb8be4ee69826641f6d2dc4a682428dcf72b5316d21d74323f6
|
3 |
+
size 67109160
|
model/model/decoder/11/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:909e56fdf401265785d845168cb0dc200c7af771d0f57b0c7357fa3ca199afed
|
3 |
+
size 4192
|
model/model/decoder/12/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:330d7d6710fff1c67078bfd47e3e66fd4c57edcbc00d9cfc387213869849b8d7
|
3 |
+
size 8388848
|
model/model/decoder/12/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:244a293c15e9f62d6602091d955d8e0ba835abbe59be6b8a99bc959e87073ac3
|
3 |
+
size 25166176
|
model/model/decoder/12/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:741db7520d4105a068fe664b8c685938160d02c019a0f3f539f235b0f8afa9dd
|
3 |
+
size 4192
|
model/model/decoder/12/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4854fff099477f5a89b9a6f90388dc72291970623c6c50430702faa52ae54b81
|
3 |
+
size 33554672
|
model/model/decoder/12/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4da6220b7c9da37be4d0fd11d215bd60fedfef20133ba07309a896b731ebfd38
|
3 |
+
size 67109160
|
model/model/decoder/12/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec19650183c6723bafec772e7753ff82b854f0ba504473cd26d38f314d007286
|
3 |
+
size 4192
|
model/model/decoder/13/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebe2c38452c95c4fd7513eaf391126fc2ba8eb8ec4d3977b9630a2869caf8f07
|
3 |
+
size 8388848
|
model/model/decoder/13/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69df2cf77996f34b6818b40abfe827e9abf3d13c33c145b4168b5410401afcdc
|
3 |
+
size 25166176
|
model/model/decoder/13/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e16bb44d659cbfc89bb76143adc90f3d002ad6d2e433f06ce1fb3b536358a568
|
3 |
+
size 4192
|
model/model/decoder/13/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c5e2d80309962ddab74c797b5fe38d96bab3517e537f8e3137ac9076b52f579
|
3 |
+
size 33554672
|
model/model/decoder/13/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1662d032158c41c5b3580253f32b3895f33e06dbd49be3e1fdcc606531134f92
|
3 |
+
size 67109160
|
model/model/decoder/13/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4c157081b4232b3688029601d8a3701da500c452ccde7e109a8aefc4f099e53
|
3 |
+
size 4192
|
model/model/decoder/14/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d0d2f5b186b8a6eb7db79a68d917233d4dc30c84505361d6afe35998d84b89f
|
3 |
+
size 8388848
|
model/model/decoder/14/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4742ccc61006f1bfb2876219f5df3b42c1a5346053b8b0332dd9ec1f812626fe
|
3 |
+
size 25166176
|
model/model/decoder/14/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2eec94d8153c9550c0726821b4ee11077639f2ced03cd4082a6ed94c2389cc6b
|
3 |
+
size 4192
|
model/model/decoder/14/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c224ac691fb31507663bbeee7b32e969c8e836e6571ae249c01e6240151f2ce
|
3 |
+
size 33554672
|
model/model/decoder/14/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d4deba2b45b7e1f162c31d962a618bf39cfedc64e6179cb025afe220e1c3d48
|
3 |
+
size 67109160
|
model/model/decoder/14/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13bfbc176224d4f4b850d74682fb1dc5436ee08cdda021cc829a272a2314fd1f
|
3 |
+
size 4192
|
model/model/decoder/15/pp_block/attn/o_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebed027c57964e1f8dc1e5fb82fd52304b0a3de85a7ef68eb4c269ee81c1941c
|
3 |
+
size 8388848
|
model/model/decoder/15/pp_block/attn/qkv_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ccebed5a572e93346a2f96b2b78ea94ae20a57956b13b7cac4c40eb10a9f660
|
3 |
+
size 25166176
|
model/model/decoder/15/pp_block/input_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9005570002611a550a2f2230e8173bde314413699e302d6280fde4ec93e8976
|
3 |
+
size 4192
|
model/model/decoder/15/pp_block/mlp/down_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b6ab383f8bb91a95cb60375b40d9fd89ad0447a6026c99a89bf669f23397b6a
|
3 |
+
size 33554672
|
model/model/decoder/15/pp_block/mlp/gate_up_proj/model_weight_pp-rank-0-of-1_tp-rank-0-of-1.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48b52d3a8c390c6bc92708c94a111e7ea456b8a650d2807e50cadf6f72605f5c
|
3 |
+
size 67109160
|
model/model/decoder/15/pp_block/post_attention_layernorm/model_weight.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9667691612c019d12bc11b3fb9707122937aaa75a66f6ebd67fd6e3a411032ea
|
3 |
+
size 4192
|