slices:
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [0, 40]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: 0.5
            - filter: self_attn.k_proj
              value: 0.5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0
            - filter: mlp.*_proj
              value: 0.4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [39, 40]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [40, 45]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: 0.5
            - filter: self_attn.k_proj
              value: 0.5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0
            - filter: mlp.*_proj
              value: 0.4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [44, 45]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [45, 50]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: 0.5
            - filter: self_attn.k_proj
              value: 0.5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0
            - filter: mlp.*_proj
              value: 0.4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [49, 50]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [50, 55]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: 0.5
            - filter: self_attn.k_proj
              value: 0.5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0
            - filter: mlp.*_proj
              value: 0.4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [54, 55]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [55, 80]

merge_method: passthrough
base_model: Steelskull/L3.3-MS-Nevoria-70b
parameters:
  normalize: false
dtype: float32
out_dtype: bfloat16
chat_template: llama3
tokenizer:
  source: Steelskull/L3.3-MS-Nevoria-70b
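In each scaled slice, the Writer/Palmyra-Med-70B-32K filters pass q_proj and k_proj at 0.5 and the MLP projections at 0.4, while v_proj, o_proj, the norms, embeddings, and lm_head are zeroed; the trailing bare `- value: 0` is the fallback scale for any tensor no filter matches. As a reference for reproducing the merge, below is a minimal sketch using mergekit's Python API; the config filename and output directory are placeholders, not names from this card, and a recent `pip install mergekit` is assumed.

```python
# Minimal sketch: run the passthrough merge config above with mergekit.
# "nevoria-palmyra.yml" and the output path are hypothetical placeholders.
import yaml
import torch

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

# Parse the YAML config shown above into mergekit's config model.
with open("nevoria-palmyra.yml", "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

run_merge(
    merge_config,
    out_path="./merged-model",
    options=MergeOptions(
        cuda=torch.cuda.is_available(),  # GPU if available; CPU works but is slow at 70B scale
        copy_tokenizer=True,             # respects the tokenizer source set in the config
        lazy_unpickle=False,
        low_cpu_memory=False,
    ),
)
```

The same merge should also be runnable from the command line with mergekit's CLI, e.g. `mergekit-yaml nevoria-palmyra.yml ./merged-model --cuda`.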