schonsense committed on
Commit 88aaa1b · verified · 1 Parent(s): 0a50351

Upload 2 files

Files changed (2):
  1. Medvoria_A1_FULL.yaml +133 -0
  2. Medvoria_A2_HALF.yaml +133 -0
Medvoria_A1_FULL.yaml ADDED
@@ -0,0 +1,133 @@
slices:
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [0,40]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [39,40]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [40,45]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [44,45]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [45,50]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [49,50]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [50,55]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [54,55]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [55,80]

merge_method: passthrough
base_model: Steelskull/L3.3-MS-Nevoria-70b
parameters:
  normalize: false
dtype: float32
out_dtype: bfloat16
chat_template: llama3
tokenizer:
  source: Steelskull/L3.3-MS-Nevoria-70b
Medvoria_A2_HALF.yaml ADDED
@@ -0,0 +1,133 @@
slices:
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [0,40]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [39,40]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [40,45]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [44,45]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [45,50]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [49,50]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [50,55]
  - sources:
      - model: Writer/Palmyra-Med-70B-32K
        parameters:
          scale:
            - filter: self_attn.q_proj
              value: .5
            - filter: self_attn.k_proj
              value: .5
            - filter: self_attn.v_proj
              value: 0
            - filter: self_attn.o_proj
              value: 0.
            - filter: mlp.*_proj
              value: .4
            - filter: input_layernorm
              value: 0
            - filter: post_attention_layernorm
              value: 0
            - filter: embed_tokens
              value: 0
            - filter: model.norm
              value: 0
            - filter: lm_head
              value: 0
            - value: 0
        layer_range: [54,55]
  - sources:
      - model: Steelskull/L3.3-MS-Nevoria-70b
        layer_range: [55,80]

merge_method: passthrough
base_model: Steelskull/L3.3-MS-Nevoria-70b
parameters:
  normalize: false
dtype: float32
out_dtype: bfloat16
chat_template: llama3
tokenizer:
  source: Steelskull/L3.3-MS-Nevoria-70b
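
Both YAML files use mergekit's slice syntax (a passthrough merge_method with per-tensor scale filters that interleave scaled Writer/Palmyra-Med-70B-32K layers into Steelskull/L3.3-MS-Nevoria-70b), so either file would typically be fed directly to mergekit. The following is a minimal sketch of how that might look with mergekit's Python API (MergeConfiguration, MergeOptions, run_merge); the output directory and the option values are illustrative assumptions, not anything specified by this commit.

# Minimal sketch, not part of this commit: applying one of the configs above
# with mergekit's Python API. Output path and MergeOptions values are
# illustrative assumptions.
import yaml
import torch

from mergekit.config import MergeConfiguration
from mergekit.merge import MergeOptions, run_merge

CONFIG_PATH = "Medvoria_A1_FULL.yaml"   # or "Medvoria_A2_HALF.yaml"
OUTPUT_PATH = "./Medvoria-A1-merged"    # hypothetical output directory

# Parse the YAML into mergekit's validated configuration object.
with open(CONFIG_PATH, "r", encoding="utf-8") as fp:
    merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

# Run the passthrough merge; the scaled Palmyra-Med slices are inserted
# between the Nevoria layer ranges exactly as the slice list describes.
run_merge(
    merge_config,
    OUTPUT_PATH,
    options=MergeOptions(
        cuda=torch.cuda.is_available(),  # use a GPU if one is present
        copy_tokenizer=True,             # keep the tokenizer the config pins
        lazy_unpickle=True,              # lower peak RAM while reading shards
    ),
)

Assuming the mergekit package is installed, the CLI equivalent would be roughly: mergekit-yaml Medvoria_A1_FULL.yaml ./Medvoria-A1-merged --cuda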