File size: 853 Bytes
9e6552e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# Merge recipe: combines the two entries under `models` using the `sce`
# method, relative to `base_model`.
merge_method: sce
base_model: meta-llama/Llama-3.3-70B-Instruct
dtype: bfloat16

# The tokenizer is sourced from the first model listed under `models`.
tokenizer:
  source: deepseek-ai/DeepSeek-R1-Distill-Llama-70B

# Each model carries its own `select_topk` list. Entries pair a tensor-name
# `filter` with a `value`; the last entry has no filter and is the fallback.
#
# NOTE(review): mergekit's docs appear to describe `select_topk` as a global
# parameter for `sce` - confirm these per-model values are actually honored.
# NOTE(review): confirm whether `|` inside a filter is treated as alternation
# or as a literal substring; if literal, those entries never match.
# NOTE(review): `self_attn` / `mlp` are broader than the projection filters;
# check which entry wins when several could match the same tensor name.
models:
  - model: deepseek-ai/DeepSeek-R1-Distill-Llama-70B
    parameters:
      select_topk:
        - filter: self_attn
          value: 0.2
        - filter: 'q_proj|k_proj|v_proj'
          value: 0.2
        - filter: 'up_proj|down_proj'
          value: 0.2
        - filter: mlp
          value: 0.1
        # Fallback for anything not matched above.
        - value: 0.1

  - model: sophosympatheia/novatempus-70b-v0.1
    parameters:
      select_topk:
        - filter: self_attn
          value: 0.1
        - filter: 'q_proj|k_proj|v_proj'
          value: 0.1
        - filter: 'up_proj|down_proj'
          value: 0.1
        - filter: mlp
          value: 0.2
        # Fallback for anything not matched above.
        - value: 0.1