---
# Merge configuration using the `task_arithmetic` method.
#
# Two fine-tuned checkpoints are combined relative to `base_model`, with the
# GRPO checkpoint weighted +1.0 and the SFT checkpoint weighted -1.0.
# NOTE(review): assuming standard task-vector semantics (delta = model - base),
# this amounts to base + (grpo - base) - (sft - base) — confirm against the
# merge tool's documentation, including whether weight normalization is off.
#
# NOTE(review): the model identifiers below contain no `org/` separator, so
# they look like local directory names rather than hub repo ids — confirm they
# resolve in the environment where the merge is run.

# Reference checkpoint that the other models are compared against.
base_model: Qwen-Qwen2.5-1.5B-Instruct

# Checkpoints to merge, each with its own weight.
models:
  - model: Qwen2.5-1.5B-gsm8k-grpo-full
    parameters:
      weight: 1.0
  - model: Qwen2.5-1.5B-gsm8k-sft-full
    parameters:
      # Negative weight: this checkpoint's contribution is subtracted.
      weight: -1.0

merge_method: task_arithmetic

# Data type declared for the merge.
dtype: bfloat16