---
# Model-merge configuration. The key names match the mergekit config schema;
# confirm against the tool that actually consumes this file.
#
# NOTE(review): with `task_arithmetic`, each listed model is presumably turned
# into a delta from `base_model`, scaled by its `weight`, and summed back onto
# the base. Under that reading the +1.0 / -1.0 pair below works out to
#   base + (grpo - base) - (sft - base) = base + grpo - sft
# TODO confirm this is the intended result.

# Reference checkpoint that the other models are compared against.
base_model: Qwen-Qwen2.5-1.5B-Instruct

models:
  # Positive weight: this model's contribution is added.
  - model: Qwen2.5-1.5B-gsm8k-grpo-full
    parameters:
      weight: 1.0
  # Negative weight: this model's contribution is subtracted.
  - model: Qwen2.5-1.5B-gsm8k-sft-full
    parameters:
      weight: -1.0

merge_method: task_arithmetic

# Presumably the tensor dtype used for the merge and its output; verify.
dtype: bfloat16