Ctrl+K
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-contra0.1
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-contra0.5
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-old0.1contrareward
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-old0.5contrareward
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-old1.0contrareward
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-ref0.1contrareward
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-ref0.5contrareward
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-ref1.0contrareward
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-theta0.01contraloss
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0-theta0.0contraloss
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-rougeL-t1.0
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-t1.0-contra0.0
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-t1.0-contra0.1
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-t1.0-contra0.5
- matheasy-r1-grpo-qwen2.5-3b-em-warm0.05-t1.0
- matheasy-r1-grpo-qwen2.5-3b-em-warmup-0.05-rouge-rougeL-t1.0-contra0.1
- matheasy-r1-grpo-qwen2.5-3b-em-warmup-0.05-rouge-rougeL-t1.0-contra0.5
- matheasy-r1-grpo-qwen2.5-3b-em-warmup-0.05-rouge-rougeL-t1.0
- matheasy-r1-mutualpo0.1-qwen2.5-3b-em-warm0.05-t1.0-old0.0reward
- matheasy-r1-mutualpo0.1-qwen2.5-3b-em-warm0.05-t1.0-ref0.0reward
- matheasy-r1-mutualpo0.5-qwen2.5-3b-em-warm0.05-t1.0-ref0.0reward
- matheasy-r1-mutualpo1.0-qwen2.5-3b-em-warm0.05-t1.0-old0.0reward
- matheasy-r1-mutualpo1.0-qwen2.5-3b-em-warm0.05-t1.0-ref0.0reward
- matheasy-r1-mutualpo10.0-qwen2.5-3b-em-warm0.05-t1.0-old0.0reward
- matheasy-r1-mutualpo10.0-qwen2.5-3b-em-warm0.05-t1.0-ref0.0reward
- matheasy-r1-mutualpo2.0-qwen2.5-3b-em-warm0.05-t1.0-old0.0reward
- matheasy-r1-mutualpo2.0-qwen2.5-3b-em-warm0.05-t1.0-ref0.0reward
- matheasy-r1-mutualpo5.0-qwen2.5-3b-em-warm0.05-t1.0-old0.0reward
- matheasy-r1-mutualpo5.0-qwen2.5-3b-em-warm0.05-t1.0-ref0.0reward
- mathhard2-grpo-qwen2.5-3b-kl0.0-4k
- mathhard2-grpo-qwen2.5-3b
- mathhard2-grpo-qwen2.5-math-1.5b
- mathhard2-mutualpo0.0-qwen2.5-3b-old0.0reward-token_id-4k
- mathhard2-mutualpo0.1-qwen2.5-3b-old0.0reward-token_id-4k
- mathhard2-mutualpo0.2-qwen2.5-3b-old0.0reward-token_id-4k
- mathhard2-mutualpo0.5-qwen2.5-3b-old0.0reward-token_id-4k
- mathhard2-mutualpos0.1-qwen2.5-3b-ref0.0reward-token_id-4k
- mathhard2-mutualpos0.2-qwen2.5-3b-old0.0reward-token_id-4k
- mathhard2-mutualpos0.2-qwen2.5-3b-ref0.0reward-token_id-4k
- mathhard2-mutualpos0.2-qwen2.5-3b
- mathhard2-mutualposclip0.1-qwen2.5-3b-old0.0reward-token_id-4k
- mathhard2-mutualposclip0.1-qwen2.5-3b-ref0.0reward-token_id-4k
-
20.8 kB