acecoder-fsdp_agent-mimo-7b-base-grpo-n16-b128-t1.0-lr1e-6-69k-mtrl-sys9-new2-debug-120-step
/
configuration_mimo.py
from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
class MiMoConfig(Qwen2Config):
    """Configuration class registered under the ``"mimo"`` model type.

    Extends ``Qwen2Config`` with one extra field,
    ``num_nextn_predict_layers``; every other argument is forwarded
    unchanged to the parent configuration.
    """

    model_type = "mimo"

    def __init__(self, *args, num_nextn_predict_layers: int = 0, **kwargs):
        """Store the MiMo-specific field and delegate the rest to ``Qwen2Config``.

        Args:
            *args: Positional arguments passed through to ``Qwen2Config``.
            num_nextn_predict_layers: Keyword-only value stored on the
                instance; defaults to 0. Presumably the number of
                next-N-token prediction layers -- this file does not use
                it, so confirm against the modeling code.
            **kwargs: Keyword arguments passed through to ``Qwen2Config``.
        """
        # Set before the parent initializer runs, preserving the original
        # statement order.
        self.num_nextn_predict_layers = num_nextn_predict_layers
        super().__init__(*args, **kwargs)