Upload MoLA-LM: Mixture of LoRA Adapters Language Model
- README.md +8 -8
- modeling_mola_lm.py +20 -2
README.md
CHANGED
@@ -12,18 +12,14 @@ language:
 - en
 pipeline_tag: text-generation
 ---
-
+
+Image here
 
 # MoLA-LM: Mixture of LoRA Adapters LLM
 
 MoLA-LM combines multiple LoRA adapters with an intelligent router to automatically select the best adapter for each input prompt. This approach enables specialized performance across different tasks while maintaining efficiency.
 
-
-
-**Important Note**: *The v0.5 had issues with the lora applying part of the custom lm class and its router was a bit too small with little generalization.
-In v0.6 and future models, all of these issues are/will be resolved.*
-
-**TLDR:** *Dont use v0.5, use v0.6 and above.*
+Evals are coming...
 
 ## Model Details
 
@@ -36,6 +32,7 @@ In v0.6 and future models, all of these issues are/will be resolved.*
 
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
+
 # Load the model (trust_remote_code=True is required for custom architecture)
 model = AutoModelForCausalLM.from_pretrained(
     "MoLA-LLM/MoLA-v0.6-9x4b",
@@ -43,6 +40,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained("MoLA-LLM/MoLA-v0.6-9x4b", trust_remote_code=True)
+
 # Use like any other language model - adapter selection is automatic
 prompt = "Write a Python function to calculate fibonacci numbers"
 messages = [{"role": "user", "content": prompt}]
@@ -53,8 +51,10 @@ inputs = tokenizer.apply_chat_template(
     return_dict=True,
     return_tensors="pt",
 ).to(model.device)
+
 outputs = model.generate(**inputs, max_new_tokens=8192, temperature=.6, do_sample=True)
 response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+
 print(f"Selected LoRA: {model.get_current_lora()}")
 print(response)
 ```
@@ -65,7 +65,7 @@ print(response)
 The MoLA-LM architecture consists of:
 
 1. **Base Model**: Qwen/Qwen3-4B-Thinking-2507
-2. **Router Network**: Frozen encoder as Sentence transformer + decoder as MLP for adapter selection
+2. **Router Network**: Frozen encoder as Sentence transformer + decoder as one layer MLP for adapter selection
 3. **LoRA Adapters**: 9 task-specific fine-tuned adapters
 4. **Dynamic Switching**: Automatic adapter application based on input
 
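The router item above is the substantive README change: v0.6 swaps in a one-layer MLP decoder on top of the frozen sentence-transformer encoder. As a rough illustration of that routing mechanism, here is a minimal sketch; the encoder checkpoint, the `router_head` name, and the adapter list are hypothetical stand-ins rather than MoLA-LM's actual internals.

```python
# Hypothetical sketch of prompt-based adapter routing; not MoLA-LM's real code.
import torch
from sentence_transformers import SentenceTransformer

adapter_names = [f"adapter_{i}" for i in range(9)]  # placeholder names for the 9 LoRAs

# Frozen encoder: embeds the prompt; only the routing head would be trained
encoder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed checkpoint

# "Decoder" as a one-layer MLP: one logit per adapter
router_head = torch.nn.Linear(encoder.get_sentence_embedding_dimension(), len(adapter_names))

def select_adapter(prompt: str) -> str:
    with torch.no_grad():
        emb = torch.from_numpy(encoder.encode(prompt))  # fixed-size prompt embedding
        logits = router_head(emb)                       # score every adapter
    return adapter_names[int(logits.argmax())]          # highest score wins

print(select_adapter("Write a Python function to calculate fibonacci numbers"))
```

At generation time the model would then activate the winning adapter before the forward pass, which is what `model.get_current_lora()` in the usage example surfaces.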
modeling_mola_lm.py
CHANGED
@@ -226,7 +226,16 @@ class MoLAForCausalLM(PreTrainedModel, GenerationMixin):
                 repo_id=self.model_path,
                 filename=f"loras/{first_adapter}/adapter_config.json"
             )
-
+
+            # Create a temporary directory with both files for PEFT
+            temp_dir = tempfile.mkdtemp()
+            first_lora_path = os.path.join(temp_dir, first_adapter)
+            os.makedirs(first_lora_path, exist_ok=True)
+
+            # Copy both files to the same directory
+            shutil.copy2(adapter_weights_file, os.path.join(first_lora_path, "adapter_model.safetensors"))
+            shutil.copy2(adapter_config_file, os.path.join(first_lora_path, "adapter_config.json"))
+
             print(f"Downloaded first adapter to: {first_lora_path}")
         except Exception as e:
             raise Exception(f"Failed to download first adapter {first_adapter}: {e}")
@@ -261,7 +270,16 @@ class MoLAForCausalLM(PreTrainedModel, GenerationMixin):
                 repo_id=self.model_path,
                 filename=f"loras/{task_name}/adapter_config.json"
             )
-
+
+            # Create a temporary directory with both files for PEFT
+            temp_dir = tempfile.mkdtemp()
+            lora_path = os.path.join(temp_dir, task_name)
+            os.makedirs(lora_path, exist_ok=True)
+
+            # Copy both files to the same directory
+            shutil.copy2(adapter_weights_file, os.path.join(lora_path, "adapter_model.safetensors"))
+            shutil.copy2(adapter_config_file, os.path.join(lora_path, "adapter_config.json"))
+
         except Exception as e:
             print(f"❌ Failed to download LoRA {task_name}: {e}")
             continue
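Both hunks add the same fix: `hf_hub_download` caches `adapter_model.safetensors` and `adapter_config.json` at separate paths, while PEFT loads an adapter from a single directory containing both files, so the diff assembles that layout in a temp directory before loading. Below is a standalone sketch of the pattern, assuming a hypothetical adapter name (the real names live under `loras/` in the repo).

```python
# Standalone sketch of the download-and-assemble pattern added in both hunks.
import os
import shutil
import tempfile

from huggingface_hub import hf_hub_download

repo_id = "MoLA-LLM/MoLA-v0.6-9x4b"
task_name = "coding"  # hypothetical adapter name; actual names live under loras/

# hf_hub_download caches each file separately, at unrelated paths
weights_file = hf_hub_download(repo_id=repo_id, filename=f"loras/{task_name}/adapter_model.safetensors")
config_file = hf_hub_download(repo_id=repo_id, filename=f"loras/{task_name}/adapter_config.json")

# PEFT expects adapter_config.json and adapter_model.safetensors side by side
# in one directory, so rebuild that layout in a temp dir
lora_dir = os.path.join(tempfile.mkdtemp(), task_name)
os.makedirs(lora_dir, exist_ok=True)
shutil.copy2(weights_file, os.path.join(lora_dir, "adapter_model.safetensors"))
shutil.copy2(config_file, os.path.join(lora_dir, "adapter_config.json"))

# The directory can now be handed to PEFT, e.g.:
# from peft import PeftModel
# model = PeftModel.from_pretrained(base_model, lora_dir, adapter_name=task_name)
```

One trade-off of this approach: `tempfile.mkdtemp()` directories are not cleaned up automatically, so each load leaves the copied adapter files behind until the temp location is cleared.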