AtAndDev committed (verified)
Commit a319f1f · 1 Parent(s): 0ec5688

Upload MoLA-LM: Mixture of LoRA Adapters Language Model

Files changed (2):
  1. README.md +7 -8
  2. modeling_mola_lm.py +56 -83
README.md CHANGED
@@ -13,18 +13,13 @@ language:
 pipeline_tag: text-generation
 ---
 
-![image/png](https://cdn-uploads.huggingface.co/production/uploads/630f3e4002ce39336c411048/3gVVmArsXVoogpkXvsBs7.png)
+Image here
 
 # MoLA-LM: Mixture of LoRA Adapters LLM
 
 MoLA-LM combines multiple LoRA adapters with an intelligent router to automatically select the best adapter for each input prompt. This approach enables specialized performance across different tasks while maintaining efficiency.
 
-[**Click for evals**](https://github.com/alkinun/MoLA/blob/main/README.md)
-
-**Important Note**: *The v0.5 had issues with the lora applying part of the custom lm class and its router was a bit too small with little generalization.
-In v0.6 and future models, all of these issues are/will be resolved.*
-
-**TLDR:** *Dont use v0.5, use v0.6 and above.*
+Evals are coming...
 
 ## Model Details
 
@@ -37,6 +32,7 @@ In v0.6 and future models, all of these issues are/will be resolved.*
 
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
+
 # Load the model (trust_remote_code=True is required for custom architecture)
 model = AutoModelForCausalLM.from_pretrained(
     "MoLA-LLM/MoLA-v0.6-9x4b",
@@ -44,6 +40,7 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto"
 )
 tokenizer = AutoTokenizer.from_pretrained("MoLA-LLM/MoLA-v0.6-9x4b", trust_remote_code=True)
+
 # Use like any other language model - adapter selection is automatic
 prompt = "Write a Python function to calculate fibonacci numbers"
 messages = [{"role": "user", "content": prompt}]
@@ -54,8 +51,10 @@ inputs = tokenizer.apply_chat_template(
     return_dict=True,
     return_tensors="pt",
 ).to(model.device)
+
 outputs = model.generate(**inputs, max_new_tokens=8192, temperature=.6, do_sample=True)
 response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+
 print(f"Selected LoRA: {model.get_current_lora()}")
 print(response)
 ```
@@ -66,7 +65,7 @@ print(response)
 The MoLA-LM architecture consists of:
 
 1. **Base Model**: Qwen/Qwen3-4B-Thinking-2507
-2. **Router Network**: Frozen encoder as Sentence transformer + decoder as MLP for adapter selection
+2. **Router Network**: Frozen encoder as Sentence transformer + decoder as one layer MLP for adapter selection
 3. **LoRA Adapters**: 9 task-specific fine-tuned adapters
 4. **Dynamic Switching**: Automatic adapter application based on input
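The README describes the router only at a high level (frozen sentence-transformer encoder plus a one-layer MLP head that picks one of the 9 adapters). Below is a minimal sketch of that idea; the encoder checkpoint, embedding size, and task labels are illustrative assumptions, not values taken from this repository.

```python
import torch
import torch.nn as nn
from sentence_transformers import SentenceTransformer

TASK_LABELS = [f"task_{i}" for i in range(9)]  # placeholder names for the 9 adapters

class AdapterRouter(nn.Module):
    """Frozen sentence-transformer encoder + one-layer MLP head over its embedding."""

    def __init__(self, encoder_name: str = "sentence-transformers/all-MiniLM-L6-v2"):
        super().__init__()
        self.encoder = SentenceTransformer(encoder_name)
        for p in self.encoder.parameters():            # keep the encoder frozen
            p.requires_grad = False
        emb_dim = self.encoder.get_sentence_embedding_dimension()
        self.head = nn.Linear(emb_dim, len(TASK_LABELS))  # the only trained layer

    @torch.no_grad()
    def route(self, prompt: str) -> str:
        emb = self.encoder.encode(prompt, convert_to_tensor=True)  # shape: (emb_dim,)
        logits = self.head(emb)                                    # shape: (num_adapters,)
        return TASK_LABELS[int(logits.argmax())]

# router = AdapterRouter()  # in practice the head would be trained on labeled routing data
# print(router.route("Write a Python function to calculate fibonacci numbers"))
```

In the actual model, this routing decision determines which LoRA is applied before generation, which is what `model.get_current_lora()` reports in the usage example above.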
 
modeling_mola_lm.py CHANGED
@@ -194,64 +194,29 @@ class MoLAForCausalLM(PreTrainedModel, GenerationMixin):
             raise ImportError(f"Required dependencies not found: {e}")
 
     def _load_lora_adapters(self):
-        """Load LoRA adapters using PEFT (single wrapper, multiple adapters)."""
-        from huggingface_hub import hf_hub_download
+        """Load LoRA adapters using PEFT - simplified approach."""
+        print("Loading LoRA adapters...")
 
         if not self.model_path:
             print("No model path specified, skipping LoRA loading")
             return
-
-        print("Loading LoRA adapters (single wrapper)...")
-
-        # Get the first adapter to create the initial PEFT wrapper
-        first_adapter = str(self.config.task_labels[0])
-        first_lora_path = None
 
+        # Simple approach: try to load each LoRA directly from Hub using PEFT's built-in capabilities
         try:
-            # Handle both local and Hub paths for first adapter
-            if os.path.exists(self.model_path):
-                # Local path
-                first_lora_path = os.path.join(self.model_path, "loras", first_adapter)
-                if not os.path.exists(first_lora_path):
-                    raise FileNotFoundError(f"First adapter directory not found: {first_lora_path}")
-            else:
-                # Hub path - download first adapter
-                try:
-                    # Download both required files for first adapter
-                    adapter_weights_file = hf_hub_download(
-                        repo_id=self.model_path,
-                        filename=f"loras/{first_adapter}/adapter_model.safetensors"
-                    )
-                    adapter_config_file = hf_hub_download(
-                        repo_id=self.model_path,
-                        filename=f"loras/{first_adapter}/adapter_config.json"
-                    )
-
-                    # Create a temporary directory with both files for PEFT
-                    temp_dir = tempfile.mkdtemp()
-                    first_lora_path = os.path.join(temp_dir, first_adapter)
-                    os.makedirs(first_lora_path, exist_ok=True)
-
-                    # Copy both files to the same directory
-                    shutil.copy2(adapter_weights_file, os.path.join(first_lora_path, "adapter_model.safetensors"))
-                    shutil.copy2(adapter_config_file, os.path.join(first_lora_path, "adapter_config.json"))
-
-                    print(f"Downloaded first adapter to: {first_lora_path}")
-                except Exception as e:
-                    raise Exception(f"Failed to download first adapter {first_adapter}: {e}")
+            from huggingface_hub import hf_hub_download
+            import tempfile
+            import shutil
 
-            # Create the initial PEFT wrapper WITHOUT specifying adapter_name to use default
-            peft_model = PeftModel.from_pretrained(
-                self.mola_model,
-                first_lora_path,
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-            )
-            print(f"✅ Loaded first LoRA: {first_adapter} (as default)")
+            # Create a working directory for all LoRAs
+            work_dir = tempfile.mkdtemp(prefix="mola_loras_")
+            print(f"Working directory: {work_dir}")
 
-            # Load remaining adapters into the same wrapper with unique names
-            for task_name in self.config.task_labels[1:]:
+            peft_model = None
+            loaded_adapters = []
+
+            for i, task_name in enumerate(self.config.task_labels):
                 try:
-                    lora_path = None
+                    print(f"Loading LoRA {task_name}...")
 
                     if os.path.exists(self.model_path):
                         # Local path
@@ -260,45 +225,53 @@ class MoLAForCausalLM(PreTrainedModel, GenerationMixin):
                             print(f"⚠️ LoRA directory not found: {lora_path}")
                             continue
                     else:
-                        # Hub path - download adapter
-                        try:
-                            adapter_weights_file = hf_hub_download(
-                                repo_id=self.model_path,
-                                filename=f"loras/{task_name}/adapter_model.safetensors"
-                            )
-                            adapter_config_file = hf_hub_download(
-                                repo_id=self.model_path,
-                                filename=f"loras/{task_name}/adapter_config.json"
-                            )
-
-                            # Create a temporary directory with both files for PEFT
-                            temp_dir = tempfile.mkdtemp()
-                            lora_path = os.path.join(temp_dir, task_name)
-                            os.makedirs(lora_path, exist_ok=True)
-
-                            # Copy both files to the same directory
-                            shutil.copy2(adapter_weights_file, os.path.join(lora_path, "adapter_model.safetensors"))
-                            shutil.copy2(adapter_config_file, os.path.join(lora_path, "adapter_config.json"))
-
-                        except Exception as e:
-                            print(f"❌ Failed to download LoRA {task_name}: {e}")
-                            continue
+                        # Hub path - create proper structure
+                        lora_path = os.path.join(work_dir, task_name)
+                        os.makedirs(lora_path, exist_ok=True)
+
+                        # Download files
+                        weights_file = hf_hub_download(
+                            repo_id=self.model_path,
+                            filename=f"loras/{task_name}/adapter_model.safetensors"
+                        )
+                        config_file = hf_hub_download(
+                            repo_id=self.model_path,
+                            filename=f"loras/{task_name}/adapter_config.json"
+                        )
+
+                        # Copy to working directory
+                        shutil.copy2(weights_file, os.path.join(lora_path, "adapter_model.safetensors"))
+                        shutil.copy2(config_file, os.path.join(lora_path, "adapter_config.json"))
+
+                    # Load the first adapter as base, others as additional
+                    if i == 0:
+                        peft_model = PeftModel.from_pretrained(
+                            self.mola_model,
+                            lora_path,
+                            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
+                        )
+                        print(f"✅ Loaded base LoRA: {task_name}")
+                    else:
+                        peft_model.load_adapter(lora_path, adapter_name=task_name)
+                        print(f"✅ Loaded additional LoRA: {task_name}")
 
-                    # Load adapter into the same PEFT model with unique name
-                    peft_model.load_adapter(lora_path, adapter_name=task_name)
-                    print(f"✅ Loaded LoRA: {task_name}")
+                    loaded_adapters.append(task_name)
 
                 except Exception as e:
                     print(f"❌ Failed to load LoRA {task_name}: {e}")
+                    continue
 
-            # Store single PEFT model for all adapters
-            self.lora_models = {str(name): peft_model for name in self.config.task_labels}
-            self._current_lora = first_adapter
-            self._current_adapted_model = peft_model
-
-            print(f"Loaded {len(self.config.task_labels)} LoRA adapters into one PEFT model.")
-            print(f"Available adapter names: {list(peft_model.peft_config.keys())}")
-
+            if peft_model and loaded_adapters:
+                # Store the PEFT model
+                self.lora_models = {name: peft_model for name in loaded_adapters}
+                self._current_adapted_model = peft_model
+                self._current_lora = loaded_adapters[0]
+
+                print(f" Successfully loaded {len(loaded_adapters)} LoRA adapters")
+                print(f"Available adapters: {loaded_adapters}")
+            else:
+                raise Exception("No LoRA adapters could be loaded")
+
         except Exception as e:
            print(f"❌ Failed to initialize LoRA loading: {e}")
            self.lora_models = {}
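The rewritten `_load_lora_adapters` follows the standard PEFT multi-adapter pattern: the first adapter creates the `PeftModel` wrapper (as the default adapter), each later adapter is attached to the same wrapper under its task name, and switching tasks is then typically a `set_adapter()` call rather than a reload. A minimal standalone sketch of that pattern follows; the base model id is taken from the README, but the local adapter paths and adapter names are invented for illustration.

```python
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Base weights are loaded once; dtype choice mirrors the loader above.
base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-4B-Thinking-2507",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

# The first adapter creates the PEFT wrapper and becomes the "default" adapter.
# Paths below are hypothetical local adapter directories, not this repo's layout.
peft_model = PeftModel.from_pretrained(base, "/tmp/mola_loras/code")

# Further adapters attach to the same wrapper under unique names,
# so the base weights are shared rather than duplicated per adapter.
peft_model.load_adapter("/tmp/mola_loras/math", adapter_name="math")

# Switching tasks is a cheap in-place operation, not a reload.
peft_model.set_adapter("math")
print(peft_model.active_adapter)  # -> "math"
```

Keeping every adapter in one wrapper (as the new loader does by mapping every task name to the same `peft_model`) means the base model is held in memory once, which is what makes per-prompt adapter selection at generation time practical.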