Upload folder using huggingface_hub

- README.md +9 -9
- config.json +1 -1
- modeling_wisent_qwen.py +70 -58
- vectors/mbpp_plus/steering_vector.safetensors +3 -0
README.md CHANGED

@@ -76,8 +76,8 @@ pip install -r requirements.txt
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 # Load model - CAA steering is automatically applied!
-model = AutoModelForCausalLM.from_pretrained("./
-tokenizer = AutoTokenizer.from_pretrained("./
+model = AutoModelForCausalLM.from_pretrained("./huggingface_qwen25-7b-coder-caa", trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained("./huggingface_qwen25-7b-coder-caa")
 
 # Generate code
 prompt = "Write a Python function to calculate the factorial of a number"

@@ -123,7 +123,7 @@ The model uses a trait-based organization for steering vectors:
 
 ```
 vectors/
-├── 
+├── mbpp_plus/       # Current: Optimized for MBPP Plus benchmark
 ├── safety/          # Future: Safety-aligned behavior
 ├── creativity/      # Future: Enhanced creative outputs
 ├── helpfulness/     # Future: Improved helpfulness

@@ -147,7 +147,7 @@ To switch traits, simply update the configuration:
 - **Steering Strength (α)**: 0.9
 - **Vector Format**: Safetensors format for efficient loading and HuggingFace compatibility
 - **Vector Dimension**: 3584 (pre-normalized during training)
-- **Storage Path**: `./vectors/
+- **Storage Path**: `./vectors/mbpp_plus/steering_vector.safetensors`
 
 ### How It Works
 

@@ -172,21 +172,21 @@ The CAA parameters were optimized using:
 WisentQwen2ForCausalLM
 ├── Base: Qwen2.5-Coder-7B-Instruct
 ├── CAA Integration: Layer 24
-├── Steering Vector: ./vectors/
+├── Steering Vector: ./vectors/mbpp_plus/steering_vector.safetensors
 └── Auto-applied during generation
 ```
 
 ## File Structure
 
 ```
-
+huggingface_qwen25-7b-coder-caa/
 ├── config.json              # Model configuration with CAA params
 ├── modeling_wisent_qwen.py  # Custom model class
 ├── tokenizer files          # Standard Qwen tokenizer
 ├── wisent_config.json       # Optimization results
 └── vectors/                 # Trait-based steering vectors
-    └── 
-    └── steering_vector.safetensors  # 
+    └── mbpp_plus/
+        └── steering_vector.safetensors  # MBPP Plus optimized steering vector
 ```
 
 ## Evaluation

@@ -202,7 +202,7 @@ The model should be evaluated on the complete MBPP Plus dataset (378 problems) t
 from transformers import AutoModelForCausalLM
 
 model = AutoModelForCausalLM.from_pretrained(
-    "./
+    "./huggingface_qwen25-7b-coder-caa",
     trust_remote_code=True
 )
 
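The README's "To switch traits, simply update the configuration" step can be sketched concretely. A minimal sketch, assuming a local checkout named `huggingface_qwen25-7b-coder-caa`; the `safety/` path is hypothetical, since only `vectors/mbpp_plus/` ships in this commit:

```python
import json

# Hypothetical trait switch: repoint steering_vector_path, then reload the model.
# vectors/safety/ is one of the README's "Future" placeholders, not yet shipped.
config_path = "./huggingface_qwen25-7b-coder-caa/config.json"
with open(config_path) as f:
    config = json.load(f)

config["steering_vector_path"] = "./vectors/safety/steering_vector.safetensors"

with open(config_path, "w") as f:
    json.dump(config, f, indent=2)
```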
config.json CHANGED

@@ -123,5 +123,5 @@
     "timestamp": "20250818_221712",
     "commit_hash": "a2181df6155f0d5d20170f307b61d10e74d31889"
   },
-  "steering_vector_path": "./vectors/
+  "steering_vector_path": "./vectors/mbpp_plus/steering_vector.safetensors"
 }
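The new `steering_vector_path` is relative to the model folder; `from_pretrained` in modeling_wisent_qwen.py (below) joins it against the load path. A sketch of that resolution plus a shape check against the README's documented 3584-dim vector; the folder name is an assumption:

```python
import os
from safetensors.torch import load_file

model_dir = "./huggingface_qwen25-7b-coder-caa"  # assumed local checkout
rel_path = "./vectors/mbpp_plus/steering_vector.safetensors"  # from config.json

# Mirror the resolution in WisentQwen2ForCausalLM.from_pretrained:
# relative paths are joined against the model directory.
vector_path = rel_path if os.path.isabs(rel_path) else os.path.join(model_dir, rel_path)

steering_data = load_file(vector_path)
vector = steering_data["steering_vector"]  # key used by _load_steering_vector_from_file
assert vector.shape[-1] == 3584  # hidden size documented in the README
```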
modeling_wisent_qwen.py CHANGED

@@ -15,9 +15,9 @@ from transformers.cache_utils import Cache
 
 class WisentQwen2Config(Qwen2Config):
     """Extended Qwen2 configuration with CAA steering parameters."""
-
+
     model_type = "wisent_qwen2"
-
+
     def __init__(
         self,
         caa_enabled: bool = True,

@@ -25,7 +25,7 @@ class WisentQwen2Config(Qwen2Config):
         caa_alpha: float = 0.9,
         steering_vector_path: str = "./vectors/coding/steering_vector.safetensors",
         steering_method: str = "caa",
-        **kwargs
+        **kwargs,
     ):
         super().__init__(**kwargs)
         self.caa_enabled = caa_enabled

@@ -38,33 +38,34 @@ class WisentQwen2Config(Qwen2Config):
 class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
     """
     Qwen2 model with integrated CAA steering for improved code generation.
-
+
     This model automatically applies Contrastive Activation Addition (CAA) steering
     during the forward pass, eliminating the need for manual hook management.
     """
-
+
     config_class = WisentQwen2Config
-
+
     def __init__(self, config: WisentQwen2Config):
         super().__init__(config)
-
+
         # CAA steering parameters
         self.caa_enabled = config.caa_enabled
         self.caa_layer_id = config.caa_layer_id
         self.caa_alpha = config.caa_alpha
         self.steering_method = config.steering_method
-
+
         # Load steering vector from file
         self.steering_vector = None
         if self.caa_enabled:
             self._load_steering_vector_from_file(config.steering_vector_path)
-
+
         # Hook handle for cleanup
         self._steering_hook_handle = None
-
+
     def _load_steering_vector_from_file(self, path: str):
         """Load the CAA steering vector from safetensors or pytorch file."""
         import os
+
         try:
             # Try relative path first
             if os.path.exists(path):

@@ -76,77 +77,82 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
             print(f"Warning: Steering vector not found at {path}, CAA disabled")
             self.caa_enabled = False
             return
-
+
             # Load based on file extension
-            if vector_path.endswith(
+            if vector_path.endswith(".safetensors"):
                 # Load from safetensors format (preferred)
                 try:
                     from safetensors.torch import load_file
+
                     steering_data = load_file(vector_path)
-                    self.steering_vector = steering_data[
+                    self.steering_vector = steering_data["steering_vector"]
                 except ImportError:
                     print("Warning: safetensors not installed, install with: pip install safetensors")
                     self.caa_enabled = False
                     return
             else:
                 # Load from pytorch format (fallback)
-                steering_data = torch.load(vector_path, map_location=
-
+                steering_data = torch.load(vector_path, map_location="cpu")
+
             # Handle different storage formats
             if isinstance(steering_data, dict):
-                if 
-                    self.steering_vector = steering_data[
-                elif 
-                    self.steering_vector = steering_data[
+                if "vector" in steering_data:
+                    self.steering_vector = steering_data["vector"]
+                elif "steering_vector" in steering_data:
+                    self.steering_vector = steering_data["steering_vector"]
                 else:
                     # Assume the dict values are the vectors
                     self.steering_vector = next(iter(steering_data.values()))
             else:
                 self.steering_vector = steering_data
-
+
             # Ensure it's a tensor
             if not isinstance(self.steering_vector, torch.Tensor):
                 self.steering_vector = torch.tensor(self.steering_vector)
-
-            print(
-
+
+            print(
+                f"✅ Loaded CAA steering vector from {vector_path}: shape {self.steering_vector.shape}, norm {torch.norm(self.steering_vector).item():.4f}"
+            )
+
         except Exception as e:
             print(f"Warning: Failed to load steering vector: {e}, CAA disabled")
             self.caa_enabled = False
             self.steering_vector = None
-
+
     def _apply_caa_steering(self, module, input, output):
         """
         Hook function that applies CAA steering to the specified layer.
-
+
         This follows the implementation from wisent_guard/core/steering_methods/caa.py
         and the patterns from wisent_guard/core/optuna/optuna_pipeline.py
         """
         if not self.caa_enabled or self.steering_vector is None:
             return output
-
+
         # Extract hidden states from output
         if isinstance(output, tuple):
             hidden_states = output[0]
         else:
             hidden_states = output
-
+
         # Apply steering to the last token position (standard CAA behavior)
         # This matches the implementation in optuna_pipeline.py lines 744-746
         if hidden_states.dim() == 3:  # [batch, seq, hidden]
             # Move steering vector to the same device and dtype
             steering_vector = self.steering_vector.to(hidden_states.device, hidden_states.dtype)
-
+
             # Apply steering with configured alpha (strength)
             # Steering is applied to the last token position
-            hidden_states[:, -1:, :] = hidden_states[:, -1:, :] + self.caa_alpha * steering_vector.unsqueeze(
-
+            hidden_states[:, -1:, :] = hidden_states[:, -1:, :] + self.caa_alpha * steering_vector.unsqueeze(
+                0
+            ).unsqueeze(0)
+
         # Return modified output
         if isinstance(output, tuple):
             return (hidden_states,) + output[1:]
         else:
             return hidden_states
-
+
     def forward(
         self,
         input_ids: torch.LongTensor = None,

@@ -163,16 +169,16 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
     ) -> Union[Tuple, CausalLMOutputWithPast]:
         """
         Forward pass with automatic CAA steering application.
-
+
         The steering is applied via a forward hook on the specified layer,
         following the pattern from optuna_pipeline.py.
         """
-
+
         # Register CAA steering hook if enabled and not already registered
         if self.caa_enabled and self.steering_vector is not None and self._steering_hook_handle is None:
             target_layer = self.model.layers[self.caa_layer_id]
             self._steering_hook_handle = target_layer.register_forward_hook(self._apply_caa_steering)
-
+
         # Call parent forward method
         outputs = super().forward(
             input_ids=input_ids,

@@ -185,22 +191,22 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
-            cache_position=cache_position if hasattr(self, 
+            cache_position=cache_position if hasattr(self, "cache_position") else None,
         )
-
+
         return outputs
-
+
     def generate(self, *args, **kwargs):
         """
         Generate method with automatic CAA steering.
-
+
         The steering hook is registered before generation and cleaned up after.
         """
         # Register hook if needed
         if self.caa_enabled and self.steering_vector is not None and self._steering_hook_handle is None:
             target_layer = self.model.layers[self.caa_layer_id]
             self._steering_hook_handle = target_layer.register_forward_hook(self._apply_caa_steering)
-
+
         try:
             # Call parent generate method
             outputs = super().generate(*args, **kwargs)
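An aside for reviewers of the hunks above: the steering mechanics of `_apply_caa_steering` boil down to adding `alpha * v` to the last token position of a hooked layer's output. A self-contained toy sketch of that pattern (toy sizes and a `nn.Linear` stand-in, not the real model; the real hook additionally unwraps tuple outputs):

```python
import torch
import torch.nn as nn

hidden = 8   # toy hidden size; the real model uses 3584
alpha = 0.9  # steering strength, as in the config
steering_vector = torch.randn(hidden)

layer = nn.Linear(hidden, hidden)  # stand-in for model.model.layers[24]

def apply_caa(module, inputs, output):
    # Shift only the last token position, as _apply_caa_steering does.
    output[:, -1:, :] = output[:, -1:, :] + alpha * steering_vector.unsqueeze(0).unsqueeze(0)
    return output

handle = layer.register_forward_hook(apply_caa)
x = torch.randn(2, 5, hidden)  # [batch, seq, hidden]
y = layer(x)                   # last position of y is steered
handle.remove()                # mirror the cleanup in generate()
```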
@@ -209,65 +215,71 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
             if self._steering_hook_handle is not None:
                 self._steering_hook_handle.remove()
                 self._steering_hook_handle = None
-
+
         return outputs
-
+
     def set_caa_enabled(self, enabled: bool):
         """Enable or disable CAA steering at runtime."""
         self.caa_enabled = enabled
         if not enabled and self._steering_hook_handle is not None:
             self._steering_hook_handle.remove()
             self._steering_hook_handle = None
-
+
     def set_caa_alpha(self, alpha: float):
         """Adjust CAA steering strength at runtime."""
         self.caa_alpha = alpha
-
+
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
         """
         Load model with automatic CAA configuration.
-
+
         This method ensures the steering vector is loaded from the embedded config.
         If no weights are found locally, it loads from the base Qwen model.
         """
         import os
         from pathlib import Path
-
+
         # Check if we have local weights
         local_path = Path(pretrained_model_name_or_path)
         has_weights = any(
-            (local_path / f).exists()
-            for f in [
+            (local_path / f).exists()
+            for f in [
+                "pytorch_model.bin",
+                "model.safetensors",
+                "pytorch_model.bin.index.json",
+                "model.safetensors.index.json",
+            ]
         )
-
+
         if not has_weights and local_path.exists() and (local_path / "config.json").exists():
             # We have config but no weights - load from base model
             print(f"Loading weights from base model: Qwen/Qwen2.5-Coder-7B-Instruct")
-
+
             # First, load config from local path
             from transformers import AutoConfig
+
             config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-
+
             # Load model with base weights
             # Remove config from kwargs if it exists to avoid conflict
            kwargs_copy = kwargs.copy()
-            kwargs_copy.pop(
-
+            kwargs_copy.pop("config", None)
+
             model = super().from_pretrained(
                 "Qwen/Qwen2.5-Coder-7B-Instruct",
                 *model_args,
                 config=config,  # Use our custom config
-                **kwargs_copy
+                **kwargs_copy,
             )
-
+
             # Initialize CAA components
             model.caa_enabled = config.caa_enabled
             model.caa_layer_id = config.caa_layer_id
             model.caa_alpha = config.caa_alpha
             model.steering_method = config.steering_method
             model._steering_hook_handle = None
-
+
             # Load steering vector from config
             if model.caa_enabled:
                 vector_path = config.steering_vector_path

@@ -277,14 +289,14 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
         else:
             # Standard loading path
             model = super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)
-
+
             # Load steering vector from config if not already loaded
             if model.caa_enabled and model.steering_vector is None:
                 vector_path = model.config.steering_vector_path
                 if not os.path.isabs(vector_path):
                     vector_path = os.path.join(pretrained_model_name_or_path, vector_path)
                 model._load_steering_vector_from_file(vector_path)
-
+
         return model
 
 

@@ -292,4 +304,4 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
 from transformers import AutoModelForCausalLM, AutoConfig
 
 AutoConfig.register("wisent_qwen2", WisentQwen2Config)
-AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)
+AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)
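Putting the new API together, a hedged usage sketch; the local folder name is an assumption, while `set_caa_alpha` and `set_caa_enabled` come from the diff above:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

path = "./huggingface_qwen25-7b-coder-caa"  # assumed local checkout
model = AutoModelForCausalLM.from_pretrained(path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(path)

inputs = tokenizer(
    "Write a Python function to calculate the factorial of a number",
    return_tensors="pt",
)
steered = model.generate(**inputs, max_new_tokens=128)  # hook registered, then removed

model.set_caa_alpha(0.5)      # weaken steering at runtime
model.set_caa_enabled(False)  # or disable it entirely
baseline = model.generate(**inputs, max_new_tokens=128)
```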
vectors/mbpp_plus/steering_vector.safetensors ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2e8bc7bbdbdee38910662c28ca924d5e2432a14a873ada066d4eab4db041235
+size 7256
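The 7,256-byte LFS object is consistent with a single 3584-dim vector stored in a 16-bit dtype (3584 × 2 = 7168 bytes of payload plus the safetensors header). A quick inspection sketch, assuming a local clone with LFS files pulled:

```python
from safetensors import safe_open

path = "vectors/mbpp_plus/steering_vector.safetensors"
with safe_open(path, framework="pt") as f:
    for key in f.keys():
        t = f.get_tensor(key)
        # Expect roughly ("steering_vector", (3584,)) with a 16-bit dtype,
        # if the size arithmetic above holds.
        print(key, tuple(t.shape), t.dtype)
```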