Upload folder using huggingface_hub
Browse files
- README.md +3 -16
- modeling_wisent_qwen.py +6 -8
README.md
CHANGED
@@ -11,23 +11,10 @@ tags:
|
|
11 |
- wisent
|
12 |
library_name: transformers
|
13 |
datasets:
|
14 |
-
-
|
15 |
metrics:
|
16 |
- pass@1
|
17 |
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
18 |
-
model-index:
|
19 |
-
- name: wisent-ai/qwen2.5-coder-7b-wisent-caa
|
20 |
-
results:
|
21 |
-
- task:
|
22 |
-
type: code-generation
|
23 |
-
name: Code Generation
|
24 |
-
dataset:
|
25 |
-
type: mbpp
|
26 |
-
name: MBPP Plus
|
27 |
-
metrics:
|
28 |
-
- type: pass@1
|
29 |
-
value: 0.67
|
30 |
-
name: Pass@1
|
31 |
---
|
32 |
|
33 |
# Wisent-Qwen2.5-Coder-7B-Instruct with CAA Steering
|
@@ -164,7 +151,7 @@ The CAA parameters were optimized using:
|
|
164 |
- **Framework**: Optuna with TPE sampler
|
165 |
- **Search Space**: Layers 15-28, α ∈ [0.1, 5.0]
|
166 |
- **Objective**: Maximize accuracy on MBPP Plus validation set
|
167 |
-
- **
|
168 |
|
169 |
## Model Architecture
|
170 |
|
@@ -193,7 +180,7 @@ huggingface_qwen25-7b-coder-caa/
|
|
193 |
|
194 |
### MBPP Plus Benchmark
|
195 |
|
196 |
-
The model should be
|
197 |
|
198 |
### Running Evaluation
|
199 |
|
|
|
11 |
- wisent
|
12 |
library_name: transformers
|
13 |
datasets:
|
14 |
+
- evalplus/mbppplus
|
15 |
metrics:
|
16 |
- pass@1
|
17 |
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
---
|
19 |
|
20 |
# Wisent-Qwen2.5-Coder-7B-Instruct with CAA Steering
|
|
|
151 |
- **Framework**: Optuna with TPE sampler
|
152 |
- **Search Space**: Layers 15-28, α ∈ [0.1, 5.0]
|
153 |
- **Objective**: Maximize accuracy on MBPP Plus validation set
|
154 |
+
- **Validation Results**: Optimized for improved performance on MBPP Plus tasks
|
155 |
|
156 |
## Model Architecture
|
157 |
|
|
|
180 |
|
181 |
### MBPP Plus Benchmark
|
182 |
|
183 |
+
The CAA steering parameters were optimized with Optuna on MBPP Plus tasks. For reliable performance metrics, evaluate on the complete MBPP Plus dataset (378 problems), available as [evalplus/mbppplus](https://huggingface.co/datasets/evalplus/mbppplus).
|
184 |
|
185 |
### Running Evaluation
|
186 |
|
modeling_wisent_qwen.py
CHANGED
@@ -5,12 +5,11 @@ This model automatically applies CAA steering during generation without requirin
|
|
5 |
The steering parameters are optimized using Optuna and stored in the model configuration.
|
6 |
"""
|
7 |
|
8 |
-
from typing import Optional, Tuple, Union
|
|
|
9 |
import torch
|
10 |
-
import
|
11 |
-
from transformers import Qwen2ForCausalLM, Qwen2Config
|
12 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
13 |
-
from transformers.cache_utils import Cache
|
14 |
|
15 |
|
16 |
class WisentQwen2Config(Qwen2Config):
|
@@ -150,8 +149,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
|
|
150 |
# Return modified output
|
151 |
if isinstance(output, tuple):
|
152 |
return (hidden_states,) + output[1:]
|
153 |
-
|
154 |
-
return hidden_states
|
155 |
|
156 |
def forward(
|
157 |
self,
|
@@ -254,7 +252,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
|
|
254 |
|
255 |
if not has_weights and local_path.exists() and (local_path / "config.json").exists():
|
256 |
# We have config but no weights - load from base model
|
257 |
-
print(
|
258 |
|
259 |
# First, load config from local path
|
260 |
from transformers import AutoConfig
|
@@ -301,7 +299,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
|
|
301 |
|
302 |
|
303 |
# Register the model
|
304 |
-
from transformers import
|
305 |
|
306 |
AutoConfig.register("wisent_qwen2", WisentQwen2Config)
|
307 |
AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)
|
|
|
5 |
The steering parameters are optimized using Optuna and stored in the model configuration.
|
6 |
"""
|
7 |
|
8 |
+
from typing import List, Optional, Tuple, Union
|
9 |
+
|
10 |
import torch
|
11 |
+
from transformers import Qwen2Config, Qwen2ForCausalLM
|
|
|
12 |
from transformers.modeling_outputs import CausalLMOutputWithPast
|
|
|
13 |
|
14 |
|
15 |
class WisentQwen2Config(Qwen2Config):
|
|
|
149 |
# Return modified output
|
150 |
if isinstance(output, tuple):
|
151 |
return (hidden_states,) + output[1:]
|
152 |
+
return hidden_states
|
|
|
153 |
|
154 |
def forward(
|
155 |
self,
|
|
|
252 |
|
253 |
if not has_weights and local_path.exists() and (local_path / "config.json").exists():
|
254 |
# We have config but no weights - load from base model
|
255 |
+
print("Loading weights from base model: Qwen/Qwen2.5-Coder-7B-Instruct")
|
256 |
|
257 |
# First, load config from local path
|
258 |
from transformers import AutoConfig
|
|
|
299 |
|
300 |
|
301 |
# Register the model
|
302 |
+
from transformers import AutoConfig, AutoModelForCausalLM
|
303 |
|
304 |
AutoConfig.register("wisent_qwen2", WisentQwen2Config)
|
305 |
AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)
|