jfpio committed on
Commit
3bb8d99
·
verified ·
1 Parent(s): 0afa2f5

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +3 -16
  2. modeling_wisent_qwen.py +6 -8
README.md CHANGED
@@ -11,23 +11,10 @@ tags:
11
  - wisent
12
  library_name: transformers
13
  datasets:
14
- - mbpp
15
  metrics:
16
  - pass@1
17
  base_model: Qwen/Qwen2.5-Coder-7B-Instruct
18
- model-index:
19
- - name: wisent-ai/qwen2.5-coder-7b-wisent-caa
20
- results:
21
- - task:
22
- type: code-generation
23
- name: Code Generation
24
- dataset:
25
- type: mbpp
26
- name: MBPP Plus
27
- metrics:
28
- - type: pass@1
29
- value: 0.67
30
- name: Pass@1
31
  ---
32
 
33
  # Wisent-Qwen2.5-Coder-7B-Instruct with CAA Steering
@@ -164,7 +151,7 @@ The CAA parameters were optimized using:
164
  - **Framework**: Optuna with TPE sampler
165
  - **Search Space**: Layers 15-28, α ∈ [0.1, 5.0]
166
  - **Objective**: Maximize accuracy on MBPP Plus validation set
167
- - **Best Validation Score**: 64% accuracy
168
 
169
  ## Model Architecture
170
 
@@ -193,7 +180,7 @@ huggingface_qwen25-7b-coder-caa/
193
 
194
  ### MBPP Plus Benchmark
195
 
196
- The model should be evaluated on the complete MBPP Plus dataset (378 problems) to measure improvement over the baseline. Expected improvements based on validation results.
197
 
198
  ### Running Evaluation
199
 
 
11
  - wisent
12
  library_name: transformers
13
  datasets:
14
+ - evalplus/mbppplus
15
  metrics:
16
  - pass@1
17
  base_model: Qwen/Qwen2.5-Coder-7B-Instruct
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  ---
19
 
20
  # Wisent-Qwen2.5-Coder-7B-Instruct with CAA Steering
 
151
  - **Framework**: Optuna with TPE sampler
152
  - **Search Space**: Layers 15-28, α ∈ [0.1, 5.0]
153
  - **Objective**: Maximize accuracy on MBPP Plus validation set
154
+ - **Validation Results**: Optimized for improved performance on MBPP Plus tasks
155
 
156
  ## Model Architecture
157
 
 
180
 
181
  ### MBPP Plus Benchmark
182
 
183
+ The model has been optimized using Optuna on MBPP Plus tasks. For reliable performance metrics, evaluation should be conducted on the complete MBPP Plus dataset (378 problems) using the [evalplus/mbppplus](https://huggingface.co/datasets/evalplus/mbppplus) dataset.
184
 
185
  ### Running Evaluation
186
 
modeling_wisent_qwen.py CHANGED
@@ -5,12 +5,11 @@ This model automatically applies CAA steering during generation without requirin
5
  The steering parameters are optimized using Optuna and stored in the model configuration.
6
  """
7
 
8
- from typing import Optional, Tuple, Union, List
 
9
  import torch
10
- import torch.nn as nn
11
- from transformers import Qwen2ForCausalLM, Qwen2Config
12
  from transformers.modeling_outputs import CausalLMOutputWithPast
13
- from transformers.cache_utils import Cache
14
 
15
 
16
  class WisentQwen2Config(Qwen2Config):
@@ -150,8 +149,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
150
  # Return modified output
151
  if isinstance(output, tuple):
152
  return (hidden_states,) + output[1:]
153
- else:
154
- return hidden_states
155
 
156
  def forward(
157
  self,
@@ -254,7 +252,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
254
 
255
  if not has_weights and local_path.exists() and (local_path / "config.json").exists():
256
  # We have config but no weights - load from base model
257
- print(f"Loading weights from base model: Qwen/Qwen2.5-Coder-7B-Instruct")
258
 
259
  # First, load config from local path
260
  from transformers import AutoConfig
@@ -301,7 +299,7 @@ class WisentQwen2ForCausalLM(Qwen2ForCausalLM):
301
 
302
 
303
  # Register the model
304
- from transformers import AutoModelForCausalLM, AutoConfig
305
 
306
  AutoConfig.register("wisent_qwen2", WisentQwen2Config)
307
  AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)
 
5
  The steering parameters are optimized using Optuna and stored in the model configuration.
6
  """
7
 
8
+ from typing import List, Optional, Tuple, Union
9
+
10
  import torch
11
+ from transformers import Qwen2Config, Qwen2ForCausalLM
 
12
  from transformers.modeling_outputs import CausalLMOutputWithPast
 
13
 
14
 
15
  class WisentQwen2Config(Qwen2Config):
 
149
  # Return modified output
150
  if isinstance(output, tuple):
151
  return (hidden_states,) + output[1:]
152
+ return hidden_states
 
153
 
154
  def forward(
155
  self,
 
252
 
253
  if not has_weights and local_path.exists() and (local_path / "config.json").exists():
254
  # We have config but no weights - load from base model
255
+ print("Loading weights from base model: Qwen/Qwen2.5-Coder-7B-Instruct")
256
 
257
  # First, load config from local path
258
  from transformers import AutoConfig
 
299
 
300
 
301
  # Register the model
302
+ from transformers import AutoConfig, AutoModelForCausalLM
303
 
304
  AutoConfig.register("wisent_qwen2", WisentQwen2Config)
305
  AutoModelForCausalLM.register(WisentQwen2Config, WisentQwen2ForCausalLM)