kirigayahitsugi committed on
Commit 6ae1fb6 · verified · 1 Parent(s): ed2e50b

Update README.md

Files changed (1):
  1. README.md +33 -19
README.md CHANGED
@@ -52,7 +52,10 @@ import torch
 import torch.nn as nn
 from transformers import AutoConfig, AutoModel, AutoModelForCausalLM
 import torch.nn.functional as F
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer
+import os
+from safetensors.torch import load_file
+from huggingface_hub import snapshot_download
 
 def get_tokenizer(pretrain, model, padding_side="left", use_fast=True):
     tokenizer = AutoTokenizer.from_pretrained(pretrain, trust_remote_code=True, use_fast=use_fast)
@@ -63,22 +66,17 @@ def get_tokenizer(pretrain, model, padding_side="left", use_fast=True):
     model.config.pad_token_id = tokenizer.pad_token_id
     return tokenizer
 
-def get_reward_model(base_causal_model, base_llm_model, is_general_preference: bool=False, add_prompt_head: bool=False, value_head_dim: int=2):
+def get_reward_model(base_causal_model, base_llm_model, value_head_dim: int, add_prompt_head: bool, is_general_preference: bool=False):
     class CustomRewardModel(base_causal_model):
 
         def __init__(self, config: AutoConfig):
             super().__init__(config)
             setattr(self, self.base_model_prefix, base_llm_model(config))
-            if not is_general_preference:
-                self.value_head = nn.Linear(config.hidden_size, 1, bias=False)
-            else:
-                self.value_head = nn.Linear(config.hidden_size, value_head_dim, bias=False)
-                if add_prompt_head:
-                    self.prompt_head = nn.Linear(config.hidden_size, value_head_dim // 2, bias=False)
-
-            self.is_general_preference = is_general_preference
+            self.is_general_preference = is_general_preference
 
-            self.post_init()
+            self.value_head = nn.Linear(config.hidden_size, value_head_dim, bias=False)
+            if add_prompt_head:
+                self.prompt_head = nn.Linear(config.hidden_size, value_head_dim // 2, bias=False)
 
         def custom_forward(
             self,
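Not part of the diff: a minimal sketch of how the refactored factory is wired up, mirroring the `AutoModel` / `AutoModelForCausalLM` mappings used later in this README. The checkpoint id is the one from the README's own example; `value_head_dim=4` and `add_prompt_head=True` are illustrative assumptions.

```python
# Illustrative sketch only (assumes get_reward_model as defined above).
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM

config = AutoConfig.from_pretrained(
    "general-preference/GPM-Llama-3.1-8B-Instruct", trust_remote_code=True
)
base_class = AutoModel._model_mapping[type(config)]                               # backbone class for this config
base_causal_class = AutoModelForCausalLM._model_mapping.get(type(config), None)   # its causal-LM counterpart

# With the new signature, the head dimensions are passed explicitly (assumed values here).
cls_class = get_reward_model(
    base_causal_class, base_class,
    value_head_dim=4, add_prompt_head=True, is_general_preference=True,
)
```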
@@ -115,7 +113,7 @@ def get_reward_model(base_causal_model, base_llm_model, is_general_preference: b
             eos_indices = attention_mask.size(1) - 1 - attention_mask.long().fliplr().argmax(dim=1)
             eos_indices = eos_indices.unsqueeze(1) # Change shape to [batch_size, 1]
             reward_list = []
-            for dim in range(value_head_dim):
+            for dim in range(self.value_head.out_features):
                 reward_list.append(values[:,:,dim].gather(dim=1, index=eos_indices))
             reward = torch.cat(reward_list, dim=1)
             reward = F.normalize(reward, p=2, dim=-1) # Shape will be [batch_size, value_head_dim]
@@ -169,11 +167,10 @@ def generate_high_dim_result_with_prompt(model, value_head_dim, chosen_reward, r
     return result
 
 class GPMPipeline:
-    def __init__(self, model_name_or_path, device=torch.device("cuda:0"), is_general_preference: bool=True, add_prompt_head: bool=True, value_head_dim: int=2, bf16: bool=True, truncation: bool=True, max_length: int=4096, padding: bool=True, tau: float=0.1):
+    def __init__(self, model_name_or_path, device=torch.device("cuda:0"), is_general_preference: bool=True, bf16: bool=True, truncation: bool=True, max_length: int=4096, padding: bool=True, tau: float=0.1):
         self.device = device
         self.is_general_preference = is_general_preference
-        self.add_prompt_head = add_prompt_head
-        self.value_head_dim = value_head_dim
+
         self.truncation = truncation
         self.max_length = max_length
         self.padding = padding
@@ -183,7 +180,24 @@ class GPMPipeline:
         config._attn_implementation = "flash_attention_2"
         base_class = AutoModel._model_mapping[type(config)]
         base_causal_class = AutoModelForCausalLM._model_mapping.get(type(config), None)
-        cls_class = get_reward_model(base_causal_class, base_class, is_general_preference, add_prompt_head, value_head_dim)
+
+        try:
+            dir_path = snapshot_download(repo_id=model_name_or_path)
+        except Exception as e:
+            dir_path = model_name_or_path
+        combined_weights = {}
+        for filename in os.listdir(dir_path):
+            if filename.endswith(".safetensors"):
+                file_path = os.path.join(dir_path, filename)
+                weights = load_file(file_path)
+                combined_weights.update(weights)
+
+        if "value_head.weight" in combined_weights:
+            self.value_head_dim = combined_weights["value_head.weight"].shape[0]
+
+        self.add_prompt_head = True if "prompt_head.weight" in combined_weights else False
+
+        cls_class = get_reward_model(base_causal_class, base_class, add_prompt_head=self.add_prompt_head, value_head_dim=self.value_head_dim, is_general_preference=is_general_preference)
 
         # configure model
         self.model = cls_class.from_pretrained(
@@ -192,6 +206,7 @@ class GPMPipeline:
             trust_remote_code=True,
             torch_dtype=torch.bfloat16 if bf16 else "auto",
         )
+
         # configure tokenizer
         self.tokenizer = get_tokenizer(model_name_or_path, self.model, "left", use_fast=True)
         self.tokenizer.truncation_side = "right"
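Not part of the diff: the newly added constructor logic, pulled out into a standalone sketch to show how the head shapes are now inferred from the checkpoint's safetensors shards rather than passed in. The repo id is the one used throughout this README; `repo_id` and the final `print` are illustrative.

```python
# Illustrative sketch only: infer value_head_dim / add_prompt_head from the weights.
import os
from safetensors.torch import load_file
from huggingface_hub import snapshot_download

repo_id = "general-preference/GPM-Llama-3.1-8B-Instruct"
try:
    dir_path = snapshot_download(repo_id=repo_id)   # local cache dir of the downloaded snapshot
except Exception:
    dir_path = repo_id                              # fall back to treating the argument as a local path

combined_weights = {}
for filename in os.listdir(dir_path):
    if filename.endswith(".safetensors"):
        combined_weights.update(load_file(os.path.join(dir_path, filename)))

value_head_dim = combined_weights["value_head.weight"].shape[0]  # nn.Linear weight is [out_features, in_features]
add_prompt_head = "prompt_head.weight" in combined_weights
print(value_head_dim, add_prompt_head)
```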
@@ -262,12 +277,13 @@ context2 = [
     {"role": "assistant", "content": response2}
 ]
 
-rm = GPMPipeline("general-preference/GPM-Llama-3.1-8B-Instruct", value_head_dim=4)
+rm = GPMPipeline("general-preference/GPM-Llama-3.1-8B-Instruct")
 
 reward1, prompt_hidden_state = rm([context1], return_prompt=True)
 reward2 = rm([context2])
 
 result = generate_high_dim_result_with_prompt(rm.model, rm.value_head_dim, reward1, reward2, prompt_hidden_state)
+# score = result / rm.tau
 
 result_batch = result.float().cpu().detach().numpy().tolist()
 
@@ -278,6 +294,4 @@ results = []
 ]
 
 print(result_batch)
-
-
 ```
 
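Not part of the diff: the net effect of this commit on the usage example, as a before/after sketch. Everything here mirrors the README code shown in the hunks above; only the `print` line is an illustrative addition.

```python
# Before this commit: the head dimension had to be supplied by hand.
# rm = GPMPipeline("general-preference/GPM-Llama-3.1-8B-Instruct", value_head_dim=4)

# After this commit: value_head_dim and add_prompt_head are read from the safetensors weights.
rm = GPMPipeline("general-preference/GPM-Llama-3.1-8B-Instruct")
print(rm.value_head_dim, rm.add_prompt_head)

reward1, prompt_hidden_state = rm([context1], return_prompt=True)
reward2 = rm([context2])
result = generate_high_dim_result_with_prompt(rm.model, rm.value_head_dim, reward1, reward2, prompt_hidden_state)
# score = result / rm.tau   # optional temperature scaling, per the comment added in this commit
```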