velocity-ai committed on
Commit
0ebdffc
·
verified ·
1 Parent(s): c410e3a

Update code/inference.py

Browse files
Files changed (1) hide show
  1. code/inference.py +30 -4
code/inference.py CHANGED
@@ -1,7 +1,8 @@
1
  import os
2
  import json
3
  import torch
4
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
5
  import logging
6
 
7
  logger = logging.getLogger(__name__)
@@ -11,6 +12,21 @@ logger = logging.getLogger(__name__)
11
  # Can specify GPU device with:
12
  # CUDA_VISIBLE_DEVICES="1" python script.py
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def model_fn(model_dir, context=None):
15
  """Load the model for inference"""
16
  try:
@@ -22,15 +38,25 @@ def model_fn(model_dir, context=None):
22
  torch.cuda.empty_cache()
23
  logger.info(f"Using device: {device}")
24
 
25
- # Load tokenizer and model directly using AutoModelForSequenceClassification
26
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
27
- model = AutoModelForSequenceClassification.from_pretrained(
 
 
 
 
 
 
 
28
  model_id,
29
- num_labels=2,
30
  torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32,
31
  trust_remote_code=True
32
  )
33
 
 
 
 
34
  # Move model to device
35
  model = model.to(device)
36
 
 
1
  import os
2
  import json
3
  import torch
4
+ import torch.nn as nn
5
+ from transformers import AutoModel, AutoTokenizer, AutoConfig
6
  import logging
7
 
8
  logger = logging.getLogger(__name__)
 
12
  # Can specify GPU device with:
13
  # CUDA_VISIBLE_DEVICES="1" python script.py
14
 
15
class PhiForSequenceClassification(nn.Module):
    """Sequence classifier: a base model followed by a linear head.

    The wrapped ``base_model`` is called with ``output_hidden_states=True``
    and the final entry of its ``hidden_states`` is pooled to one vector per
    sequence, which is projected to ``num_labels`` logits.

    Args:
        base_model: Module exposing ``parameters()``, ``config.hidden_size``
            and a forward pass whose result has a ``hidden_states`` sequence.
        num_labels: Number of output classes of the linear head.
    """

    class Outputs:
        """Minimal result container exposing ``logits``."""

        def __init__(self, logits):
            self.logits = logits

    def __init__(self, base_model, num_labels=2):
        super().__init__()
        self.phi = base_model
        # Create the classifier with the same dtype as the base model so the
        # head can consume the base model's hidden states without a cast.
        dtype = next(base_model.parameters()).dtype
        self.classifier = nn.Linear(
            self.phi.config.hidden_size, num_labels, dtype=dtype
        )

    def forward(self, **inputs):
        """Run the base model and classify each sequence.

        Args:
            **inputs: Keyword arguments forwarded unchanged to the base
                model. If a 2-D ``attention_mask`` matching the hidden
                states' first two dimensions is present, it selects the
                pooling position.

        Returns:
            An ``Outputs`` object whose ``logits`` attribute holds the
            classifier output, one row per sequence.
        """
        outputs = self.phi(**inputs, output_hidden_states=True)
        # assumes hidden states are (batch, seq, hidden) -- TODO confirm
        hidden = outputs.hidden_states[-1]

        attention_mask = inputs.get("attention_mask")
        usable_mask = (
            torch.is_tensor(attention_mask)
            and attention_mask.dim() == 2
            and attention_mask.shape == hidden.shape[:2]
        )
        if usable_mask:
            # Pool at the last *attended* token of each row. Taking position
            # -1 unconditionally picks a padding token for right-padded rows.
            # For unpadded or left-padded rows this equals position -1.
            attended = (attention_mask != 0).to(
                device=hidden.device, dtype=torch.long
            )
            positions = torch.arange(hidden.shape[1], device=hidden.device)
            last_index = (positions * attended).argmax(dim=-1)
            rows = torch.arange(hidden.shape[0], device=hidden.device)
            pooled = hidden[rows, last_index]
        else:
            # No usable mask: fall back to the final sequence position.
            pooled = hidden[:, -1, :]

        logits = self.classifier(pooled)
        return self.Outputs(logits)
29
+
30
  def model_fn(model_dir, context=None):
31
  """Load the model for inference"""
32
  try:
 
38
  torch.cuda.empty_cache()
39
  logger.info(f"Using device: {device}")
40
 
41
+ # Load tokenizer
42
  tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
43
+
44
+ # Load config and specify it's a Phi3Config
45
+ config = AutoConfig.from_pretrained(model_id,
46
+ num_labels=2,
47
+ trust_remote_code=True)
48
+
49
+ # Load base model
50
+ base_model = AutoModel.from_pretrained(
51
  model_id,
52
+ config=config,
53
  torch_dtype=torch.bfloat16 if device.type == 'cuda' else torch.float32,
54
  trust_remote_code=True
55
  )
56
 
57
+ # Create classification model
58
+ model = PhiForSequenceClassification(base_model, num_labels=2)
59
+
60
  # Move model to device
61
  model = model.to(device)
62