saadqbal committed on
Commit
2ed456d
·
1 Parent(s): 0411a6a

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +16 -2
handler.py CHANGED
@@ -1,6 +1,7 @@
1
  import torch
2
  from typing import Dict, List, Any
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
4
 
5
  # get dtype
6
  dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
@@ -8,9 +9,22 @@ dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.
8
 
9
  class EndpointHandler:
10
  def __init__(self, path=""):
 
 
 
 
 
 
 
 
 
 
 
11
  # load the model
12
- tokenizer = AutoTokenizer.from_pretrained(path)
13
- model = AutoModelForCausalLM.from_pretrained(path, device_map="auto",torch_dtype=dtype)
 
 
14
  # create inference pipeline
15
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
16
 
 
1
  import torch
2
  from typing import Dict, List, Any
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ from peft import PeftModel, PeftConfig
5
 
6
# Select the compute dtype: bfloat16 on Ampere-or-newer GPUs (compute
# capability >= 8), float16 on older GPUs. Guarded so that importing this
# module on a CPU-only host does not crash (torch.cuda.get_device_capability
# raises when no CUDA device is present); fall back to float32 there.
if torch.cuda.is_available():
    # `>= 8` (not `== 8`) so Hopper (capability 9) and newer also get bf16.
    dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] >= 8 else torch.float16
else:
    dtype = torch.float32
 
9
 
10
class EndpointHandler:
    """Inference Endpoints handler serving a PEFT/LoRA-adapted causal LM.

    Loads the LoRA adapter stored at *path*, resolves and loads its base
    model in 8-bit, and exposes a ready-to-use text-generation pipeline as
    ``self.pipeline``.
    """

    def __init__(self, path=""):
        """Build the model, tokenizer, and generation pipeline.

        Args:
            path: Repository/directory containing the PEFT adapter config and
                weights. The base model name is read from the adapter config.
        """
        # The adapter config records which base model the LoRA weights
        # were trained against; load that base model, not `path` itself.
        config = PeftConfig.from_pretrained(path)
        # NOTE(review): `load_in_8bit=` is deprecated in newer transformers in
        # favour of `quantization_config=BitsAndBytesConfig(load_in_8bit=True)`
        # — confirm the pinned transformers version still accepts it.
        model = AutoModelForCausalLM.from_pretrained(
            config.base_model_name_or_path,
            return_dict=True,
            load_in_8bit=True,
            device_map="auto",
        )
        tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
        # Wrap the base model with the LoRA adapter weights from `path`.
        model = PeftModel.from_pretrained(model, path)
        # Some base models ship without a pad token; reuse EOS so batched
        # inputs can be padded at all.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token = tokenizer.eos_token
        # Decoder-only models must be left-padded for correct batched generation.
        tokenizer.padding_side = "left"
        # Create the inference pipeline callers will invoke.
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)