Update handler.py
handler.py  CHANGED  +16 -2
@@ -1,6 +1,7 @@
 import torch
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from peft import PeftModel, PeftConfig
 
 # get dtype
 dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
@@ -8,9 +9,22 @@ dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.float16
 
 class EndpointHandler:
     def __init__(self, path=""):
+
+        config = PeftConfig.from_pretrained(path)
+        model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
+        tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+        # Load the Lora model
+        model = PeftModel.from_pretrained(model, path)
+        if tokenizer.pad_token_id is None:
+            tokenizer.pad_token = tokenizer.eos_token
+
+        tokenizer.padding_side = "left"
+
         # load the model
-        tokenizer = AutoTokenizer.from_pretrained(path)
-        model = AutoModelForCausalLM.from_pretrained(path, device_map="auto",torch_dtype=dtype)
+        # tokenizer = AutoTokenizer.from_pretrained(path)
+        # model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype)
+
+
         # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
 