jhao commited on
Commit
77848b5
·
1 Parent(s): 3d7a3e2

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +18 -0
README.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Please use the following code sample to load these AWQ models:
2
+ ```python
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
4
+ from auto_gptq import AutoGPTQForCausalLM
5
+ import torch
6
+
7
+ model_path = 'efficient-llm/vicuna-13b-v1.3-awq'
8
+ config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
9
+ enc = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf', trust_remote_code=True)
10
+ kwargs = {"torch_dtype": torch.float16, "low_cpu_mem_usage": True}
11
+ model = AutoModelForCausalLM.from_pretrained(
12
+ model_path, config=config, trust_remote_code=True, device_map='auto', revision='3bit_128g', **kwargs)
13
+
14
+ model.eval()
15
+ input_ids = enc('How are you today?', return_tensors='pt').input_ids.to('cuda')
16
+ outputs = model.generate(input_ids=input_ids, max_length=128)
17
+ print(enc.decode(outputs[0]))
18
+ ```