twhitworth committed
Commit c10949d · verified · 1 Parent(s): c005154
Files changed (1)
  1. p16.py → fp16.py +5 -3
p16.py → fp16.py RENAMED
@@ -9,6 +9,7 @@ from tqdm import tqdm
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
 
+# Make sure to set your model output directory and make sure it has 755 permissions.
 MODEL_ID = "openai/gpt-oss-120b"
 OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./fp16/gpt-oss-120b-fp16")
 
@@ -19,11 +20,12 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
 
 # 3. load model in fp16
-max_memory = {0: "17GiB", 1: "17GiB", 2: "17GiB", 3: "17GiB", 4: "17GiB", 5: "17GiB", "cpu": "196GiB"}
+# Make sure to change these max_memory settings.
+max_memory = {0: "17GiB", 1: "17GiB", 2: "17GiB", 3: "17GiB", 4: "17GiB", 5: "17GiB", 6: "17GiB", 7: "17GiB", "cpu": "196GiB"}
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.float16,
-    device_map="auto",
+    device_map="sequential",
     low_cpu_mem_usage=True,
     max_memory=max_memory,
     offload_folder="./offload_cache",
@@ -98,7 +100,7 @@ for _m in model.modules():
 from transformers.models.gpt_bigcode import modeling_gpt_bigcode
 modeling_gpt_bigcode.GPTBigCodeModel._check_hidden_states_dtype = lambda *_, **__: None
 
-# 5. quick demo
+# 5. inference to verify functionality
 if __name__ == "__main__":
     prompt = "Explain quantum supremacy in one paragraph."
     inputs = tok(prompt, return_tensors="pt").to(model.device)
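For reference, a minimal sketch of the loading block as it stands after this commit. The os.makedirs/os.chmod lines are an assumption prompted by the new 755-permissions comment, and the dict comprehension is just a compact spelling of the eight-GPU max_memory literal from the diff; everything else mirrors fp16.py.

    import os
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    MODEL_ID = "openai/gpt-oss-120b"
    OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./fp16/gpt-oss-120b-fp16")

    # Assumption, based on the new comment: create the output directory with 755 permissions.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    os.chmod(OUTPUT_DIR, 0o755)

    # Per-device caps; equivalent to the eight-GPU literal in the diff.
    max_memory = {i: "17GiB" for i in range(8)}
    max_memory["cpu"] = "196GiB"

    tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16,
        device_map="sequential",   # fill GPU 0 to its cap, then GPU 1, ..., then CPU/disk
        low_cpu_mem_usage=True,
        max_memory=max_memory,
        offload_folder="./offload_cache",
    )

Unlike device_map="auto", which balances layers across devices, "sequential" packs GPU 0 up to its cap, then GPU 1, and so on, spilling the remainder to CPU RAM and finally to the offload folder on disk, which makes the placement predictable; the result can be inspected afterwards via model.hf_device_map.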
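The diff shows the renamed step 5 only through tokenization. A hedged sketch of how the verification run might continue (the generate settings below are assumptions, not part of the commit):

    # 5. inference to verify functionality
    if __name__ == "__main__":
        prompt = "Explain quantum supremacy in one paragraph."
        inputs = tok(prompt, return_tensors="pt").to(model.device)
        # Assumed continuation: the diff cuts off after tokenization.
        output_ids = model.generate(**inputs, max_new_tokens=128)
        print(tok.decode(output_ids[0], skip_special_tokens=True))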