qianhuiwu commited on
Commit
ae7947f
·
verified ·
1 Parent(s): 34e15c4

Update example code.

Browse files
Files changed (1) hide show
  1. README.md +7 -4
README.md CHANGED
@@ -60,7 +60,7 @@ from qwen_vl_utils import process_vision_info
60
  from datasets import load_dataset
61
  from transformers import Qwen2VLProcessor
62
  from gui_actor.constants import chat_template
63
- from gui_actor.modeling import Qwen2VLForConditionalGenerationWithActionHead
64
  from gui_actor.inference import inference
65
 
66
 
@@ -68,7 +68,7 @@ from gui_actor.inference import inference
68
  model_name_or_path = "microsoft/GUI-Actor-7B-Qwen2-VL"
69
  data_processor = Qwen2VLProcessor.from_pretrained(model_name_or_path)
70
  tokenizer = data_processor.tokenizer
71
- model = Qwen2VLForConditionalGenerationWithActionHead.from_pretrained(
72
  model_name_or_path,
73
  torch_dtype=torch.bfloat16,
74
  device_map="cuda:0",
@@ -78,6 +78,9 @@ model = Qwen2VLForConditionalGenerationWithActionHead.from_pretrained(
78
  # prepare example
79
  dataset = load_dataset("rootsautomation/ScreenSpot")["test"]
80
  example = dataset[0]
 
 
 
81
  conversation = [
82
  {
83
  "role": "system",
@@ -105,9 +108,9 @@ conversation = [
105
  ]
106
 
107
  # inference
108
- pred = inference(conversation, model, tokenizer, data_processor, logits_processor=logits_processor_actor, use_placeholder=True, topk=3)
109
  px, py = pred["topk_points"][0]
110
- print(f"Click point: [{px}, {py}]")
111
  ```
112
 
113
  ## Citation
 
60
  from datasets import load_dataset
61
  from transformers import Qwen2VLProcessor
62
  from gui_actor.constants import chat_template
63
+ from gui_actor.modeling import Qwen2VLForConditionalGenerationWithPointer
64
  from gui_actor.inference import inference
65
 
66
 
 
68
  model_name_or_path = "microsoft/GUI-Actor-7B-Qwen2-VL"
69
  data_processor = Qwen2VLProcessor.from_pretrained(model_name_or_path)
70
  tokenizer = data_processor.tokenizer
71
+ model = Qwen2VLForConditionalGenerationWithPointer.from_pretrained(
72
  model_name_or_path,
73
  torch_dtype=torch.bfloat16,
74
  device_map="cuda:0",
 
78
  # prepare example
79
  dataset = load_dataset("rootsautomation/ScreenSpot")["test"]
80
  example = dataset[0]
81
+ print(f"Instruction: {example['instruction']}")
82
+ print(f"Ground-truth action region (x1, y1, x2, y2): {[round(i, 2) for i in example['bbox']]}")
83
+
84
  conversation = [
85
  {
86
  "role": "system",
 
108
  ]
109
 
110
  # inference
111
+ pred = inference(conversation, model, tokenizer, data_processor, use_placeholder=True, topk=3)
112
  px, py = pred["topk_points"][0]
113
+ print(f"Predicted click point: [{round(px, 2)}, {round(py, 2)}]")
114
  ```
115
 
116
  ## Citation