google
/

pix2struct-ai2d-base

Visual Question Answering

Model card | Files and versions

ybelkada committed on Mar 16, 2023

Commit

bf544e9

·

1 Parent(s): 8b5fd15

Update README.md

Files changed (1) hide show

README.md +3 -3

README.md CHANGED Viewed

@@ -85,7 +85,7 @@ processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
 question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-inputs = processor(images=image, text=text, return_tensors="pt")
 predictions = model.generate(**inputs)
 print(processor.decode(predictions[0], skip_special_tokens=True))
@@ -108,7 +108,7 @@ processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
 question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-inputs = processor(images=image, text=text, return_tensors="pt").to("cuda")
 predictions = model.generate(**inputs)
 print(processor.decode(predictions[0], skip_special_tokens=True))
@@ -133,7 +133,7 @@ processor = Pix2StructProcessor.from_pretrained("google/pix2struct-ai2d-base")
 question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
-inputs = processor(images=image, text=text, return_tensors="pt").to("cuda", torch.bfloat16)
 predictions = model.generate(**inputs)
 print(processor.decode(predictions[0], skip_special_tokens=True))

 question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
+inputs = processor(images=image, text=question, return_tensors="pt")
 predictions = model.generate(**inputs)
 print(processor.decode(predictions[0], skip_special_tokens=True))
 question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
+inputs = processor(images=image, text=question, return_tensors="pt").to("cuda")
 predictions = model.generate(**inputs)
 print(processor.decode(predictions[0], skip_special_tokens=True))
 question = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud"
+inputs = processor(images=image, text=question, return_tensors="pt").to("cuda", torch.bfloat16)
 predictions = model.generate(**inputs)
 print(processor.decode(predictions[0], skip_special_tokens=True))