Add DuckTrack as initial annotation tool; Initial multimodal test
This commit is contained in:
0
mm_agents/__init__.py
Normal file
0
mm_agents/__init__.py
Normal file
BIN
mm_agents/chrome_start.png
Normal file
BIN
mm_agents/chrome_start.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 MiB |
20
mm_agents/fuyu_test.py
Normal file
20
mm_agents/fuyu_test.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""Smoke test: describe a screenshot with the Fuyu-8B multimodal model.

Loads ``stackoverflow.png``, prompts the model with ``"Description:\n"`` and
prints the generated continuation. Requires a CUDA device and the
``adept/fuyu-8b`` checkpoint.
"""
from transformers import FuyuProcessor, FuyuForCausalLM
from PIL import Image

# NOTE: relative path, so it is resolved against the current working
# directory -- run the script from the folder that contains the image.
image = Image.open("stackoverflow.png").convert("RGB")

# load model and processor
model_id = "adept/fuyu-8b"
device = "cuda:0"
processor = FuyuProcessor.from_pretrained(model_id)
model = FuyuForCausalLM.from_pretrained(model_id, device_map=device)

# prepare inputs for the model
text_prompt = "Description:\n"
max_new_tokens = 100

inputs = processor(text=text_prompt, images=image, return_tensors="pt").to(device)

# autoregressively generate text
generation_output = model.generate(**inputs, max_new_tokens=max_new_tokens)

# `generate` returns the prompt tokens followed by the new tokens. Slice from
# the prompt length rather than taking a fixed-size tail: when generation
# stops early (end-of-sequence before `max_new_tokens`), a fixed `-100:` tail
# would reach back into the prompt and leak prompt tokens into the output.
# assumes inputs["input_ids"] is a (batch, seq) tensor -- TODO confirm for the
# installed transformers version.
prompt_length = inputs["input_ids"].shape[1]
generation_text = processor.batch_decode(
    generation_output[:, prompt_length:], skip_special_tokens=True
)

print(generation_text)
|
||||
BIN
mm_agents/stackoverflow.png
Normal file
BIN
mm_agents/stackoverflow.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 1.0 MiB |
Reference in New Issue
Block a user