Add DuckTrack as initial annotation tool; Initial multimodal test

This commit is contained in:
Timothyxxx
2023-11-27 00:34:57 +08:00
parent 8c0525c20e
commit 8272e93953
53 changed files with 1705 additions and 0 deletions

0
mm_agents/__init__.py Normal file
View File

BIN
mm_agents/chrome_start.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 MiB

20
mm_agents/fuyu_test.py Normal file
View File

@@ -0,0 +1,20 @@
from transformers import FuyuProcessor, FuyuForCausalLM
from PIL import Image
# Smoke test: ask the Fuyu-8B multimodal model to describe a screenshot.

# Inference device, shared by the model weights and the input tensors.
device = "cuda:0"
# Upper bound on the number of tokens generated after the prompt.
max_new_tokens = 100

# Load the screenshot; the path is relative to the current working directory.
# The context manager closes the underlying file once the RGB copy exists.
with Image.open("stackoverflow.png") as raw_image:
    image = raw_image.convert("RGB")

# load model and processor
model_id = "adept/fuyu-8b"
processor = FuyuProcessor.from_pretrained(model_id)
model = FuyuForCausalLM.from_pretrained(model_id, device_map=device)

# prepare inputs for the model
text_prompt = "Description:\n"
inputs = processor(text=text_prompt, images=image, return_tensors="pt").to(device)

# autoregressively generate text
generation_output = model.generate(**inputs, max_new_tokens=max_new_tokens)

# Decode only the tokens produced after the prompt. Slicing from the prompt
# length (rather than taking a fixed-size tail) stays correct when generation
# stops early and yields fewer than `max_new_tokens` tokens; a fixed tail
# would then reach back into the prompt tokens.
prompt_length = inputs["input_ids"].shape[1]
generation_text = processor.batch_decode(
    generation_output[:, prompt_length:], skip_special_tokens=True
)
print(generation_text)

BIN
mm_agents/stackoverflow.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 MiB