Add DuckTrack as initial annotation tool; Initial multimodal test

2023-11-27 00:34:57 +08:00
parent 8c0525c20e
commit 8272e93953
53 changed files with 1705 additions and 0 deletions
--- a/mm_agents/init.py
+++ b/mm_agents/init.py
--- a/mm_agents/chrome_start.png
+++ b/mm_agents/chrome_start.png
--- a/mm_agents/fuyu_test.py
+++ b/mm_agents/fuyu_test.py
@@ -0,0 +1,20 @@
+from transformers import FuyuProcessor, FuyuForCausalLM
+from PIL import Image
+
+image = Image.open("stackoverflow.png").convert("RGB")
+
+# load model and processor
+model_id = "adept/fuyu-8b"
+processor = FuyuProcessor.from_pretrained(model_id)
+model = FuyuForCausalLM.from_pretrained(model_id, device_map="cuda:0")
+
+# prepare inputs for the model
+text_prompt = "Description:\n"
+
+inputs = processor(text=text_prompt, images=image, return_tensors="pt").to("cuda:0")
+
+# autoregressively generate text
+generation_output = model.generate(**inputs, max_new_tokens=100)
+generation_text = processor.batch_decode(generation_output[:, -100:], skip_special_tokens=True)
+
+print(generation_text)
--- a/mm_agents/stackoverflow.png
+++ b/mm_agents/stackoverflow.png