Merge main

This commit is contained in:
BlankCheng
2024-03-18 22:21:01 +08:00
133 changed files with 1845 additions and 8812 deletions

19
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,19 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Python Debugger: Current File with Arguments",
"type": "debugpy",
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"args": [
"--path_to_vm", "/Users/lxc/Virtual Machines.localized/DesktopEnv-Ubuntu 64-bit Arm.vmwarevm/DesktopEnv-Ubuntu 64-bit Arm.vmx"
// "--example_time_limit", "60"
]
}
]
}

View File

@@ -21,10 +21,12 @@
Please refer to [guidance](https://docs.google.com/document/d/1KBdeZwmZs2Vi_Wsnngb3Wf1-RiwMMpXTftwMqP2Ztak/edit#heading=h.uh0x0tkl7fuw)
2. Install the environment package, download the examples and the virtual machine image.
For x86_64 Linux or Windows, you can install the environment package and download the examples and the virtual machine image by running the following commands:
```bash
pip install desktop-env
gdown xxxx
gdown xxxx
vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui
vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state"
```
## Quick Start

View File

@@ -263,16 +263,19 @@ class PythonController:
"""
Ends recording the screen.
"""
response = requests.post(self.http_server + "/end_recording")
if response.status_code == 200:
logger.info("Recording stopped successfully")
with open(dest, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
else:
logger.error("Failed to stop recording. Status code: %d", response.status_code)
return None
try:
response = requests.post(self.http_server + "/end_recording")
if response.status_code == 200:
logger.info("Recording stopped successfully")
with open(dest, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
else:
logger.error("Failed to stop recording. Status code: %d", response.status_code)
return None
except Exception as e:
logger.error("An error occurred while trying to download the recording: %s", e)
# Additional info
def get_vm_platform(self):

View File

@@ -53,8 +53,8 @@ class DesktopEnv(gym.Env):
def __init__(
self,
path_to_vm: str,
snapshot_name: str = "init_state",
action_space: str = "computer_13",
task_config: Dict[str, Any] = None,
tmp_dir: str = "tmp",
cache_dir: str = "cache",
screen_size: Tuple[int] = (1920, 1080),
@@ -64,15 +64,6 @@ class DesktopEnv(gym.Env):
Args:
path_to_vm (str): path to .vmx file
action_space (str): "computer_13" | "pyautogui"
task_config (Dict[str, Any]): manages task configs integratedly,
including
* base snapshot
* task id (uuid)
* instruction
* setup config
* evaluator config
tmp_dir (str): temporary directory to store trajectory stuffs like
the extracted screenshots
cache_dir (str): cache directory to cache task-related stuffs like
@@ -81,23 +72,20 @@ class DesktopEnv(gym.Env):
# Initialize environment variables
self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm)))
self.snapshot_name = snapshot_name
self.tmp_dir_base: str = tmp_dir
self.cache_dir_base: str = cache_dir
self.vm_screen_size = screen_size
self.vm_screen_size = screen_size # todo: add the logic to get the screen size from the VM
self.headless = headless
os.makedirs(self.tmp_dir_base, exist_ok=True)
# task-aware stuffs
# todo: handling the logic of snapshot directory
self._set_task_info(task_config)
# Initialize emulator and controller
logger.info("Initializing...")
self._start_emulator()
self.vm_ip = self._get_vm_ip()
self.controller = PythonController(vm_ip=self.vm_ip)
self.setup_controller = SetupController(vm_ip=self.vm_ip, cache_dir=self.cache_dir)
self.setup_controller = SetupController(vm_ip=self.vm_ip, cache_dir=self.cache_dir_base)
# Meta info of the VM, move to the reset() function
self.vm_platform: str = "" # self.controller.get_vm_platform()
@@ -147,7 +135,7 @@ class DesktopEnv(gym.Env):
raise Exception("Failed to get VM IP address!")
def _save_state(self):
_execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_path])
_execute_command(["vmrun", "-T", "ws" "snapshot", self.path_to_vm, self.snapshot_name])
def _get_screenshot(self):
# random_uuid = str(uuid.uuid4())
@@ -167,7 +155,6 @@ class DesktopEnv(gym.Env):
return screenshot_image_path
def _set_task_info(self, task_config: Dict[str, Any]):
self.snapshot_path = task_config["snapshot"]
self.task_id: str = task_config["id"]
self.cache_dir: str = os.path.join(self.cache_dir_base, self.task_id)
os.makedirs(self.cache_dir, exist_ok=True)
@@ -187,7 +174,7 @@ class DesktopEnv(gym.Env):
if isinstance(self.evaluator["func"], list) \
else getattr(metrics, self.evaluator["func"])
self.metric_conj: str = self.evaluator.get("conj", "and") # take conjunction of multiple metrics
if "result" in self.evaluator:
if "result" in self.evaluator and len(self.evaluator["result"])>0:
self.result_getter: Getter = [getattr(getters, "get_{:}".format(res["type"])) for res in
self.evaluator["result"]] \
if isinstance(self.evaluator["result"], list) \
@@ -197,7 +184,7 @@ class DesktopEnv(gym.Env):
if isinstance(self.metric, list) \
else None
if "expected" in self.evaluator:
if "expected" in self.evaluator and len(self.evaluator["expected"])>0:
self.expected_getter: Getter = [getattr(getters, "get_{:}".format(exp["type"])) if exp else None for exp in
self.evaluator["expected"]] \
if isinstance(self.evaluator["expected"], list) \
@@ -239,8 +226,8 @@ class DesktopEnv(gym.Env):
)
os.makedirs(os.path.join(self.tmp_dir, "screenshots"))
logger.info("Reverting to snapshot to {}...".format(self.snapshot_path))
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_path])
logger.info("Reverting to snapshot to {}...".format(self.snapshot_name))
_execute_command(["vmrun", "-T", "ws", "revertToSnapshot", self.path_to_vm, self.snapshot_name])
time.sleep(5)
print(self.vm_screen_size)

View File

@@ -114,7 +114,8 @@ from .slides import (
)
from .table import (
compare_table,
compare_csv
compare_csv,
compare_conference_city_in_order
)
from .thunderbird import (
check_thunderbird_prefs,
@@ -148,7 +149,6 @@ from .vscode import (
check_html_background_image,
compare_zip_files
)
from .calc import compare_conference_city_in_order
from .others import compare_epub, check_mp3_meta
def infeasible():

View File

@@ -1,6 +1,3 @@
import subprocess
def check_gnome_favorite_apps(apps_str: str, rule):
# parse the string like "['thunderbird.desktop', 'vim.desktop', 'google-chrome.desktop']"
# to a list of strings
@@ -57,6 +54,7 @@ def check_moved_jpgs(directory_list, rule):
else:
return 0
def is_in_vm_clickboard(config, terminal_output):
print("terminal_output: ")
print(terminal_output)
@@ -67,4 +65,4 @@ def is_in_vm_clickboard(config, terminal_output):
if not isinstance(expected_results, list):
return 1 if expected_results in terminal_output else 0
else:
return 1 if all(result in terminal_output for result in expected_results) else 0
return 1 if all(result in terminal_output for result in expected_results) else 0

View File

@@ -1,41 +0,0 @@
import logging
from typing import List
import openpyxl
logger = logging.getLogger("desktopenv.metrics.calc")
def compare_conference_city_in_order(actual_city_list_path, expected_city):
    """Check that column C of a workbook lists the expected conference cities in order.

    Reads cells C2:C22 of the active sheet of the workbook at
    ``actual_city_list_path`` and compares them, position by position, against
    ``expected_city["expected"]``.

    Args:
        actual_city_list_path: path to the .xlsx file produced by the agent.
        expected_city: dict with key ``"expected"`` mapping to a list whose
            items are either a substring that must appear in the corresponding
            cell, or a list of acceptable substrings (any one may match).

    Returns:
        float: 1.0 if every cell matches its expected entry in order,
        0.0 on any mismatch or on any error (e.g. fewer expected entries
        than cells, or a None cell value).
    """
    expected_city_list = expected_city["expected"]
    wb = openpyxl.load_workbook(actual_city_list_path)
    sheet = wb.active
    actual_city_list = []
    # NOTE(review): the C2:C22 range is hard-coded — assumes exactly 21 rows of cities.
    for row in sheet["C2:C22"]:
        for cell in row:
            actual_city_list.append(cell.value)
    # The expected cities must match the actual list index-by-index (order matters).
    try:
        for i in range(len(actual_city_list)):
            if isinstance(expected_city_list[i], str):
                # Single acceptable substring for this position.
                if expected_city_list[i] not in actual_city_list[i]:
                    logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    return 0.
            elif isinstance(expected_city_list[i], List):
                # Several acceptable substrings; any one of them may match.
                if not any(possible_str in actual_city_list[i] for possible_str in expected_city_list[i]):
                    logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
                    return 0.
            else:
                raise TypeError("Expected city should be a string or a list of strings")
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt.
        # Any failure (index/type errors, None cell values) scores 0.
        return 0.
    return 1.

View File

@@ -1,28 +0,0 @@
import fitz # PyMuPDF
def extract_answers_from_pdf(pdf_file):
    """Extract the text after the last '=' on every non-blank line of a PDF.

    Args:
        pdf_file: path to the PDF file to read.

    Returns:
        list[str]: the stripped right-hand sides of all '=' lines,
        in document order.
    """
    # Open the PDF file
    doc = fitz.open(pdf_file)
    answers = []
    # Iterate over every page
    for page in doc:
        # Extract the text of the current page
        text = page.get_text()
        # Split the text into lines
        lines = text.split('\n')
        for line in lines:
            if line.strip():  # skip blank lines
                # Split on '=' and take what follows as the answer
                parts = line.split('=')
                if len(parts) > 1:
                    answer = parts[-1].strip()  # the part after the last '=' is the answer
                    answers.append(answer)
    return answers
# 假设你的文件名是'math_problems.pdf'
pdf_file = '/Users/lxc/Desktop/calculus.pdf'
answers = extract_answers_from_pdf(pdf_file)
for i, answer in enumerate(answers, 1):
print(f"题目{i}的答案是: {answer}")

View File

@@ -26,13 +26,3 @@ def check_libre_locale(config_file: str, rules: Dict[str, List[str]]) -> float:
for ptn in rules["locale_set"]
)
)
if __name__ == "__main__":
path1 = "../../任务数据/LibreOffice Calc/registrymodifications.ru.xcu"
print(check_libre_locale(path1, {"locale_set": ["ru-*", "de-*", "fr-*"
, "pt-*", "es-*", "it-*"
]
}
)
)

View File

@@ -1,20 +1,20 @@
import zipfile
import os.path
import logging
import os
import os.path
import zipfile
from typing import List, Dict
from typing import Union, TypeVar
import lxml.html
from lxml.html import HtmlElement
from typing import List, Dict
from typing import Union, TypeVar
from mutagen.easyid3 import EasyID3
from .general import diff_text_file
from .utils import _match_value_to_rule
import logging
logger = logging.getLogger("desktopenv.metric.others")
def process_epub(filename: str) -> List[str]:
file_list: List[str] = []
@@ -23,7 +23,7 @@ def process_epub(filename: str) -> List[str]:
try:
with zipfile.ZipFile(filename, "r") as z_f:
with z_f.open("toc.ncx") as in_f\
with z_f.open("toc.ncx") as in_f \
, open(os.path.join(base_dir, "toc.ncx"), "w") as out_f:
contents: str = in_f.read().decode()
contents = contents.splitlines()
@@ -31,7 +31,7 @@ def process_epub(filename: str) -> List[str]:
if "navPoint" not in l:
out_f.write(l + "\n")
file_list.append(os.path.join(base_dir, "toc.ncx"))
with z_f.open("content.opf") as in_f\
with z_f.open("content.opf") as in_f \
, open(os.path.join(base_dir, "content.opf"), "w") as out_f:
contents: str = in_f.read().decode()
contents = contents.splitlines()
@@ -41,14 +41,14 @@ def process_epub(filename: str) -> List[str]:
file_list.append(os.path.join(base_dir, "content.opf"))
for f_n in z_f.namelist():
if f_n.endswith(".html"):
with z_f.open(f_n) as in_f\
with z_f.open(f_n) as in_f \
, open(os.path.join(base_dir, f_n), "w") as out_f:
html: HtmlElement = lxml.html.fromstring(
''.join( filter( lambda ch: ch!="\n" and ch!="\r"
, in_f.read().decode()
)
).encode()
)
''.join(filter(lambda ch: ch != "\n" and ch != "\r"
, in_f.read().decode()
)
).encode()
)
out_f.write(lxml.html.tostring(html, pretty_print=True, encoding="unicode"))
file_list.append(os.path.join(base_dir, f_n))
logger.debug("%s: %s", filename, file_list)
@@ -56,6 +56,7 @@ def process_epub(filename: str) -> List[str]:
except zipfile.BadZipFile:
return []
def compare_epub(result: str, expected: str) -> float:
if result is None:
return 0.
@@ -69,8 +70,10 @@ def compare_epub(result: str, expected: str) -> float:
metric *= current_metric
return metric
V = TypeVar("Value")
def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> bool:
# checks using _match_value_to_rule
if result is None:
@@ -85,44 +88,3 @@ def check_mp3_meta(result: str, meta: Dict[str, Dict[str, Union[str, V]]]) -> bo
logger.debug("%s.%s: %s", result, k, value)
metric = metric and _match_value_to_rule(value, r)
return float(metric)
if __name__ == "__main__":
import datetime
import sys
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
metric = check_mp3_meta( "snapshots/test/cache/3f05f3b9-29ba-4b6b-95aa-2204697ffc06/Cheng Xiang - Missing You - gt.mp3"
, { "title": { "method": "eq"
, "ref": "Missing You"
}
, "artist": { "method": "eq"
, "ref": "Cheng Xiang"
}
}
)
print(metric)

View File

@@ -2,6 +2,7 @@ import operator
from typing import Any
from typing import Dict
import fitz # PyMuPDF
from pypdf import PdfReader
@@ -11,3 +12,20 @@ def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float:
reader = PdfReader(pdf_file)
nb_pages: int = len(reader.pages)
return float(getattr(operator, rules["relation"])(nb_pages, rules["ref_value"]))
def extract_answers_from_pdf(pdf_file):
    """Collect the right-hand side of every '=' line in a PDF.

    Opens *pdf_file* with PyMuPDF, walks every page's text line by line,
    and for each non-blank line containing '=' records the stripped text
    after the last '='.

    Args:
        pdf_file: path to the PDF document.

    Returns:
        list[str]: extracted answers in document order.
    """
    collected = []
    for page in fitz.open(pdf_file):
        for raw_line in page.get_text().split('\n'):
            if not raw_line.strip():
                continue
            segments = raw_line.split('=')
            if len(segments) > 1:
                collected.append(segments[-1].strip())
    return collected

View File

@@ -165,23 +165,24 @@ def compare_pptx_files(file1_path, file2_path, **options):
# compare the content of each slide
for slide1, slide2 in zip(prs1.slides, prs2.slides):
slide_idx += 1
def get_slide_background_color(slide):
background = slide.background
if background.fill.background():
return background.fill.fore_color.rgb
else:
return None
if get_slide_background_color(slide1) != get_slide_background_color(slide2) and examine_background_color:
return 0
def get_slide_notes(slide):
notes_slide = slide.notes_slide
if notes_slide:
return notes_slide.notes_text_frame.text
else:
return None
if get_slide_notes(slide1).strip() != get_slide_notes(slide2).strip() and examine_note:
return 0
# check if the shapes are the same
@@ -192,14 +193,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_table_bottom_position:
if slide_idx == 3 and shape1.shape_type == 19 and shape2.shape_type == 19:
if shape1.top <= shape2.top or shape1.top < 3600000:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_right_position:
if slide_idx == 2 and not hasattr(shape1, "text") and not hasattr(shape2, "text"):
if shape1.left <= shape2.left or shape1.left < 4320000:
@@ -207,28 +208,31 @@ def compare_pptx_files(file1_path, file2_path, **options):
if examine_top_position:
if slide_idx == 2 and shape1.shape_type == 13 and shape2.shape_type == 13:
if shape1.top >= shape2.top or shape1.top > 1980000:
return 0
if shape1.top >= shape2.top or shape1.top > 1980000:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_shape_for_shift_size:
if shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
if not (hasattr(shape1, "text") and hasattr(shape2, "text") and shape1.text == shape2.text and shape1.text == "Elaborate on what you want to discuss."):
if not (hasattr(shape1, "text") and hasattr(shape2,
"text") and shape1.text == shape2.text and shape1.text == "Elaborate on what you want to discuss."):
return 0
if (shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
if (
shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height) and examine_shape:
return 0
if examine_image_size:
if shape1.shape_type == 13 and shape2.shape_type == 13:
if shape1.width != shape2.width or shape1.height != shape2.height:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
return 0
if examine_modify_height:
if not hasattr(shape1, "text") and not hasattr(shape2, "text") or shape1.shape_type == 5 and shape2.shape_type == 5:
if not hasattr(shape1, "text") and not hasattr(shape2,
"text") or shape1.shape_type == 5 and shape2.shape_type == 5:
if shape1.height != shape2.height:
return 0
elif shape1.left != shape2.left or shape1.top != shape2.top or shape1.width != shape2.width or shape1.height != shape2.height:
@@ -236,13 +240,13 @@ def compare_pptx_files(file1_path, file2_path, **options):
if hasattr(shape1, "text") and hasattr(shape2, "text"):
if shape1.text.strip() != shape2.text.strip() and examine_text:
return 0
# check if the paragraphs are the same
return 0
# check if the paragraphs are the same
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
if para1.alignment != para2.alignment and examine_alignment:
return 0
# check if the runs are the same
if para1.text != para2.text and examine_text:
return 0
@@ -253,7 +257,7 @@ def compare_pptx_files(file1_path, file2_path, **options):
for run1, run2 in zip(para1.runs, para2.runs):
# check if the font properties are the same
if run1.font.name != run2.font.name and examine_font_name:
if run1.font.name != run2.font.name and examine_font_name:
return 0
if run1.font.size != run2.font.size and examine_font_size:
@@ -305,10 +309,9 @@ def compare_pptx_files(file1_path, file2_path, **options):
return bullets
if examine_bullets and _extract_bullets(run1.part.blob.decode('utf-8')) != _extract_bullets(run2.part.blob.decode('utf-8')):
if examine_bullets and _extract_bullets(run1.part.blob.decode('utf-8')) != _extract_bullets(
run2.part.blob.decode('utf-8')):
return 0
# fixme: Actually there are more properties to be compared, we can add them later via parsing the xml data
@@ -524,15 +527,3 @@ def check_auto_saving_time(pptx_file, rules):
logger.error(f"Error parsing XML: {e}")
except FileNotFoundError:
logger.error(f"File not found: {pptx_file}")
if __name__ == '__main__':
# print(compare_pptx_files(
# r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx",
# r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\550ce7e7-747b-495f-b122-acdc4d0b8e54\New_Club_Spring_2018_Training_Gold.pptx"))
# print(evaluate_presentation_fill_to_rgb_distance(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\3b27600c-3668-4abd-8f84-7bcdebbccbdb\lec17-gui-events.pptx", {"rgb": (0, 0, 255)}))
# print(check_auto_saving_time(r"C:\Users\tianbaox\Desktop\DesktopEnv\cache\2cd43775-7085-45d8-89fa-9e35c0a915cf\registrymodifications.xcu", {"minutes": 3}))
print(compare_pptx_files(
r"D:\NJU\HKUNLP\Desktop-Env\DesktopEnv\cache\08aced46-45a2-48d7-993b-ed3fb5b32302\22_6_Gold.pptx",
r"D:\NJU\HKUNLP\Desktop-Env\DesktopEnv\cache\08aced46-45a2-48d7-993b-ed3fb5b32302\22_6.pptx",
examine_shape=False))

View File

@@ -11,15 +11,15 @@ import openpyxl
import pandas as pd
from openpyxl import Workbook
from openpyxl.cell.cell import Cell
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet
from rapidfuzz import fuzz
from desktop_env.evaluators.metrics.utils import _match_value_to_rule, _read_cell_style, read_cell_value
from desktop_env.evaluators.metrics.utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles \
, load_filters, load_pivot_tables
from rapidfuzz import fuzz
# from openpyxl.utils import coordinate_to_tuple
@@ -165,7 +165,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
logger.debug("Sheet1: \n%s", str(sheet1))
logger.debug("Sheet2: \n%s", str(sheet2))
try:
logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2))
logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1 == sheet2))
except:
logger.debug("Sheet1 =/v= Sheet2")
logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
@@ -231,14 +231,14 @@ def compare_table(result: str, expected: str = None, **options) -> float:
value1 = value1.lower()
value2 = value2.lower()
if rl["type"]=="includes":
if rl["type"] == "includes":
metric: bool = value2 in value1
elif rl["type"]=="included_by":
elif rl["type"] == "included_by":
metric: bool = value1 in value2
elif rl["type"]=="fuzzy_match":
elif rl["type"] == "fuzzy_match":
metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
elif rl["type"]=="exact_match":
metric: bool = value1==value2
elif rl["type"] == "exact_match":
metric: bool = value1 == value2
total_metric = total_metric and metric
metric: bool = total_metric
@@ -409,7 +409,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
filters1: Dict[str, Any] = load_filters(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
filters2: Dict[str, Any] = load_filters(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
metric: bool = filters1==filters2
metric: bool = filters1 == filters2
logger.debug("Assertion: %s[filter] == %s[filter] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Filters #
@@ -421,7 +421,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
pivots1: Dict[str, Any] = load_pivot_tables(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
pivots2: Dict[str, Any] = load_pivot_tables(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
metric: bool = pivots1==pivots2
metric: bool = pivots1 == pivots2
logger.debug("Assertion: %s[pivot]==%s[pivot] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Pivot Tables #
@@ -482,81 +482,36 @@ def compare_csv(result: str, expected: str, **options) -> float:
return float(metric)
if __name__ == '__main__':
import datetime
import sys
def compare_conference_city_in_order(actual_city_list_path, expected_city):
expected_city_list = expected_city["expected"]
wb = openpyxl.load_workbook(actual_city_list_path)
sheet = wb.active
actual_city_list = []
for row in sheet["C2:C22"]:
for cell in row:
actual_city_list.append(cell.value)
# expected_city is the city that we want to compare with the actual city list
# must in order index
# debug
try:
for i in range(len(actual_city_list)):
if isinstance(expected_city_list[i], str):
if expected_city_list[i] not in actual_city_list[i]:
logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
return 0.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
elif isinstance(expected_city_list[i], List):
if not any(possible_str in actual_city_list[i] for possible_str in expected_city_list[i]):
logger.debug(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
print(f"Expected city {expected_city_list[i]}; Actual city {actual_city_list[i]}")
return 0.
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
else:
raise TypeError("Expected city should be a string or a list of strings")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
except:
return 0.
formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
path1 = "snapshots/test/cache/4e6fcf72-daf3-439f-a232-c434ce416af6/Employee_Age_By_Birthday.xlsx"
path2 = "snapshots/test/cache/4e6fcf72-daf3-439f-a232-c434ce416af6/Employee_Age_By_Birthday_gold.xlsx"
rules = [ { "type": "sheet_data"
, "sheet_idx0": 0
, "sheet_idx1": "EI0"
}
]
print(compare_table(path1, path2
, rules=rules
)
)
print(compare_table(path2, path2
, rules=rules
)
)
# Row Properties
# path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx"
# workbook: Workbook = openpyxl.load_workbook(filename=path1)
# worksheet: Worksheet = workbook.active
# for r_no, dms in worksheet.column_dimensions.items():
# print(r_no, type(r_no), type(dms), dms.hidden)
# Conditional Formats
# import formulas
# path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx"
# path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx"
# path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx"
# workbook: Workbook = openpyxl.load_workbook(filename=path2)
# worksheet: Worksheet = workbook.active
# print(worksheet.conditional_formatting)
# for itm in worksheet.conditional_formatting:
# print(itm.cells)
# for r in itm.rules:
# print( r.type, r.formula, r.dxf.font.color.rgb
# , r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb
# )
# condition = formulas.Parser().ast("=" + r.formula[0])[1].compile()
##print(r.type, r.operator, r.dxfId, r.dxf)
# for r in itm.cells:
# for c in r.cells:
# value = worksheet.cell(row=c[0], column=c[1]).value
# print(value, condition(str(value)))
return 1.

View File

@@ -1,17 +1,19 @@
import json
import logging
import re
from typing import List, Pattern, Dict, Match
from typing import Union, Any, TypeVar, Callable
import re
import json
from .utils import _match_record
from .utils import _match_value_to_rule as _match_pref
import logging
logger = logging.getLogger("desktopenv.metric.thunderbird")
V = TypeVar("Value")
_pref_pattern: Pattern[str] = re.compile(r'^user_pref\("(?P<key>(?:[^"]|\\")+)\", (?P<val>.+)\);$');
def check_thunderbird_prefs(result: str, rule: Dict[str, Dict[str, Dict[str, Any]]]):
"""
Args:
@@ -51,10 +53,10 @@ def check_thunderbird_prefs(result: str, rule: Dict[str, Dict[str, Dict[str, Any
continue
key: str = match_.group("key")
#value: str = match_.group("val")
#if value in {"true", "false"}:
#value = value.title()
#value: V = eval(value)
# value: str = match_.group("val")
# if value in {"true", "false"}:
# value = value.title()
# value: V = eval(value)
value = json.loads(match_.group("val"))
if key in expect_rules:
logger.debug("K: %s, V: %s", key, repr(value))
@@ -64,9 +66,13 @@ def check_thunderbird_prefs(result: str, rule: Dict[str, Dict[str, Dict[str, Any
return float(all(expect_metrics.values()) and unexpect_metric)
_value_processor: Callable[[str], str] = lambda val: val.replace("\\\"", "\"").replace("\\\\", "\\")
#_condition_pattern: Pattern[str] = re.compile(r'(?P<type>AND|OR) \((?P<key>[\w ]+),(?P<rel>[\w ' + '\'' + r']+),(?:"(?P<val2>(?:[^"]|\")+)"|(?P<val1>[^)]+))\)')
_condition_pattern: Pattern[str] = re.compile(r'\b(?:AND|OR) \((?:[\w ]+),(?:[\w ' + '\'' + r']+),(?:"(?:(?:[^"]|\")+)"|(?:[^)]+))\)|\bALL\b')
# _condition_pattern: Pattern[str] = re.compile(r'(?P<type>AND|OR) \((?P<key>[\w ]+),(?P<rel>[\w ' + '\'' + r']+),(?:"(?P<val2>(?:[^"]|\")+)"|(?P<val1>[^)]+))\)')
_condition_pattern: Pattern[str] = re.compile(
r'\b(?:AND|OR) \((?:[\w ]+),(?:[\w ' + '\'' + r']+),(?:"(?:(?:[^"]|\")+)"|(?:[^)]+))\)|\bALL\b')
def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float:
"""
Args:
@@ -112,8 +118,8 @@ def check_thunderbird_filter(result: str, rules: Dict[str, List[Dict[str, str]]]
condition_str: str = _value_processor(l[11:-2])
logger.debug("FILTER CONDITION: %s", condition_str)
conditions: List[str] =\
_condition_pattern.findall(condition_str)
conditions: List[str] = \
_condition_pattern.findall(condition_str)
logger.debug("FILTER CONDITIONS: %s", repr(conditions))
filter_["condition"] = conditions
@@ -138,6 +144,7 @@ def check_thunderbird_folder(result: Union[str, List[str]], reference: Union[str
remove_deleted (bool): ignore deleted messages which has status code 0008 or 0009. default: True
remove_duplicate (bool): remove duplicate messages. default: True
"""
def normalize_msg(msg, options):
ignore_status = options.get('ignore_status', False)
ignore_keys = options.get('ignore_keys', False)
@@ -167,66 +174,3 @@ def check_thunderbird_folder(result: Union[str, List[str]], reference: Union[str
mail2 = read_thunderbird_folder_file(gold)
if mail1 != mail2: return .0
return 1.0
if __name__ == "__main__":
    # Manual smoke test for check_thunderbird_filter.  The commented-out
    # snippets below are exploratory AT-SPI / CSS-selector probes of the
    # Thunderbird accessibility tree, kept for reference while debugging
    # the table/card views; they are intentionally not executed.
    #import lxml.etree
    #from lxml.cssselect import CSSSelector
    #from lxml.etree import _Element
    #xml = "../../任务数据/Thunderbird/vertical-card-view.xml"
    #xml = "../../任务数据/Thunderbird/vertical-table-view.xml"
    #at: _Element = lxml.etree.parse(xml)
    #elements: List[_Element] = CSSSelector('application[name=Thunderbird] page-tab-list')(at)  # page tab tags
    #elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]')(at)  # email tag page
    #elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]>section:nth-child(3)')(at)  # email tag page
    #elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]>section[attr|id=threadPane]>section[attr|id="threadTree"]>table[attr|class="tree-table"]>section[attr|class~="tree-table-header"]>table-row>column-header[name=Subject]>push-button', namespaces={"attr": "uri:deskat:attributes.at-spi.gnome.org"})(at)  # table view, column header
    #elements: List[_Element] = CSSSelector('application[name=Thunderbird] panel>scroll-pane>internal-frame>panel[name$="anonym-x2024@outlook.com"]>section[attr|id=threadPane]>section[attr|id="threadTree"]>table[attr|class="tree-table"]>tree>tree-item>section[name="Subject"]>section>section', namespaces={"attr": "uri:deskat:attributes.at-spi.gnome.org"})(at)  # table view, column header
    #print(len(elements))
    #for elm in elements:
    #print(lxml.etree.tostring(elm, encoding="unicode", pretty_print=True))
    import datetime
    import os
    import sys

    # Root-logger setup mirroring the project's runner scripts: four sinks
    # (normal log, debug log, stdout, filtered debug log), all sharing one
    # ANSI-colored format string.  NOTE(review): `logging` itself is assumed
    # to be imported earlier in this file — confirm before running standalone.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    # Timestamp suffix so each manual run writes fresh log files under logs/.
    datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

    file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
    debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
    stdout_handler = logging.StreamHandler(sys.stdout)
    sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))

    file_handler.setLevel(logging.INFO)
    debug_handler.setLevel(logging.DEBUG)
    stdout_handler.setLevel(logging.INFO)
    sdebug_handler.setLevel(logging.DEBUG)

    formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
    file_handler.setFormatter(formatter)
    debug_handler.setFormatter(formatter)
    stdout_handler.setFormatter(formatter)
    sdebug_handler.setFormatter(formatter)

    # Only stdout and the "sdebug" file are restricted to desktopenv records;
    # the other two sinks capture every logger.
    stdout_handler.addFilter(logging.Filter("desktopenv"))
    sdebug_handler.addFilter(logging.Filter("desktopenv"))

    logger.addHandler(file_handler)
    logger.addHandler(debug_handler)
    logger.addHandler(stdout_handler)
    logger.addHandler(sdebug_handler)

    # Drive the evaluator once against a sample msgFilterRules.dat: expects a
    # single enabled filter that moves discount-subject mail to Promotions.
    print( check_thunderbird_filter( "../../任务数据/Thunderbird/msgFilterRules.dat"
                                   , { "expect": [ { "enabled": "yes"
                                                   , "action": "Move to folder"
                                                   , "actionValue": "mailbox://nobody@Local%20Folders/Promotions"
                                                   , "condition": ["AND (subject,contains,discount)"]
                                                   }
                                                 ]
                                     }
                                   )
         )

View File

@@ -236,6 +236,9 @@ def check_html_background_image(src_path: str, rule: Dict = None) -> float:
Check if the background image is correctly set.
multi-app:bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108
"""
if not src_path:
return 0.0
from bs4 import BeautifulSoup
with open(src_path, 'r') as f:
html_content = f.read()
@@ -252,6 +255,9 @@ def compare_result_files(src_path, tgt_path):
Compare whether the content of two files are the same.
multi-app:7f35355e-02a6-45b5-b140-f0be698bcf85
"""
if not src_path or not tgt_path:
return 0.0
with open(src_path, 'r') as f:
src_content = f.read().strip()
with open(tgt_path, 'r') as f:
@@ -271,12 +277,3 @@ def compare_result_files(src_path, tgt_path):
if src_content == tgt_content:
return 1.0
return 0.0
if __name__ == "__main__":
    # Manual smoke test for check_html_background_image against a cached
    # task artifact; prints the evaluator score for a single rule.
    src_path = "../../../cache/bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108/index.html"
    rule = {
        # Bug fix: the key was written as "type:" (stray trailing colon),
        # so the evaluator would never see a "type" entry in the rule dict.
        "type": "value",
        "value": "anmi_sharper.png"
    }
    print(check_html_background_image(src_path, rule))

View File

@@ -63,7 +63,7 @@ def execute_command():
# Execute the command without any safety checks.
try:
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True)
result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=shell, text=True, timeout=120)
return jsonify({
'status': 'success',
'output': result.stdout,
@@ -117,7 +117,7 @@ def launch_app():
def capture_screen_with_cursor():
# fixme: when running on virtual machines, the cursor is not captured, don't know why
file_path = os.path.join("screenshots", "screenshot.png")
file_path = os.path.join(os.path.dirname(__file__), "screenshots", "screenshot.png")
user_platform = platform.system()
# Ensure the screenshots directory exists
@@ -284,6 +284,15 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N
text = text.replace("\ufffc", "").replace("\ufffd", "")
# }}} Text #
# Image {{{ #
try:
node.queryImage()
except NotImplementedError:
pass
else:
attribute_dict["image"] = "true"
# }}} Image #
# Selection {{{ #
try:
node.querySelection()

View File

@@ -0,0 +1,16 @@
# systemd unit that keeps the OSBench control server (/home/user/main.py)
# running inside the VM, restarting it automatically if it crashes.
[Unit]
Description=OSBench Server
# Give up after 4 failed restarts within 60 seconds.
StartLimitIntervalSec=60
StartLimitBurst=4
After=network.target auditd.service

[Service]
ExecStart=/usr/bin/python3 /home/user/main.py
User=user
WorkingDirectory=/home/user
Restart=on-failure
RestartSec=1
# Server drives GUI automation, so it needs access to the X display.
# This variant pins display :1 — presumably the VM's single session; verify.
Environment="DISPLAY=:1"

[Install]
WantedBy=graphical.target

View File

@@ -0,0 +1,16 @@
# Templated systemd unit for the OSBench control server: the instance name
# (the part after "@" when enabling, substituted for %i) selects the X
# display, e.g. osbench-server@1.service -> DISPLAY=1.
[Unit]
Description=OSBench Server
# Give up after 4 failed restarts within 60 seconds.
StartLimitIntervalSec=60
StartLimitBurst=4
After=network.target auditd.service

[Service]
ExecStart=/usr/bin/python3 /home/user/main.py
User=user
WorkingDirectory=/home/user
Restart=on-failure
RestartSec=1
# %i expands to the unit instance name; NOTE(review): a bare display like
# ":1" contains no "/" so it is a valid instance string — confirm callers
# pass the leading colon (DISPLAY=%i, not DISPLAY=:%i).
Environment="DISPLAY=%i"

[Install]
WantedBy=graphical.target

View File

@@ -10,10 +10,6 @@
"libreoffice_calc"
],
"evaluator": {
"func": "infeasible",
"expected": {
},
"result": {
}
"func": "infeasible"
}
}
}

View File

@@ -10,10 +10,6 @@
"libreoffice_calc"
],
"evaluator": {
"func": "infeasible",
"expected": {
},
"result": {
}
"func": "infeasible"
}
}
}

View File

@@ -63,6 +63,12 @@
"type": "vm_file",
"path": "/home/user/Desktop/saa-format-guide.pptx",
"dest": "saa-format-guide.pptx"
},
"expected": {
"type": "rule",
"rules": {
"color": "red"
}
}
}
}

View File

@@ -94,7 +94,7 @@
"result": {
"type": "googledrive_file",
"settings_file": "evaluation_examples/settings/googledrive/settings.yml",
"path": "environment_policy_report (draft).docx",
"path": ["environment_policy", "environment_policy_report (draft)"],
"dest": "environment_policy_report (draft).docx"
},
"expected": {

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=104pg3yochKyH2Uvlp3BdvKmHgYmSIESu&export=download&authuser=0&confirm=t&uuid=d1926366-4e54-4a44-8dcd-fc49ed6524d7&at=APZUnTXcBFV9kcacsA0toU83lMKJ:1706505549057d",
"url": "https://drive.usercontent.google.com/download?id=1gqqY56robX1tb4YPa3Yk1d72T_k-Rgz3&export=download&authuser=0&confirm=t",
"path": "/home/user/Desktop/15-MB-docx-file-download.docx"
}
]

View File

@@ -1,7 +1,7 @@
{
"id": "3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
"snapshot": "gimp",
"instruction": "Use `gdown` to download the image from \"https://drive.google.com/uc?export=download&id=1i8j5dGS57sA07jEuPNAlQW-sn5uqUnuK\", and then use GIMP to compress it to under 600KB. Resize if needed.",
"instruction": "Download the image from \"https://drive.google.com/uc?export=download&id=1i8j5dGS57sA07jEuPNAlQW-sn5uqUnuK\", and then use GIMP to compress it to under 600KB as \"compressed.jpeg\" on the Desktop. Resize if needed.",
"source": "",
"config": [
{

View File

@@ -1,7 +1,7 @@
{
"id": "42f4d1c7-4521-4161-b646-0a8934e36081",
"snapshot": "gimp",
"instruction": "Configure VS Code to edit GIMP script-fu scripts effectively by installing lisp extension. Test by writing code to resizing the image as 128 * 128 as \"resized.png\"",
"instruction": "Configure VS Code to edit GIMP script-fu scripts effectively by installing lisp extension. Test by writing code to resize the image \"character.png\" to 128 * 128 as \"resized.png\".",
"source": "",
"config": [
{

View File

@@ -30,12 +30,12 @@
],
"evaluator": {
"func": "check_brightness_decrease_and_structure_sim",
"expected": {
"result": {
"type": "vm_file",
"path": "/home/user/Desktop/background.png",
"dest": "background.png"
},
"result": {
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=13if1UwZ5ay6ADAVW2jp3rcyvAEBse6MJ&export=download&authuser=0&confirm=t&uuid=2ea03068-1874-4240-baa1-f8bb2f917a99&at=APZUnTXq6dVlASg819jCaI1A-rm2:1710136385956",
"dest": "image_original.png"

View File

@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1e12nL_V7bffaLSocQ86EiGCdygzggWeu&export=download",
"url": "https://drive.usercontent.google.com/download?id=1epTcblcYh8j_wFtA-aiXPIF2Oo1IVw8A&export=download",
"path": "/home/user/Desktop/Dickinson_Slides.pptx"
}
]
@@ -36,7 +36,7 @@
},
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=1Xl6tgQ0K5qA1BDA2fKTK2xFLzXwbtkZ6&export=download",
"path": "https://drive.usercontent.google.com/download?id=1vUvaQLJUtFgbZi7lSzl0y0TS_WecFczm&export=download",
"dest": "notes_gold.docx"
},
"options": {

View File

@@ -11,10 +11,6 @@
{
"url": "https://drive.google.com/uc?export=download&id=1bmSRNNh4JkF6izrKrmynUHarf0pFES50",
"path": "/home/user/Desktop/cola.png"
},
{
"url": "https://drive.google.com/uc?export=download&id=1MayrIPJWRK7cMEVe3TxYmgkAbVMrYcQA",
"path": "/home/user/Desktop/cropped_gold.png"
}
]
}
@@ -43,8 +39,8 @@
"dest": "cropped.png"
},
"expected": {
"type": "vm_file",
"path": "/home/user/Desktop/cropped_gold.png",
"type": "cloud_file",
"path": "https://drive.google.com/uc?export=download&id=1MayrIPJWRK7cMEVe3TxYmgkAbVMrYcQA",
"dest": "cropped_gold.png"
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "98e8e339-5f91-4ed2-b2b2-12647cb134f4",
"snapshot": "vs_code",
"instruction": "Merge the contents of all .txt files from your vscode project into a single document in Writer. No merging separator is needed. Ensure to set the overall font size of the document to 10.",
"instruction": "Merge the contents of all .txt files from your vscode project into a single document \"concat.docx\" on Desktop with libreoffice writer. No merging separator is needed. Ensure to set the overall font size of the document to 10.",
"source": "",
"config": [
{

View File

@@ -38,7 +38,7 @@
}
},
{
"type": "execute",
"type": "launch",
"parameters": {
"command": [
"nautilus",
@@ -109,4 +109,4 @@
]
}
}
}
}

View File

@@ -11,10 +11,6 @@
{
"url": "https://drive.google.com/uc?export=download&id=1CPGW_OZsfSWDdTU7CFrTjpzSAASyLy4w",
"path": "/home/user/Desktop/tilearray.png"
},
{
"url": "https://drive.google.com/uc?export=download&id=1aHwmnxL2CKEh_FhVpevY452-BQH2t5rG",
"path": "/home/user/Desktop/rearranged_gold.png"
}
]
}
@@ -43,8 +39,8 @@
"dest": "rearranged.png"
},
"expected": {
"type": "vm_file",
"path": "/home/user/Desktop/rearranged_gold.png",
"type": "cloud_file",
"path": "https://drive.google.com/uc?export=download&id=1aHwmnxL2CKEh_FhVpevY452-BQH2t5rG",
"dest": "rearranged_gold.png"
}
}

View File

@@ -1,13 +1,17 @@
{
"id": "e2392362-125e-4f76-a2ee-524b183a3412",
"snapshot": "chrome",
"instruction": "I recently started using the famous personal academic homepage template from academicpages.github.io to build my own personal homepage, and I have cloned it to my local ~/Code/Website folder. According to an online tutorial, I can configure my name and contact information in the _config.yaml file. However, I am not familiar with the YAML file format. Please help me find the sections related to the name and contact information in this file and change them to Test Account and Test@gmail.com.",
"instruction": "I recently started using the famous personal academic homepage template from academicpages.github.io to build my own personal homepage, and I have cloned it to my local ~/Code/Website folder. According to an online tutorial, I can configure my name and contact information in the _config.yaml file. However, I am not familiar with the YAML file format. Please help me find the sections related to the name and contact information in this file and change them to \"Test Account\" and \"Test@gmail.com\".",
"source": "authors",
"config": [
{
"type": "command",
"parameters": {
"command": ["mkdir", "-p", "/home/user/Code/Website"]
"command": [
"mkdir",
"-p",
"/home/user/Code/Website"
]
}
},
{
@@ -24,13 +28,22 @@
{
"type": "execute",
"parameters": {
"command": ["tar", "-xJvf", ".tmp.tar.xz", "-C", "/home/user/Code/Website/"]
"command": [
"tar",
"-xJvf",
".tmp.tar.xz",
"-C",
"/home/user/Code/Website/"
]
}
},
{
"type": "launch",
"parameters": {
"command": ["google-chrome", "--remote-debugging-port=1337"]
"command": [
"google-chrome",
"--remote-debugging-port=1337"
]
}
},
{
@@ -46,31 +59,59 @@
{
"type": "chrome_open_tabs",
"parameters": {
"urls_to_open": ["https://academicpages.github.io/"]
"urls_to_open": [
"https://academicpages.github.io/"
]
}
}
],
"trajectory": "trajectories/e2392362-125e-4f76-a2ee-524b183a3412",
"related_apps": ["chrome", "os", "vscode"],
"related_apps": [
"chrome",
"os",
"vscode"
],
"evaluator": {
"postconfig": [
{
"type": "execute",
"parameters": {
"command": [
"python",
"-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5);"
]
}
}
],
"func": "check_json",
"options": {"is_yaml": true},
"options": {
"is_yaml": true
},
"expected": {
"type": "rule",
"rules": {
"expect": [
{
"key": ["name"],
"key": [
"name"
],
"method": "eq",
"ref": "Test Account"
},
{
"key": ["author", "name"],
"key": [
"author",
"name"
],
"method": "eq",
"ref": "Test Account"
},
{
"key": ["author", "email"],
"key": [
"author",
"email"
],
"method": "eq",
"ref": "Test@gmail.com"
}
@@ -83,4 +124,4 @@
"dest": "_config.yaml"
}
}
}
}

View File

@@ -1 +1 @@
{"access_token": "ya29.a0Ad52N3969wUkQepy6SBOSw9Gjg4-MNPfEUBD3OZpajVfs9wL4DbfImk-5XawHjBkTdCKKBqG5R9XIX6KvvUzQDfB2BwVwb0MfLfLJDLALia7MRdPn4j6GAES372u3bSqJNNPMwVZA9j-THb3o5svJiKcJgwcoFKeKC_xaCgYKAScSARISFQHGX2MioJPeGh_8OM6z1_BujwRe3Q0171", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-08T17:16:15Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N3969wUkQepy6SBOSw9Gjg4-MNPfEUBD3OZpajVfs9wL4DbfImk-5XawHjBkTdCKKBqG5R9XIX6KvvUzQDfB2BwVwb0MfLfLJDLALia7MRdPn4j6GAES372u3bSqJNNPMwVZA9j-THb3o5svJiKcJgwcoFKeKC_xaCgYKAScSARISFQHGX2MioJPeGh_8OM6z1_BujwRe3Q0171", "expires_in": 3599, "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
{"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-13T10:09:01Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}

View File

@@ -0,0 +1,398 @@
{
"chrome": [
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
"06fe7178-4491-4589-810f-2e2bc9502122",
"e1e75309-3ddb-4d09-92ec-de869c928143",
"35253b65-1c19-4304-8aa4-6884b8218fc0",
"2ad9387a-65d8-4e33-ad5b-7580065a27ca",
"7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
"44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
"2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
"480bcfea-d68f-4aaa-a0a9-2589ef319381",
"af630914-714e-4a24-a7bb-f9af687d3b91",
"3720f614-37fd-4d04-8a6b-76f54f8c222d",
"99146c54-4f37-4ab8-9327-5f3291665e1e",
"12086550-11c0-466b-b367-1d9e75b3910e",
"6766f2b8-8a72-417f-a9e5-56fcaa735837",
"93eabf48-6a27-4cb6-b963-7d5fe1e0d3a9",
"ae78f875-5b98-4907-bbb5-9c737fc68c03",
"3299584d-8f11-4457-bf4c-ce98f7600250",
"030eeff7-b492-4218-b312-701ec99ee0cc",
"9656a811-9b5b-4ddf-99c7-5117bcef0626",
"fc6d8143-9452-4171-9459-7f515143419a",
"a96b564e-dbe9-42c3-9ccf-b4498073438a",
"1704f00f-79e6-43a7-961b-cedd3724d5fd",
"f3b19d1e-2d48-44e9-b4e1-defcae1a0197",
"82bc8d6a-36eb-4d2d-8801-ef714fb1e55a",
"47543840-672a-467d-80df-8f7c3b9788c9",
"c1fa57f3-c3db-4596-8f09-020701085416",
"da46d875-6b82-4681-9284-653b0c7ae241",
"6c4c23a1-42a4-43cc-9db1-2f86ff3738cc",
"f79439ad-3ee8-4f99-a518-0eb60e5652b0",
"b7895e80-f4d1-4648-bee0-4eb45a6f1fa8",
"9f3f70fc-5afc-4958-a7b7-3bb4fcb01805",
"7f52cab9-535c-4835-ac8c-391ee64dc930",
"82279c77-8fc6-46f6-9622-3ba96f61b477",
"2888b4e6-5b47-4b57-8bf5-c73827890774",
"b4f95342-463e-4179-8c3f-193cd7241fb2",
"f5d96daf-83a8-4c86-9686-bada31fc66ab",
"121ba48f-9e17-48ce-9bc6-a4fb17a7ebba",
"368d9ba4-203c-40c1-9fa3-da2f1430ce63",
"59155008-fe71-45ec-8a8f-dc35497b6aa8",
"a728a36e-8bf1-4bb6-9a03-ef039a5233f0",
"b070486d-e161-459b-aa2b-ef442d973b92",
"0d8b7de3-e8de-4d86-b9fd-dd2dce58a217",
"9f935cce-0a9f-435f-8007-817732bfc0a5",
"f0b971a1-6831-4b9b-a50e-22a6e47f45ba",
"cabb3bae-cccb-41bd-9f5d-0f3a9fecd825"
],
"gimp": [
"7a4deb26-d57d-4ea9-9a73-630f66a7b568",
"554785e9-4523-4e7a-b8e1-8016f565f56a",
"77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
"f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
"d52d6308-ec58-42b7-a2c9-de80e4837b2b",
"2a729ded-3296-423d-aec4-7dd55ed5fbb3",
"b148e375-fe0b-4bec-90e7-38632b0d73c2",
"a746add2-cab0-4740-ac36-c3769d9bfb46",
"7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
"d16c99dc-2a1e-46f2-b350-d97c86c85c15",
"06ca5602-62ca-47f6-ad4f-da151cde54cc",
"e2dd0213-26db-4349-abe5-d5667bfd725c",
"f723c744-e62c-4ae6-98d1-750d3cd7d79d",
"72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
"7767eef2-56a3-4cea-8c9f-48c070c7d65b",
"734d6579-c07d-47a8-9ae2-13339795476b",
"e19bd559-633b-4b02-940f-d946248f088e",
"38f48d40-764e-4e77-a7cf-51dfce880291",
"fbb548ca-c2a6-4601-9204-e39a2efc507b",
"5ca86c6f-f317-49d8-b6a7-b527541caae8",
"62f7fd55-0687-4a43-b6e1-3eda16fc6252",
"8ea73f6f-9689-42ad-8c60-195bbf06a7ba",
"58d3eeeb-e9d0-499f-962e-fd0db2a744d8",
"2e6f678f-472d-4c55-99cc-8e7c5c402a71",
"045bf3ff-9077-4b86-b483-a1040a949cff",
"dbbf4b99-2253-4b10-9274-45f246af2466"
],
"libreoffice_calc": [
"357ef137-7eeb-4c80-a3bb-0951f26a8aff",
"42e0a640-4f19-4b28-973d-729602b5a4a7",
"51719eea-10bc-4246-a428-ac7c433dd4b3",
"1954cced-e748-45c4-9c26-9855b97fbc5e",
"2bd59342-0664-4ccb-ba87-79379096cc08",
"3aaa4e37-dc91-482e-99af-132a612d40f3",
"1273e544-688f-496b-8d89-3e0f40aa0606",
"12382c62-0cd1-4bf2-bdc8-1d20bf9b2371",
"f9584479-3d0d-4c79-affa-9ad7afdd8850",
"535364ea-05bd-46ea-9937-9f55c68507e8",
"7e429b8d-a3f0-4ed0-9b58-08957d00b127",
"4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
"04d9aeaf-7bed-4024-bedb-e10e6f00eb7f",
"0bf05a7d-b28b-44d2-955a-50b41e24012a",
"6054afcb-5bab-4702-90a0-b259b5d3217c",
"abed40dc-063f-4598-8ba5-9fe749c0615d",
"37608790-6147-45d0-9f20-1137bb35703d",
"26a8440e-c166-4c50-aef4-bfb77314b46b",
"d681960f-7bc3-4286-9913-a8812ba3261a",
"035f41ba-6653-43ab-aa63-c86d449d62e5",
"7efeb4b1-3d19-4762-b163-63328d66303b",
"1de60575-bb6e-4c3d-9e6a-2fa699f9f197",
"aa3a8974-2e85-438b-b29e-a64df44deb4b",
"51b11269-2ca8-4b2a-9163-f21758420e78",
"1e8df695-bd1b-45b3-b557-e7d599cf7597",
"ecb0df7a-4e8d-4a03-b162-053391d3afaf",
"8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
"a01fbce3-2793-461f-ab86-43680ccbae25",
"0326d92d-d218-48a8-9ca1-981cd6d064c7",
"0a2e43bf-b26c-4631-a966-af9dfa12c9e5",
"4188d3a4-077d-46b7-9c86-23e1a036f6c1",
"347ef137-7eeb-4c80-a3bb-0951f26a8aff",
"eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
"0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
"1d17d234-e39d-4ed7-b46f-4417922a4e7c",
"4e6fcf72-daf3-439f-a232-c434ce416af6",
"01b269ae-2111-4a07-81fd-3fcd711993b0",
"21df9241-f8d7-4509-b7f1-37e501a823f7",
"a9f325aa-8c05-4e4f-8341-9e4358565f4f",
"6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
"7a4e4bc8-922c-4c84-865c-25ba34136be1",
"4de54231-e4b5-49e3-b2ba-61a0bec721c0",
"30e3e107-1cfb-46ee-a755-2cd080d7ba6a",
"4172ea6e-6b77-4edb-a9cc-c0014bd1603b",
"1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
"3a7c8185-25c1-4941-bd7b-96e823c9f21f",
"21ab7b40-77c2-4ae6-8321-e00d3a086c73"
],
"libreoffice_impress": [
"5d901039-a89c-4bfb-967b-bf66f4df075e",
"550ce7e7-747b-495f-b122-acdc4d0b8e54",
"455d3c66-7dc6-4537-a39a-36d3e9119df7",
"af23762e-2bfd-4a1d-aada-20fa8de9ce07",
"c59742c0-4323-4b9d-8a02-723c251deaa0",
"ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
"9ec204e4-f0a3-42f8-8458-b772a6797cab",
"0f84bef9-9790-432e-92b7-eece357603fb",
"ce88f674-ab7a-43da-9201-468d38539e4a",
"3b27600c-3668-4abd-8f84-7bcdebbccbdb",
"a097acff-6266-4291-9fbd-137af7ecd439",
"bf4e9888-f10f-47af-8dba-76413038b73c",
"21760ecb-8f62-40d2-8d85-0cee5725cb72",
"ac9bb6cb-1888-43ab-81e4-a98a547918cd",
"2cd43775-7085-45d8-89fa-9e35c0a915cf",
"358aa0a7-6677-453f-ae35-e440f004c31e",
"a669ef01-ded5-4099-9ea9-25e99b569840",
"73c99fb9-f828-43ce-b87a-01dc07faa224",
"15aece23-a215-4579-91b4-69eec72e18da",
"986fc832-6af2-417c-8845-9272b3a1528b",
"a434992a-89df-4577-925c-0c58b747f0f4",
"7dbc52a6-11e0-4c9a-a2cb-1e36cfda80d8",
"841b50aa-df53-47bd-a73a-22d3a9f73160",
"8979838c-54a5-4454-a2b8-3d135a1a5c8f",
"b8adbc24-cef2-4b15-99d5-ecbe7ff445eb",
"2b94c692-6abb-48ae-ab0b-b3e8a19cb340",
"9cf05d24-6bd9-4dae-8967-f67d88f5d38a",
"08aced46-45a2-48d7-993b-ed3fb5b32302",
"edb61b14-a854-4bf5-a075-c8075c11293a",
"c82632a4-56b6-4db4-9dd1-3820ee3388e4",
"39be0d19-634d-4475-8768-09c130f5425d",
"ac1b39ff-ee4d-4483-abce-c117e98942f0",
"f23acfd2-c485-4b7c-a1e7-d4303ddfe864",
"70bca0cc-c117-427e-b0be-4df7299ebeb6",
"af2d657a-e6b3-4c6a-9f67-9e3ed015974c",
"57667013-ea97-417c-9dce-2713091e6e2a",
"0a211154-fda0-48d0-9274-eaac4ce5486d",
"a53f80cd-4a90-4490-8310-097b011433f6",
"7ae48c60-f143-4119-b659-15b8f485eb9a",
"5cfb9197-e72b-454b-900e-c06b0c802b40",
"05dd4c1d-c489-4c85-8389-a7836c4f0567",
"5c1a6c3d-c1b3-47cb-9b01-8d1b7544ffa1",
"4ed5abd0-8b5d-47bd-839f-cacfa15ca37a",
"e4ef0baf-4b52-4590-a47e-d4d464cca2d7",
"ed43c15f-00cb-4054-9c95-62c880865d68",
"3161d64e-3120-47b4-aaad-6a764a92493b",
"04578141-1d42-4146-b9cf-6fab4ce5fd74"
],
"libreoffice_writer": [
"0810415c-bde4-4443-9047-d5f70165a697",
"0a0faba3-5580-44df-965d-f562a99b291c",
"0b17a146-2934-46c7-8727-73ff6b6483e8",
"0e47de2a-32e0-456c-a366-8c607ef7a9d2",
"0e763496-b6bb-4508-a427-fad0b6c3e195",
"3ef2b351-8a84-4ff2-8724-d86eae9b842e",
"4bcb1253-a636-4df4-8cb0-a35c04dfef31",
"66399b0d-8fda-4618-95c4-bfc6191617e9",
"6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2",
"6ada715d-3aae-4a32-a6a7-429b2e43fb93",
"6f81754e-285d-4ce0-b59e-af7edb02d108",
"72b810ef-4156-4d09-8f08-a0cf57e7cefe",
"8472fece-c7dd-4241-8d65-9b3cd1a0b568",
"88fe4b2d-3040-4c70-9a70-546a47764b48",
"936321ce-5236-426a-9a20-e0e3c5dc536f",
"adf5e2c3-64c7-4644-b7b6-d2f0167927e7",
"b21acd93-60fd-4127-8a43-2f5178f4a830",
"d53ff5ee-3b1a-431e-b2be-30ed2673079b",
"e246f6d8-78d7-44ac-b668-fcf47946cb50",
"e528b65e-1107-4b8c-8988-490e4fece599",
"ecc2413d-8a48-416e-a3a2-d30106ca36cb",
"f178a4a9-d090-4b56-bc4c-4b72a61a035d",
"bb8ccc78-479f-4a2f-a71e-d565e439436b"
],
"multi_apps": [
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
"2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
"2fe4b718-3bd7-46ec-bdce-b184f5653624",
"3680a5ee-6870-426a-a997-eba929a0d25c",
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
"510f64c8-9bcc-4be1-8d30-638705850618",
"51f5801c-18b3-4f25-b0c3-02f85507a078",
"58565672-7bfe-48ab-b828-db349231de6b",
"78aed49a-a710-4321-a793-b611a7c5b56b",
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
"937087b6-f668-4ba6-9110-60682ee33441",
"a0b9dc9c-fc07-4a88-8c5d-5e3ecad91bcb",
"b52b40a5-ad70-4c53-b5b0-5650a8387052",
"c867c42d-a52d-4a24-8ae3-f75d256b5618",
"d9b7c649-c975-4f53-88f5-940b29c47247",
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
"ee9a3c83-f437-4879-8918-be5efbb9fac7",
"f7dfbef3-7697-431c-883a-db8583a4e4f9",
"f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
"6d72aad6-187a-4392-a4c4-ed87269c51cf",
"f918266a-b3e0-4914-865d-4faa564f1aef",
"da52d699-e8d2-4dc5-9191-a2199e0b6a9b",
"bc2b57f3-686d-4ec9-87ce-edf850b7e442",
"74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
"00fa164e-2612-4439-992e-157d019a8436",
"acb0f96b-e27c-44d8-b55f-7cb76609dfcd",
"69acbb55-d945-4927-a87b-8480e1a5bb7e",
"48d05431-6cd5-4e76-82eb-12b60d823f7d",
"68a25bd4-59c7-4f4d-975e-da0c8509c848",
"eb303e01-261e-4972-8c07-c9b4e7a4922a",
"0c825995-5b70-4526-b663-113f4c999dd2",
"c7c1e4c3-9e92-4eba-a4b8-689953975ea4",
"d1acdb87-bb67-4f30-84aa-990e56a09c92",
"deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
"8e116af7-7db7-4e35-a68b-b0939c066c78",
"337d318b-aa07-4f4f-b763-89d9a2dd013f",
"82e3c869-49f6-4305-a7ce-f3e64a0618e7",
"185f29bd-5da0-40a6-b69c-ba7f4e0324ef",
"869de13e-bef9-4b91-ba51-f6708c40b096",
"2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e",
"3a93cae4-ad3e-403e-8c12-65303b271818",
"1f18aa87-af6f-41ef-9853-cdb8f32ebdea",
"26150609-0da3-4a7d-8868-0faf9c5f01bb",
"9219480b-3aed-47fc-8bac-d2cffc5849f7",
"881deb30-9549-4583-a841-8270c65f2a17",
"7e287123-70ca-47b9-8521-47db09b69b14",
"e2392362-125e-4f76-a2ee-524b183a3412",
"5bc63fb9-276a-4439-a7c1-9dc76401737f",
"26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
"a82b78bb-7fde-4cb3-94a4-035baf10bcf0",
"36037439-2044-4b50-b9d1-875b5a332143",
"716a6079-22da-47f1-ba73-c9d58f986a38",
"873cafdd-a581-47f6-8b33-b9696ddb7b05",
"a74b607e-6bb5-4ea8-8a7c-5d97c7bbcd2a",
"6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a",
"da922383-bfa4-4cd3-bbad-6bebab3d7742",
"2373b66a-092d-44cb-bfd7-82e86e7a3b4d",
"81c425f5-78f3-4771-afd6-3d2973825947",
"bb83cab4-e5c7-42c7-a67b-e46068032b86",
"227d2f97-562b-4ccb-ae47-a5ec9e142fbb",
"b337d106-053f-4d37-8da0-7f9c4043a66b",
"20236825-b5df-46e7-89bf-62e1d640a897",
"8df7e444-8e06-4f93-8a1a-c5c974269d82",
"aad10cd7-9337-4b62-b704-a857848cedf2",
"02ce9a50-7af2-47ed-8596-af0c230501f8",
"4c26e3f3-3a14-4d86-b44a-d3cedebbb487",
"a503b07f-9119-456b-b75d-f5146737d24f",
"09a37c51-e625-49f4-a514-20a773797a8a",
"3e3fc409-bff3-4905-bf16-c968eee3f807",
"f5c13cdd-205c-4719-a562-348ae5cd1d91",
"5990457f-2adb-467b-a4af-5c857c92d762",
"415ef462-bed3-493a-ac36-ca8c6d23bf1b",
"7ff48d5b-2df2-49da-b500-a5150ffc7f18",
"9f3bb592-209d-43bc-bb47-d77d9df56504",
"dd60633f-2c72-42ba-8547-6f2c8cb0fdb0",
"ce2b64a2-ddc1-4f91-8c7d-a88be7121aac",
"3f05f3b9-29ba-4b6b-95aa-2204697ffc06",
"e1fc0df3-c8b9-4ee7-864c-d0b590d3aa56",
"f8369178-fafe-40c2-adc4-b9b08a125456",
"778efd0a-153f-4842-9214-f05fc176b877",
"47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5",
"c2751594-0cd5-4088-be1b-b5f2f9ec97c4",
"788b3701-3ec9-4b67-b679-418bfa726c22",
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
"42d25c08-fb87-4927-8b65-93631280a26f",
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
"e8172110-ec08-421b-a6f5-842e6451911f",
"42f4d1c7-4521-4161-b646-0a8934e36081",
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
"d68204bf-11c1-4b13-b48b-d303c73d4bf6",
"91190194-f406-4cd6-b3f9-c43fac942b22",
"7f35355e-02a6-45b5-b140-f0be698bcf85",
"98e8e339-5f91-4ed2-b2b2-12647cb134f4",
"0e5303d4-8820-42f6-b18d-daf7e633de21",
"df67aebb-fb3a-44fd-b75b-51b6012df509",
"5df7b33a-9f77-4101-823e-02f863e1c1ae",
"aceb0368-56b8-4073-b70e-3dc9aee184e0",
"22a4636f-8179-4357-8e87-d1743ece1f81",
"236833a3-5704-47fc-888c-4f298f09f799",
"67890eb6-6ce5-4c00-9e3d-fb4972699b06"
],
"os": [
"94d95f96-9699-4208-98ba-3c3119edf9c2",
"bedcedc4-4d72-425e-ad62-21960b11fe0d",
"43c2d64c-bab5-4dcb-a30c-b888321c319a",
"7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
"ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
"a462a795-fdc7-4b23-b689-e8b6df786b78",
"f9be0997-4b7c-45c5-b05c-4612b44a6118",
"28cc3b7e-b194-4bc9-8353-d04c0f4d56d2",
"5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
"e0df059f-28a6-4169-924f-b9623e7184cc",
"ddc75b62-7311-4af8-bfb3-859558542b36",
"b6781586-6346-41cd-935a-a6b1487918fc",
"b3d4a89c-53f2-4d6b-8b6a-541fb5d205fa",
"3ce045a0-877b-42aa-8d2c-b4a863336ab8",
"fe41f596-a71b-4c2f-9b2f-9dcd40b568c3",
"a4d98375-215b-4a4d-aee9-3d4370fccc41",
"13584542-872b-42d8-b299-866967b5c3ef",
"23393935-50c7-4a86-aeea-2b78fd089c5c",
"5812b315-e7bd-4265-b51f-863c02174c28",
"c288e301-e626-4b98-a1ab-159dcb162af5",
"cc9d4f34-1ca0-4a1b-8ff2-09302696acb9",
"c56de254-a3ec-414e-81a6-83d2ce8c41fa",
"4783cc41-c03c-4e1b-89b4-50658f642bd5",
"5c1075ca-bb34-46a3-a7a0-029bd7463e79",
"5ced85fc-fa1a-4217-95fd-0fb530545ce2",
"37887e8c-da15-4192-923c-08fa390a176d",
"4127319a-8b79-4410-b58a-7a151e15f3d7",
"4d117223-a354-47fb-8b45-62ab1390a95f",
"6f56bf42-85b8-4fbb-8e06-6c44960184ba"
],
"thunderbird": [
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
"12086550-11c0-466b-b367-1d9e75b3910e",
"06fe7178-4491-4589-810f-2e2bc9502122",
"6766f2b8-8a72-417f-a9e5-56fcaa735837",
"e1e75309-3ddb-4d09-92ec-de869c928143",
"3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5",
"35253b65-1c19-4304-8aa4-6884b8218fc0",
"d088f539-cab4-4f9a-ac92-9999fc3a656e",
"2ad9387a-65d8-4e33-ad5b-7580065a27ca",
"480bcfea-d68f-4aaa-a0a9-2589ef319381",
"030eeff7-b492-4218-b312-701ec99ee0cc",
"94760984-3ff5-41ee-8347-cf1af709fea0",
"99146c54-4f37-4ab8-9327-5f3291665e1e",
"c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
],
"vlc": [
"59f21cfb-0120-4326-b255-a5b827b38967",
"8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
"8f080098-ddb1-424c-b438-4e96e5e4786e",
"bba3381f-b5eb-4439-bd9e-80c22218d5a7",
"fba2c100-79e8-42df-ae74-b592418d54f4",
"efcf0d81-0835-4880-b2fd-d866e8bc2294",
"8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
"aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
"386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
"9195653c-f4aa-453d-aa95-787f6ccfaae9",
"d06f0d4d-2cd5-4ede-8de9-598629438c6e",
"a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
"5ac2891a-eacd-4954-b339-98abba077adb",
"f3977615-2b45-4ac5-8bba-80c17dbe2a37",
"215dfd39-f493-4bc3-a027-8a97d72c61bf",
"cb130f0d-d36f-4302-9838-b3baf46139b6",
"7882ed6e-bece-4bf0-bada-c32dc1ddae72"
],
"vs_code": [
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
"eabc805a-bfcf-4460-b250-ac92135819f6",
"982d12a5-beab-424f-8d38-d2a48429e511",
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
"9439a27b-18ae-42d8-9778-5f68f891805e",
"ae506c68-352c-4094-9caa-ee9d42052317",
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
"930fdb3b-11a8-46fe-9bac-577332e2640e",
"276cc624-87ea-4f08-ab93-f770e3790175",
"9d425400-e9b2-4424-9a4b-d4c7abac4140",
"5e2d93d8-8ad0-4435-b150-1692aacaa994",
"6ed0a554-cbee-4b44-84ea-fd6c042f4fe1",
"ec71221e-ac43-46f9-89b8-ee7d80f7e1c5",
"70745df8-f2f5-42bd-8074-fbc10334fcc5",
"57242fad-77ca-454f-b71b-f187181a9f23",
"c6bf789c-ba3a-4209-971d-b63abf0ab733",
"0512bb38-d531-4acf-9e7e-0add90816068",
"847a96b6-df94-4927-97e6-8cc9ea66ced7",
"7aeae0e2-70ee-4705-821d-1bba5d5b2ddd",
"dcbe20e8-647f-4f1d-8696-f1c5bbb570e3",
"7c4cc09e-7a92-40dd-8338-b2286535c4ed",
"971cbb5b-3cbf-4ff7-9e24-b5c84fcebfa6"
]
}

View File

@@ -0,0 +1,102 @@
{
"chrome": [
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
],
"gimp": [
"7a4deb26-d57d-4ea9-9a73-630f66a7b568",
"554785e9-4523-4e7a-b8e1-8016f565f56a"
],
"libreoffice_calc": [
"357ef137-7eeb-4c80-a3bb-0951f26a8aff",
"42e0a640-4f19-4b28-973d-729602b5a4a7"
],
"libreoffice_impress": [
"5d901039-a89c-4bfb-967b-bf66f4df075e",
"550ce7e7-747b-495f-b122-acdc4d0b8e54"
],
"libreoffice_writer": [
"0810415c-bde4-4443-9047-d5f70165a697",
"0a0faba3-5580-44df-965d-f562a99b291c"
],
"multi_apps": [
"2b9493d7-49b8-493a-a71b-56cd1f4d6908",
"46407397-a7d5-4c6b-92c6-dbe038b1457b",
"4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
"510f64c8-9bcc-4be1-8d30-638705850618",
"897e3b53-5d4d-444b-85cb-2cdc8a97d903",
"c867c42d-a52d-4a24-8ae3-f75d256b5618",
"e135df7c-7687-4ac0-a5f0-76b74438b53e",
"f7dfbef3-7697-431c-883a-db8583a4e4f9",
"6d72aad6-187a-4392-a4c4-ed87269c51cf",
"f918266a-b3e0-4914-865d-4faa564f1aef",
"da52d699-e8d2-4dc5-9191-a2199e0b6a9b",
"74d5859f-ed66-4d3e-aa0e-93d7a592ce41",
"b5062e3e-641c-4e3a-907b-ac864d2e7652",
"48d05431-6cd5-4e76-82eb-12b60d823f7d",
"eb303e01-261e-4972-8c07-c9b4e7a4922a",
"d1acdb87-bb67-4f30-84aa-990e56a09c92",
"deec51c9-3b1e-4b9e-993c-4776f20e8bb2",
"8e116af7-7db7-4e35-a68b-b0939c066c78",
"185f29bd-5da0-40a6-b69c-ba7f4e0324ef",
"2c1ebcd7-9c6d-4c9a-afad-900e381ecd5e",
"3a93cae4-ad3e-403e-8c12-65303b271818",
"1f18aa87-af6f-41ef-9853-cdb8f32ebdea",
"26150609-0da3-4a7d-8868-0faf9c5f01bb",
"7e287123-70ca-47b9-8521-47db09b69b14",
"e2392362-125e-4f76-a2ee-524b183a3412",
"26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
"a82b78bb-7fde-4cb3-94a4-035baf10bcf0",
"36037439-2044-4b50-b9d1-875b5a332143",
"716a6079-22da-47f1-ba73-c9d58f986a38",
"a74b607e-6bb5-4ea8-8a7c-5d97c7bbcd2a",
"6f4073b8-d8ea-4ade-8a18-c5d1d5d5aa9a",
"da922383-bfa4-4cd3-bbad-6bebab3d7742",
"2373b66a-092d-44cb-bfd7-82e86e7a3b4d",
"81c425f5-78f3-4771-afd6-3d2973825947",
"227d2f97-562b-4ccb-ae47-a5ec9e142fbb",
"20236825-b5df-46e7-89bf-62e1d640a897",
"02ce9a50-7af2-47ed-8596-af0c230501f8",
"4c26e3f3-3a14-4d86-b44a-d3cedebbb487",
"09a37c51-e625-49f4-a514-20a773797a8a",
"3e3fc409-bff3-4905-bf16-c968eee3f807",
"415ef462-bed3-493a-ac36-ca8c6d23bf1b",
"9f3bb592-209d-43bc-bb47-d77d9df56504",
"dd60633f-2c72-42ba-8547-6f2c8cb0fdb0",
"3f05f3b9-29ba-4b6b-95aa-2204697ffc06",
"f8369178-fafe-40c2-adc4-b9b08a125456",
"778efd0a-153f-4842-9214-f05fc176b877",
"47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5",
"c2751594-0cd5-4088-be1b-b5f2f9ec97c4",
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
"42d25c08-fb87-4927-8b65-93631280a26f",
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
"d68204bf-11c1-4b13-b48b-d303c73d4bf6",
"91190194-f406-4cd6-b3f9-c43fac942b22",
"7f35355e-02a6-45b5-b140-f0be698bcf85",
"98e8e339-5f91-4ed2-b2b2-12647cb134f4",
"df67aebb-fb3a-44fd-b75b-51b6012df509",
"5df7b33a-9f77-4101-823e-02f863e1c1ae",
"22a4636f-8179-4357-8e87-d1743ece1f81",
"236833a3-5704-47fc-888c-4f298f09f799"
],
"os": [
"5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
"5812b315-e7bd-4265-b51f-863c02174c28",
"43c2d64c-bab5-4dcb-a30c-b888321c319a",
"7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82"
],
"thunderbird": [
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
],
"vlc": [
"59f21cfb-0120-4326-b255-a5b827b38967",
"8f080098-ddb1-424c-b438-4e96e5e4786e"
],
"vs_code": [
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
"53ad5833-3455-407b-bbc6-45b4c79ab8fb"
]
}

View File

@@ -1,432 +0,0 @@
import datetime
import json
import logging
import os
import sys
import func_timeout
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
# Module-level logging setup: the root logger fans out to four sinks, all
# stamped with the process start time so concurrent runs do not collide.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
# Timestamp shared by all four log files of this run.
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
# NOTE(review): assumes a "logs" directory already exists -- FileHandler will
# raise otherwise; confirm the caller creates it.
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
# ANSI color codes in the format string; readable in a terminal, raw escapes
# in the log files.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
# Only records from loggers under the "desktopenv" namespace reach stdout and
# the sdebug file; the plain/debug files receive everything.
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #

# Rebind to the experiment-specific child logger used throughout this script.
logger = logging.getLogger("desktopenv.experiment")

# Host-side path to the VMware .vmx file of the Ubuntu guest image.
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
    """Run one benchmark example inside the VM-backed desktop environment.

    Drives ``agent`` against a fresh ``DesktopEnv`` built from ``example`` for
    at most ``max_steps`` prediction rounds, saving one screenshot plus one
    JSON-lines trajectory record per executed action under
    ``example_trajectory_dir``, then evaluates the final state and appends the
    score to the trajectory file.

    Args:
        example: Task configuration dict handed to DesktopEnv as task_config.
        agent: Object exposing ``action_space`` and ``predict(observation)``
            returning a list of actions.
        max_steps: Maximum number of agent prediction rounds.
        example_trajectory_dir: Output directory for screenshots,
            trajectory.json, and (when recording) recording.mp4.
        recording: When True, ask the env controller to screen-record the run.
    """
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
        task_config=example
    )
    # reset the environment to certain snapshot
    observation = env.reset()
    done = False
    step_num = 0

    if recording:
        # send a request to the server to start recording
        env.controller.start_recording()

    while not done and step_num < max_steps:
        actions = agent.predict(observation)
        step_num += 1
        for action in actions:
            # Capture the timestamp before executing the action
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_num, action)
            observation, reward, done, info = env.step(action)
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            logger.info("Info: %s", info)
            # Save screenshot and trajectory information
            # NOTE(review): observation['screenshot'] appears to be a local
            # file path produced by the env -- confirm against DesktopEnv.
            with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
                with open(observation['screenshot'], "rb") as __f:
                    screenshot = __f.read()
                _f.write(screenshot)
            # Append one JSON-lines record per executed action.
            with open(trajectory_recording_path, "a") as f:
                f.write(json.dumps({
                    "step_num": step_num,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_num}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break

    def stop_recording():
        # Best-effort: failures while retrieving the recording are reported
        # but do not abort evaluation.
        try:
            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
        except Exception as e:
            print(f"An error occurred while stopping the recording: {e}")

    # Bound the recording retrieval to 30s so a hung transfer cannot stall
    # the whole run.
    try:
        func_timeout.func_timeout(30, stop_recording)
    except func_timeout.exceptions.FunctionTimedOut:
        logger.info("Recording timed out.")

    result = env.evaluate()
    logger.info("Result: %.2f", result)

    # Final JSON-lines record carries the evaluation score.
    with open(trajectory_recording_path, "a") as f:
        f.write(json.dumps({
            "result": result
        }))
        f.write("\n")

    # env.close()
    logger.info("Environment closed.")
def main(example_class, example_id, gpt4_model="gpt-4-0125-preview"):
    """Load one evaluation example and run it with a GPT-4 agent on the
    a11y-tree observation space.

    Args:
        example_class: Example category / app domain (subdirectory name under
            evaluation_examples/examples).
        example_id: UUID of the example JSON file to load.
        gpt4_model: OpenAI model name handed to GPT4v_Agent.
    """
    action_space = "pyautogui"
    gemini_model = "gemini-pro-vision"

    logger.info("Running example %s/%s", example_class, example_id)
    logger.info("Using model %s", gpt4_model)
    # logger.info("Using model %s", gemini_model)

    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
        example = json.load(f)
    # Pin the VM snapshot used for this batch of runs.
    example["snapshot"] = "exp_v5"

    api_key = os.environ.get("OPENAI_API_KEY")
    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], max_tokens=1000,
                        action_space=action_space, exp="a11y_tree")

    # api_key = os.environ.get("GENAI_API_KEY")
    # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space, exp="a11y_tree")

    root_trajectory_dir = "exp_trajectory"
    # Layout: exp_trajectory/a11y_tree/<class>/<model>/<example_id>/
    example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gpt4_model, example_id)
    # example_trajectory_dir = os.path.join(root_trajectory_dir, "a11y_tree", example_class, gemini_model, example_id)
    os.makedirs(example_trajectory_dir, exist_ok=True)

    run_one_example(example, agent, 15, example_trajectory_dir)
if __name__ == '__main__':
    # Candidate example ids per app domain.  Most per-domain driver loops are
    # kept commented out so batches can be toggled on per run; only the
    # vs_code and multi_apps loops are active in this version of the script.
    os_list = [
        "94d95f96-9699-4208-98ba-3c3119edf9c2",
        "bedcedc4-4d72-425e-ad62-21960b11fe0d",
        "43c2d64c-bab5-4dcb-a30c-b888321c319a",
        "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
        "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
        "f9be0997-4b7c-45c5-b05c-4612b44a6118",
        "28cc3b7e-b194-4bc9-8353-d04c0f4d56d2",
        "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
        "e0df059f-28a6-4169-924f-b9623e7184cc",
        "ddc75b62-7311-4af8-bfb3-859558542b36",
        "b6781586-6346-41cd-935a-a6b1487918fc",
        "3ce045a0-877b-42aa-8d2c-b4a863336ab8",
        "a4d98375-215b-4a4d-aee9-3d4370fccc41",
        "13584542-872b-42d8-b299-866967b5c3ef",
        "23393935-50c7-4a86-aeea-2b78fd089c5c"
    ]
    # for example_id in os_list:
    #     try:
    #         main("os", example_id, gpt4_model="gpt-3.5-turbo-16k")
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue

    # NOTE(review): the first entry is duplicated.
    vlc_list = [
        "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
        "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
        "8f080098-ddb1-424c-b438-4e96e5e4786e",
        "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
        "fba2c100-79e8-42df-ae74-b592418d54f4",
        "efcf0d81-0835-4880-b2fd-d866e8bc2294",
        "8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
        "aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
        "386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
        "9195653c-f4aa-453d-aa95-787f6ccfaae9",
        "d06f0d4d-2cd5-4ede-8de9-598629438c6e",
        "a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
        "f3977615-2b45-4ac5-8bba-80c17dbe2a37",
        "215dfd39-f493-4bc3-a027-8a97d72c61bf"
    ]
    # NOTE(review): chrome_list is redefined (identically) further below;
    # this first definition is shadowed before any loop uses it.
    chrome_list = [
        "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
        "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
        "06fe7178-4491-4589-810f-2e2bc9502122",
        "e1e75309-3ddb-4d09-92ec-de869c928143",
        "35253b65-1c19-4304-8aa4-6884b8218fc0",
        "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
        "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
        "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
        "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
        "af630914-714e-4a24-a7bb-f9af687d3b91"
    ]
    calc_list = [
        "eb03d19a-b88d-4de4-8a64-ca0ac66f426b",
        "0bf05a7d-b28b-44d2-955a-50b41e24012a",
        "7a4e4bc8-922c-4c84-865c-25ba34136be1",
        "2bd59342-0664-4ccb-ba87-79379096cc08",
        "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
        "7efeb4b1-3d19-4762-b163-63328d66303b",
        "4e6fcf72-daf3-439f-a232-c434ce416af6",
        "6054afcb-5bab-4702-90a0-b259b5d3217c",
        "abed40dc-063f-4598-8ba5-9fe749c0615d",
        "01b269ae-2111-4a07-81fd-3fcd711993b0",
        "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
        "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
        "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
        "51b11269-2ca8-4b2a-9163-f21758420e78",
        "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
        "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
        "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
        "3aaa4e37-dc91-482e-99af-132a612d40f3",
        "37608790-6147-45d0-9f20-1137bb35703d",
        "f9584479-3d0d-4c79-affa-9ad7afdd8850",
        "d681960f-7bc3-4286-9913-a8812ba3261a",
        "21df9241-f8d7-4509-b7f1-37e501a823f7",
        "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
        "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
        "aa3a8974-2e85-438b-b29e-a64df44deb4b",
        "a01fbce3-2793-461f-ab86-43680ccbae25",
        "4f07fbe9-70de-4927-a4d5-bb28bc12c52c",
    ]
    # for example_id in calc_list:
    #     main("libreoffice_calc", example_id)

    impress_list = [
        "5d901039-a89c-4bfb-967b-bf66f4df075e",
        "550ce7e7-747b-495f-b122-acdc4d0b8e54",
        "455d3c66-7dc6-4537-a39a-36d3e9119df7",
        "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
        "c59742c0-4323-4b9d-8a02-723c251deaa0",
        "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
        "9ec204e4-f0a3-42f8-8458-b772a6797cab",
        "0f84bef9-9790-432e-92b7-eece357603fb",
        "ce88f674-ab7a-43da-9201-468d38539e4a",
        "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
        "a097acff-6266-4291-9fbd-137af7ecd439",
        "bf4e9888-f10f-47af-8dba-76413038b73c",
        "21760ecb-8f62-40d2-8d85-0cee5725cb72"
    ]
    # for example_id in impress_list:
    #     main("libreoffice_impress", example_id)

    # NOTE(review): thunderbird_list is redefined (identically) further below.
    thunderbird_list = [
        # "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
        # "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
        "12086550-11c0-466b-b367-1d9e75b3910e",
        "06fe7178-4491-4589-810f-2e2bc9502122",
        "6766f2b8-8a72-417f-a9e5-56fcaa735837",
        "e1e75309-3ddb-4d09-92ec-de869c928143",
        "3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5",
        "35253b65-1c19-4304-8aa4-6884b8218fc0",
        "d088f539-cab4-4f9a-ac92-9999fc3a656e",
        "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
        "030eeff7-b492-4218-b312-701ec99ee0cc",
        "94760984-3ff5-41ee-8347-cf1af709fea0",
        "99146c54-4f37-4ab8-9327-5f3291665e1e",
        "c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
    ]
    # for example_id in thunderbird_list:
    #     main("thunderbird", example_id)

    gimp_list = [
        "7a4deb26-d57d-4ea9-9a73-630f66a7b568",
        "554785e9-4523-4e7a-b8e1-8016f565f56a",
        "77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
        "f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
        "d52d6308-ec58-42b7-a2c9-de80e4837b2b",
        "2a729ded-3296-423d-aec4-7dd55ed5fbb3",
        "b148e375-fe0b-4bec-90e7-38632b0d73c2",
        "a746add2-cab0-4740-ac36-c3769d9bfb46",
        "7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
        "d16c99dc-2a1e-46f2-b350-d97c86c85c15",
        "06ca5602-62ca-47f6-ad4f-da151cde54cc",
        "e2dd0213-26db-4349-abe5-d5667bfd725c",
        "f723c744-e62c-4ae6-98d1-750d3cd7d79d",
        "72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
        "7767eef2-56a3-4cea-8c9f-48c070c7d65b",
        "734d6579-c07d-47a8-9ae2-13339795476b"
    ]
    # for example_id in gimp_list:
    #     try:
    #         main("gimp", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue

    # NOTE(review): vs_code_list is redefined below with every entry commented
    # out, so the active loop further down iterates an EMPTY list -- presumably
    # intentional run-toggling, but worth confirming.
    vs_code_list = [
        "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
        "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
        "eabc805a-bfcf-4460-b250-ac92135819f6",
        "982d12a5-beab-424f-8d38-d2a48429e511",
        "4e60007a-f5be-4bfc-9723-c39affa0a6d3",
        "e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
        "9439a27b-18ae-42d8-9778-5f68f891805e",
        "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
        "930fdb3b-11a8-46fe-9bac-577332e2640e",
        "276cc624-87ea-4f08-ab93-f770e3790175",
        "9d425400-e9b2-4424-9a4b-d4c7abac4140"
    ]
    # for example_id in vs_code_list:
    #     try:
    #         main("vs_code", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue

    # NOTE(review): mid-script import; conventionally belongs at the top of
    # the file with the other imports.
    from tqdm import tqdm

    # for example_id in tqdm(vlc_list):
    #     try:
    #         main("vlc", example_id, gpt4_model="gpt-3.5-turbo-16k")
    #     except Exception as e:
    #         print(f"An error occurred while running the example: {e}")
    #         continue

    chrome_list = [
        "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
        "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
        "06fe7178-4491-4589-810f-2e2bc9502122",
        "e1e75309-3ddb-4d09-92ec-de869c928143",
        "35253b65-1c19-4304-8aa4-6884b8218fc0",
        "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
        "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
        "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
        "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
        "af630914-714e-4a24-a7bb-f9af687d3b91"
    ]
    # for example_id in tqdm(chrome_list):
    #     try:
    #         main("chrome", example_id, gpt4_model="gpt-3.5-turbo-16k")
    #     except Exception as e:
    #         print(f"An error occurred while running the example: {e}")
    #         continue

    # All entries commented out: the loop below is effectively a no-op.
    vs_code_list = [
        # "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
        # "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
        # "eabc805a-bfcf-4460-b250-ac92135819f6",
        # "982d12a5-beab-424f-8d38-d2a48429e511",
        # "4e60007a-f5be-4bfc-9723-c39affa0a6d3",
        # "e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
        # "9439a27b-18ae-42d8-9778-5f68f891805e",
        # "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
        # "930fdb3b-11a8-46fe-9bac-577332e2640e",
        # "276cc624-87ea-4f08-ab93-f770e3790175",
        # "9d425400-e9b2-4424-9a4b-d4c7abac4140"
    ]
    for example_id in tqdm(vs_code_list):
        try:
            main("vs_code", example_id, gpt4_model="gpt-3.5-turbo-16k")
        except Exception as e:
            print(f"An error occurred while running the example: {e}")
            continue

    thunderbird_list = [
        # "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
        # "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
        "12086550-11c0-466b-b367-1d9e75b3910e",
        "06fe7178-4491-4589-810f-2e2bc9502122",
        "6766f2b8-8a72-417f-a9e5-56fcaa735837",
        "e1e75309-3ddb-4d09-92ec-de869c928143",
        "3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5",
        "35253b65-1c19-4304-8aa4-6884b8218fc0",
        "d088f539-cab4-4f9a-ac92-9999fc3a656e",
        "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
        "030eeff7-b492-4218-b312-701ec99ee0cc",
        "94760984-3ff5-41ee-8347-cf1af709fea0",
        "99146c54-4f37-4ab8-9327-5f3291665e1e",
        "c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
    ]
    # for example_id in tqdm(thunderbird_list):
    #     try:
    #         main("thunderbird", example_id, gpt4_model="gpt-3.5-turbo-16k")
    #     except Exception as e:
    #         print(f"An error occurred while running the example: {e}")
    #         continue

    # Active batch: uncommented ids only.
    multiple_list = [
        # "f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
        # "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
        "2fe4b718-3bd7-46ec-bdce-b184f5653624",
        "3680a5ee-6870-426a-a997-eba929a0d25c",
        # "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
        # "b52b40a5-ad70-4c53-b5b0-5650a8387052",
        # "46407397-a7d5-4c6b-92c6-dbe038b1457b",
        # "2b9493d7-49b8-493a-a71b-56cd1f4d6908",
        # "51f5801c-18b3-4f25-b0c3-02f85507a078",
        "58565672-7bfe-48ab-b828-db349231de6b",
        # "2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
        # "510f64c8-9bcc-4be1-8d30-638705850618",
        # "937087b6-f668-4ba6-9110-60682ee33441",
        # "ee9a3c83-f437-4879-8918-be5efbb9fac7",
        # "3680a5ee-6870-426a-a997-eba929a0d25c",
        # "e135df7c-7687-4ac0-a5f0-76b74438b53e",
        "ee9a3c83-f437-4879-8918-be5efbb9fac7",
        # "58565672-7bfe-48ab-b828-db349231de6b",
        # "2fe4b718-3bd7-46ec-bdce-b184f5653624"
    ]
    for example_id in multiple_list:
        try:
            main("multi_apps", example_id, gpt4_model="gpt-3.5-turbo-16k")
        except Exception as e:
            logger.error("An error occurred while running the example: %s", e)
            continue

View File

@@ -1,306 +0,0 @@
import datetime
import json
import logging
import os
import sys
import time
import func_timeout
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# from mm_agents.gemini_pro_agent import GeminiPro_Agent
# Logger Configs {{{ #
# Root-logger setup: four sinks (plain + debug files, stdout, desktopenv-only
# debug file), all stamped with the process start time.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
# NOTE(review): assumes a "logs" directory already exists -- verify.
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
# ANSI-colored format; escapes appear raw in the log files.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
# Restrict stdout and the sdebug file to the "desktopenv" logger namespace.
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #

# Experiment-specific child logger used below.
logger = logging.getLogger("desktopenv.experiment")

# Host-side path to the VMware .vmx file of the Ubuntu guest image.
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
    """Run one benchmark example inside the VM-backed desktop environment.

    Drives ``agent`` against a fresh ``DesktopEnv`` built from ``example`` for
    at most ``max_steps`` prediction rounds, saving one screenshot plus one
    JSON-lines trajectory record per executed action under
    ``example_trajectory_dir``, then evaluates the final state and appends the
    score to the trajectory file.

    Args:
        example: Task configuration dict handed to DesktopEnv as task_config.
        agent: Object exposing ``action_space`` and ``predict(observation)``
            returning a list of actions.
        max_steps: Maximum number of agent prediction rounds.
        example_trajectory_dir: Output directory for screenshots,
            trajectory.json, and (when recording) recording.mp4.
        recording: When True, ask the env controller to screen-record the run.
    """
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
        task_config=example
    )
    # reset the environment to certain snapshot
    observation = env.reset()
    done = False
    step_num = 0

    if recording:
        # send a request to the server to start recording
        env.controller.start_recording()

    while not done and step_num < max_steps:
        actions = agent.predict(observation)
        step_num += 1
        for action in actions:
            # Capture the timestamp before executing the action
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_num, action)
            observation, reward, done, info = env.step(action)
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            logger.info("Info: %s", info)
            # Save screenshot and trajectory information
            # NOTE(review): observation['screenshot'] appears to be a local
            # file path produced by the env -- confirm against DesktopEnv.
            with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
                with open(observation['screenshot'], "rb") as __f:
                    screenshot = __f.read()
                _f.write(screenshot)
            # Append one JSON-lines record per executed action.
            with open(trajectory_recording_path, "a") as f:
                f.write(json.dumps({
                    "step_num": step_num,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_num}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break

    def stop_recording():
        # Best-effort: failures while retrieving the recording are reported
        # but do not abort evaluation.
        try:
            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
        except Exception as e:
            print(f"An error occurred while stopping the recording: {e}")

    # Bound the recording retrieval to 30s so a hung transfer cannot stall
    # the whole run.
    try:
        func_timeout.func_timeout(30, stop_recording)
    except func_timeout.exceptions.FunctionTimedOut:
        logger.info("Recording timed out.")

    result = env.evaluate()
    logger.info("Result: %.2f", result)

    # Final JSON-lines record carries the evaluation score.
    with open(trajectory_recording_path, "a") as f:
        f.write(json.dumps({
            "result": result
        }))
        f.write("\n")

    # env.close()
    logger.info("Environment closed.")
def main(example_class, example_id, gpt4_model = "gpt-4-vision-preview"):
    """Load one evaluation example and run it with a GPT-4V agent on the
    screenshot observation space.

    Args:
        example_class: Example category / app domain (subdirectory name under
            evaluation_examples/examples).
        example_id: UUID of the example JSON file to load.
        gpt4_model: OpenAI model name handed to GPT4v_Agent.
    """
    action_space = "pyautogui"
    gemini_model = "gemini-pro-vision"

    logger.info("Running example %s/%s", example_class, example_id)
    logger.info("Using model %s", gpt4_model)
    # logger.info("Using model %s", gemini_model)

    with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
        example = json.load(f)
    # Pin the VM snapshot used for this batch of runs.
    example["snapshot"] = "exp_v5"

    api_key = os.environ.get("OPENAI_API_KEY")
    agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'], action_space=action_space,
                        exp="screenshot")
    #
    # api_key = os.environ.get("GENAI_API_KEY")
    # agent = GeminiPro_Agent(api_key=api_key, instruction=example['instruction'], action_space=action_space, exp="screenshot")

    root_trajectory_dir = "exp_trajectory"
    # Layout: exp_trajectory/screenshot/<class>/<model>/<example_id>/
    example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gpt4_model, example_id)
    # example_trajectory_dir = os.path.join(root_trajectory_dir, "screenshot", example_class, gemini_model, example_id)
    os.makedirs(example_trajectory_dir, exist_ok=True)

    run_one_example(example, agent, 15, example_trajectory_dir)
if __name__ == '__main__':
    # Candidate example ids per app domain.  In this version of the script
    # every driver loop is commented out, so running it only builds the lists
    # and exits; batches were toggled on by uncommenting the relevant loop.
    chrome_list = [
        # "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
        # "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
        # "06fe7178-4491-4589-810f-2e2bc9502122",
        # "e1e75309-3ddb-4d09-92ec-de869c928143",
        # "35253b65-1c19-4304-8aa4-6884b8218fc0",
        # "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
        # "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
        # "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
        # "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
        "af630914-714e-4a24-a7bb-f9af687d3b91"
    ]
    calc_list = [
        "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
        "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
        "7efeb4b1-3d19-4762-b163-63328d66303b",
        "4e6fcf72-daf3-439f-a232-c434ce416af6",
        "6054afcb-5bab-4702-90a0-b259b5d3217c",
        "abed40dc-063f-4598-8ba5-9fe749c0615d",
        "01b269ae-2111-4a07-81fd-3fcd711993b0",
        "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
        "af2b02f7-acee-4be4-8b66-499fab394915",
        "da1d63b8-fa12-417b-ba18-f748e5f770f3",
        "636380ea-d5f6-4474-b6ca-b2ed578a20f1",
        "5ba77536-05c5-4aae-a9ff-6e298d094c3e",
        "4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b",
        "672a1b02-c62f-4ae2-acf0-37f5fb3052b0",
        "648fe544-16ba-44af-a587-12ccbe280ea6",
        "8985d1e4-5b99-4711-add4-88949ebb2308",
        "9e606842-2e27-43bf-b1d1-b43289c9589b",
        "fcb6e45b-25c4-4087-9483-03d714f473a9",
        "68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2",
        "fff629ea-046e-4793-8eec-1a5a15c3eb35",
        "5c9a206c-bb00-4fb6-bb46-ee675c187df5",
        "e975ae74-79bd-4672-8d1c-dc841a85781d",
        "34a6938a-58da-4897-8639-9b90d6db5391",
        "b5a22759-b4eb-4bf2-aeed-ad14e8615f19",
        "2f9913a1-51ed-4db6-bfe0-7e1c95b3139e",
        "2558031e-401d-4579-8e00-3ecf540fb492",
        "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
        "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
        "51b11269-2ca8-4b2a-9163-f21758420e78",
        "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
        "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
        "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
        "3aaa4e37-dc91-482e-99af-132a612d40f3",
        "37608790-6147-45d0-9f20-1137bb35703d",
        "f9584479-3d0d-4c79-affa-9ad7afdd8850",
        "d681960f-7bc3-4286-9913-a8812ba3261a",
        "21df9241-f8d7-4509-b7f1-37e501a823f7",
        "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
        "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
        "aa3a8974-2e85-438b-b29e-a64df44deb4b",
        "a01fbce3-2793-461f-ab86-43680ccbae25",
        "4f07fbe9-70de-4927-a4d5-bb28bc12c52c"
    ]
    # for example_id in calc_list:
    #     main("libreoffice_calc", example_id)

    impress_list = [
        # "5d901039-a89c-4bfb-967b-bf66f4df075e",
        # "550ce7e7-747b-495f-b122-acdc4d0b8e54",
        # "455d3c66-7dc6-4537-a39a-36d3e9119df7",
        # "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
        # "c59742c0-4323-4b9d-8a02-723c251deaa0",
        # "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
        # "9ec204e4-f0a3-42f8-8458-b772a6797cab",
        # "0f84bef9-9790-432e-92b7-eece357603fb",
        # "ce88f674-ab7a-43da-9201-468d38539e4a",
        # "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
        # "a097acff-6266-4291-9fbd-137af7ecd439",
        # "bf4e9888-f10f-47af-8dba-76413038b73c",
        "21760ecb-8f62-40d2-8d85-0cee5725cb72"
    ]
    # for example_id in impress_list:
    #     main("libreoffice_impress", example_id)

    # gimp_list = [
    #     "7a4deb26-d57d-4ea9-9a73-630f66a7b568",
    #     "554785e9-4523-4e7a-b8e1-8016f565f56a",
    #     "77b8ab4d-994f-43ac-8930-8ca087d7c4b4",
    #     "f4aec372-4fb0-4df5-a52b-79e0e2a5d6ce",
    #     "d52d6308-ec58-42b7-a2c9-de80e4837b2b",
    #     "2a729ded-3296-423d-aec4-7dd55ed5fbb3",
    #     "b148e375-fe0b-4bec-90e7-38632b0d73c2",
    #     "a746add2-cab0-4740-ac36-c3769d9bfb46",
    #     "7b7617bd-57cc-468e-9c91-40c4ec2bcb3d",
    #     "d16c99dc-2a1e-46f2-b350-d97c86c85c15",
    #     "06ca5602-62ca-47f6-ad4f-da151cde54cc",
    #     "e2dd0213-26db-4349-abe5-d5667bfd725c",
    #     "f723c744-e62c-4ae6-98d1-750d3cd7d79d",
    #     "72f83cdc-bf76-4531-9a1b-eb893a13f8aa",
    #     "7767eef2-56a3-4cea-8c9f-48c070c7d65b",
    #     "734d6579-c07d-47a8-9ae2-13339795476b"
    # ]
    #
    # for example_id in gimp_list:
    #     try:
    #         main("gimp", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue
    #
    vs_code_list = [
        # "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
        # "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
        # "eabc805a-bfcf-4460-b250-ac92135819f6",
        # "982d12a5-beab-424f-8d38-d2a48429e511",
        # "4e60007a-f5be-4bfc-9723-c39affa0a6d3",
        # "e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
        # "9439a27b-18ae-42d8-9778-5f68f891805e",
        "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
        "930fdb3b-11a8-46fe-9bac-577332e2640e",
        "276cc624-87ea-4f08-ab93-f770e3790175",
        "9d425400-e9b2-4424-9a4b-d4c7abac4140"
    ]
    # for example_id in vs_code_list:
    #     try:
    #         main("vs_code", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue

    # multiple_list = [
    #     "f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
    #     "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
    #     "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
    #     "b52b40a5-ad70-4c53-b5b0-5650a8387052",
    #     "46407397-a7d5-4c6b-92c6-dbe038b1457b",
    #     "2b9493d7-49b8-493a-a71b-56cd1f4d6908",
    #     "51f5801c-18b3-4f25-b0c3-02f85507a078",
    #     "2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
    #     "510f64c8-9bcc-4be1-8d30-638705850618",
    #     "937087b6-f668-4ba6-9110-60682ee33441",
    #     "ee9a3c83-f437-4879-8918-be5efbb9fac7",
    #     "3680a5ee-6870-426a-a997-eba929a0d25c",
    #     "e135df7c-7687-4ac0-a5f0-76b74438b53e",
    #     "58565672-7bfe-48ab-b828-db349231de6b",
    #     "2fe4b718-3bd7-46ec-bdce-b184f5653624"
    # ]
    #
    # for example_id in multiple_list:
    #     try:
    #         main("multi_apps", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue

View File

@@ -1,361 +0,0 @@
import datetime
import json
import logging
import os
import sys
import func_timeout
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
# Root-logger setup: four sinks (plain + debug files, stdout, desktopenv-only
# debug file), all stamped with the process start time.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
# NOTE(review): assumes a "logs" directory already exists -- verify.
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
# ANSI-colored format; escapes appear raw in the log files.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
# Restrict stdout and the sdebug file to the "desktopenv" logger namespace.
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #

# Experiment-specific child logger used below.
logger = logging.getLogger("desktopenv.experiment")

# Host-side path to the VMware .vmx file of the Ubuntu guest image.
# NOTE(review): this variant points at a second VM ("Ubuntu2") unlike the
# sibling scripts -- confirm that is intentional.
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu2\Ubuntu2.vmx"
# PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
    """Run one benchmark example inside the VM-backed desktop environment.

    Drives ``agent`` against a fresh ``DesktopEnv`` built from ``example`` for
    at most ``max_steps`` prediction rounds, saving one screenshot plus one
    JSON-lines trajectory record per executed action under
    ``example_trajectory_dir``, then evaluates the final state and appends the
    score to the trajectory file.

    Args:
        example: Task configuration dict handed to DesktopEnv as task_config.
        agent: Object exposing ``action_space`` and ``predict(observation)``
            returning a list of actions.
        max_steps: Maximum number of agent prediction rounds.
        example_trajectory_dir: Output directory for screenshots,
            trajectory.json, and (when recording) recording.mp4.
        recording: When True, ask the env controller to screen-record the run.
    """
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
        task_config=example
    )
    # reset the environment to certain snapshot
    observation = env.reset()
    done = False
    step_num = 0

    if recording:
        # send a request to the server to start recording
        env.controller.start_recording()

    while not done and step_num < max_steps:
        actions = agent.predict(observation)
        step_num += 1
        for action in actions:
            # Capture the timestamp before executing the action
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_num, action)
            observation, reward, done, info = env.step(action)
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            logger.info("Info: %s", info)
            # Save screenshot and trajectory information
            # NOTE(review): observation['screenshot'] appears to be a local
            # file path produced by the env -- confirm against DesktopEnv.
            with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
                with open(observation['screenshot'], "rb") as __f:
                    screenshot = __f.read()
                _f.write(screenshot)
            # Append one JSON-lines record per executed action.
            with open(trajectory_recording_path, "a") as f:
                f.write(json.dumps({
                    "step_num": step_num,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_num}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break

    def stop_recording():
        # Best-effort: failures while retrieving the recording are reported
        # but do not abort evaluation.
        try:
            env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
        except Exception as e:
            print(f"An error occurred while stopping the recording: {e}")

    # Bound the recording retrieval to 30s so a hung transfer cannot stall
    # the whole run.
    try:
        func_timeout.func_timeout(30, stop_recording)
    except func_timeout.exceptions.FunctionTimedOut:
        logger.info("Recording timed out.")

    result = env.evaluate()
    logger.info("Result: %.2f", result)

    # Final JSON-lines record carries the evaluation score.
    with open(trajectory_recording_path, "a") as f:
        f.write(json.dumps({
            "result": result
        }))
        f.write("\n")

    # env.close()
    logger.info("Environment closed.")
def main(example_class, example_id, gpt4_model="gpt-4-vision-preview"):
    """Run one evaluation example with a GPT-4V agent in "both" mode.

    Args:
        example_class: Category folder of the example (e.g. ``"chrome"``).
        example_id: UUID filename (without extension) of the example JSON.
        gpt4_model: OpenAI vision model name used to drive the agent.
    """
    action_space = "pyautogui"
    gemini_model = "gemini-pro-vision"  # kept for the disabled Gemini path below
    logger.info("Running example %s/%s", example_class, example_id)
    logger.info("Using model %s", gpt4_model)
    # logger.info("Using model %s", gemini_model)
    example_path = f"evaluation_examples/examples/{example_class}/{example_id}.json"
    with open(example_path, "r", encoding="utf-8") as f:
        example = json.load(f)
    # Pin the VM snapshot that this run should start from.
    example["snapshot"] = "exp_v5"
    agent = GPT4v_Agent(
        api_key=os.environ.get("OPENAI_API_KEY"),
        model=gpt4_model,
        instruction=example['instruction'],
        action_space=action_space,
        exp="both",
    )
    # api_key = os.environ.get("GENAI_API_KEY")
    # agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space, exp="both")
    # Trajectories are grouped as exp_trajectory/<mode>/<class>/<model>/<id>.
    example_trajectory_dir = os.path.join("exp_trajectory", "both", example_class, gpt4_model, example_id)
    os.makedirs(example_trajectory_dir, exist_ok=True)
    run_one_example(example, agent, 15, example_trajectory_dir)
if __name__ == '__main__':
    # Example IDs grouped per application domain. Only the libreoffice_writer
    # batch at the bottom is currently executed; the other loops are kept
    # commented out so individual suites can be re-enabled quickly.
    os_list = [
        "94d95f96-9699-4208-98ba-3c3119edf9c2",
        "bedcedc4-4d72-425e-ad62-21960b11fe0d",
        "43c2d64c-bab5-4dcb-a30c-b888321c319a",
        "7688b85f-87a4-4e4a-b2f8-f3d6c3f29b82",
        "ec4e3f68-9ea4-4c18-a5c9-69f89d1178b3",
        "f9be0997-4b7c-45c5-b05c-4612b44a6118",
        "28cc3b7e-b194-4bc9-8353-d04c0f4d56d2",
        "5ea617a3-0e86-4ba6-aab2-dac9aa2e8d57",
        "e0df059f-28a6-4169-924f-b9623e7184cc",
        "ddc75b62-7311-4af8-bfb3-859558542b36",
        "b6781586-6346-41cd-935a-a6b1487918fc",
        "3ce045a0-877b-42aa-8d2c-b4a863336ab8",
        "a4d98375-215b-4a4d-aee9-3d4370fccc41",
        "13584542-872b-42d8-b299-866967b5c3ef",
        "23393935-50c7-4a86-aeea-2b78fd089c5c"
    ]
    # for example_id in os_list:
    #     try:
    #         main("os", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue
    # NOTE(review): "347ef137-..." and "357ef137-..." below differ by one
    # digit but share the same tail -- confirm both IDs really exist.
    calc_list = [
        "a9f325aa-8c05-4e4f-8341-9e4358565f4f",
        "ecb0df7a-4e8d-4a03-b162-053391d3afaf",
        "7efeb4b1-3d19-4762-b163-63328d66303b",
        "4e6fcf72-daf3-439f-a232-c434ce416af6",
        "6054afcb-5bab-4702-90a0-b259b5d3217c",
        "abed40dc-063f-4598-8ba5-9fe749c0615d",
        "01b269ae-2111-4a07-81fd-3fcd711993b0",
        "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14",
        "af2b02f7-acee-4be4-8b66-499fab394915",
        "da1d63b8-fa12-417b-ba18-f748e5f770f3",
        "636380ea-d5f6-4474-b6ca-b2ed578a20f1",
        "5ba77536-05c5-4aae-a9ff-6e298d094c3e",
        "4bc4eaf4-ca5e-4db2-8138-8d4e65af7c0b",
        "672a1b02-c62f-4ae2-acf0-37f5fb3052b0",
        "648fe544-16ba-44af-a587-12ccbe280ea6",
        "8985d1e4-5b99-4711-add4-88949ebb2308",
        "9e606842-2e27-43bf-b1d1-b43289c9589b",
        "fcb6e45b-25c4-4087-9483-03d714f473a9",
        "68c0c5b7-96f3-4e87-92a7-6c1b967fd2d2",
        "fff629ea-046e-4793-8eec-1a5a15c3eb35",
        "5c9a206c-bb00-4fb6-bb46-ee675c187df5",
        "e975ae74-79bd-4672-8d1c-dc841a85781d",
        "34a6938a-58da-4897-8639-9b90d6db5391",
        "b5a22759-b4eb-4bf2-aeed-ad14e8615f19",
        "2f9913a1-51ed-4db6-bfe0-7e1c95b3139e",
        "2558031e-401d-4579-8e00-3ecf540fb492",
        "0cecd4f3-74de-457b-ba94-29ad6b5dafb6",
        "4188d3a4-077d-46b7-9c86-23e1a036f6c1",
        "51b11269-2ca8-4b2a-9163-f21758420e78",
        "7e429b8d-a3f0-4ed0-9b58-08957d00b127",
        "347ef137-7eeb-4c80-a3bb-0951f26a8aff",
        "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5",
        "3aaa4e37-dc91-482e-99af-132a612d40f3",
        "37608790-6147-45d0-9f20-1137bb35703d",
        "f9584479-3d0d-4c79-affa-9ad7afdd8850",
        "d681960f-7bc3-4286-9913-a8812ba3261a",
        "21df9241-f8d7-4509-b7f1-37e501a823f7",
        "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17",
        "357ef137-7eeb-4c80-a3bb-0951f26a8aff",
        "aa3a8974-2e85-438b-b29e-a64df44deb4b",
        "a01fbce3-2793-461f-ab86-43680ccbae25",
        "4f07fbe9-70de-4927-a4d5-bb28bc12c52c"
    ]
    # for example_id in calc_list:
    #     try:
    #         main("libreoffice_calc", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue
    impress_list = [
        "5d901039-a89c-4bfb-967b-bf66f4df075e",
        "550ce7e7-747b-495f-b122-acdc4d0b8e54",
        "455d3c66-7dc6-4537-a39a-36d3e9119df7",
        "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
        "c59742c0-4323-4b9d-8a02-723c251deaa0",
        "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
        "9ec204e4-f0a3-42f8-8458-b772a6797cab",
        "0f84bef9-9790-432e-92b7-eece357603fb",
        "ce88f674-ab7a-43da-9201-468d38539e4a",
        "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
        "a097acff-6266-4291-9fbd-137af7ecd439",
        "bf4e9888-f10f-47af-8dba-76413038b73c",
        "21760ecb-8f62-40d2-8d85-0cee5725cb72"
    ]
    # for example_id in impress_list:
    #     try:
    #         main("libreoffice_impress", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue
    vs_code_list = [
        "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
        "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
        "eabc805a-bfcf-4460-b250-ac92135819f6",
        "982d12a5-beab-424f-8d38-d2a48429e511",
        "4e60007a-f5be-4bfc-9723-c39affa0a6d3",
        "e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
        "9439a27b-18ae-42d8-9778-5f68f891805e",
        "ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
        "930fdb3b-11a8-46fe-9bac-577332e2640e",
        "276cc624-87ea-4f08-ab93-f770e3790175",
        "9d425400-e9b2-4424-9a4b-d4c7abac4140"
    ]
    # for example_id in vs_code_list:
    #     try:
    #         main("vs_code", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue
    multiple_list = [
        "f8cfa149-d1c1-4215-8dac-4a0932bad3c2",
        "897e3b53-5d4d-444b-85cb-2cdc8a97d903",
        "4e9f0faf-2ecc-4ae8-a804-28c9a75d1ddc",
        "b52b40a5-ad70-4c53-b5b0-5650a8387052",
        "46407397-a7d5-4c6b-92c6-dbe038b1457b",
        "2b9493d7-49b8-493a-a71b-56cd1f4d6908",
        "51f5801c-18b3-4f25-b0c3-02f85507a078",
        "2c9fc0de-3ee7-45e1-a5df-c86206ad78b5",
        "510f64c8-9bcc-4be1-8d30-638705850618",
        "937087b6-f668-4ba6-9110-60682ee33441",
        "ee9a3c83-f437-4879-8918-be5efbb9fac7",
        "3680a5ee-6870-426a-a997-eba929a0d25c",
        "e135df7c-7687-4ac0-a5f0-76b74438b53e",
        "58565672-7bfe-48ab-b828-db349231de6b",
        "2fe4b718-3bd7-46ec-bdce-b184f5653624"
    ]
    # for example_id in multiple_list:
    #     try:
    #         main("multi_apps", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue
    chrome_list = [
        # "bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
        "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
        "06fe7178-4491-4589-810f-2e2bc9502122",
        "e1e75309-3ddb-4d09-92ec-de869c928143",
        "35253b65-1c19-4304-8aa4-6884b8218fc0",
        "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
        "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
        "44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
        "2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
        "480bcfea-d68f-4aaa-a0a9-2589ef319381",
        "af630914-714e-4a24-a7bb-f9af687d3b91"
    ]
    # for example_id in chrome_list:
    #     try:
    #         main("chrome", example_id)
    #     except Exception as e:
    #         logger.error("An error occurred while running the example: %s", e)
    #         continue
    writer_list = [
        "6ada715d-3aae-4a32-a6a7-429b2e43fb93",
        "ecc2413d-8a48-416e-a3a2-d30106ca36cb",
        "0e47de2a-32e0-456c-a366-8c607ef7a9d2",
        "4bcb1253-a636-4df4-8cb0-a35c04dfef31",
        "0810415c-bde4-4443-9047-d5f70165a697",
        "e528b65e-1107-4b8c-8988-490e4fece599",
        "66399b0d-8fda-4618-95c4-bfc6191617e9",
        "936321ce-5236-426a-9a20-e0e3c5dc536f",
        "3ef2b351-8a84-4ff2-8724-d86eae9b842e",
        "0b17a146-2934-46c7-8727-73ff6b6483e8",
        "0e763496-b6bb-4508-a427-fad0b6c3e195",
        "f178a4a9-d090-4b56-bc4c-4b72a61a035d",
        "adf5e2c3-64c7-4644-b7b6-d2f0167927e7",
        "0a0faba3-5580-44df-965d-f562a99b291c",
        "e246f6d8-78d7-44ac-b668-fcf47946cb50",
        "8472fece-c7dd-4241-8d65-9b3cd1a0b568",
        "88fe4b2d-3040-4c70-9a70-546a47764b48",
        "d53ff5ee-3b1a-431e-b2be-30ed2673079b",
        "72b810ef-4156-4d09-8f08-a0cf57e7cefe",
        "6f81754e-285d-4ce0-b59e-af7edb02d108",
        "b21acd93-60fd-4127-8a43-2f5178f4a830"
    ]
    # Active suite: run every writer example; failures are logged and skipped
    # so one broken example does not abort the whole batch.
    for example_id in writer_list:
        try:
            main("libreoffice_writer", example_id)
        except Exception as e:
            logger.error("An error occurred while running the example: %s", e)
            continue

View File

@@ -1,155 +0,0 @@
import ctypes
import datetime
import json
import logging
import os
import sys
import func_timeout
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #
logger = logging.getLogger("desktopenv.experiment")
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
env = DesktopEnv(
path_to_vm=PATH_TO_VM,
action_space=agent.action_space,
task_config=example
)
# reset the environment to certain snapshot
observation = env.reset()
done = False
step_num = 0
if recording:
# send a request to the server to start recording
env.controller.start_recording()
while not done and step_num < max_steps:
actions = agent.predict(observation)
step_num += 1
for action in actions:
# Capture the timestamp before executing the action
action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
logger.info("Step %d: %s", step_num, action)
observation, reward, done, info = env.step(action)
logger.info("Reward: %.2f", reward)
logger.info("Done: %s", done)
logger.info("Info: %s", info)
# Save screenshot and trajectory information
with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
with open(observation['screenshot'], "rb") as __f:
screenshot = __f.read()
_f.write(screenshot)
with open(trajectory_recording_path, "a") as f:
f.write(json.dumps({
"step_num": step_num,
"action_timestamp": action_timestamp,
"action": action,
"reward": reward,
"done": done,
"info": info,
"screenshot_file": f"step_{step_num}_{action_timestamp}.png"
}))
f.write("\n")
if done:
logger.info("The episode is done.")
break
def stop_recording():
try:
env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
except Exception as e:
print(f"An error occurred while stopping the recording: {e}")
try:
func_timeout.func_timeout(30, stop_recording)
except func_timeout.exceptions.FunctionTimedOut:
logger.info("Recording timed out.")
result = env.evaluate()
logger.info("Result: %.2f", result)
with open(trajectory_recording_path, "a") as f:
f.write(json.dumps({
"result": result
}))
f.write("\n")
# env.close()
logger.info("Environment closed.")
def main(example_class, example_id):
action_space = "pyautogui"
gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision"
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_v5"
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
action_space=action_space, exp="seeact")
# api_key = os.environ.get("GENAI_API_KEY")
# agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space)
root_trajectory_dir = "exp_trajectory"
example_trajectory_dir = os.path.join(root_trajectory_dir, "seeact", example_class, gpt4_model, example_id)
# example_trajectory_dir = os.path.join(root_trajectory_dir, "seeact", example_class, gemini_model, example_id)
os.makedirs(example_trajectory_dir, exist_ok=True)
run_one_example(example, agent, 15, example_trajectory_dir)
if __name__ == '__main__':
xx_list = [
]
for example_id in xx_list:
main("xx", example_id)

View File

@@ -1,261 +0,0 @@
#import ctypes
import datetime
import json
import logging
import os
import sys
import func_timeout
from desktop_env.envs.desktop_env import DesktopEnv
from mm_agents.gpt_4v_agent import GPT4v_Agent
# Logger Configs {{{ #
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)
formatter = logging.Formatter(
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))
logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #
logger = logging.getLogger("desktopenv.experiment")
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
env = DesktopEnv(
path_to_vm=PATH_TO_VM,
action_space=agent.action_space,
task_config=example
)
# reset the environment to certain snapshot
observation = env.reset()
done = False
step_num = 0
if recording:
# send a request to the server to start recording
env.controller.start_recording()
while not done and step_num < max_steps:
actions = agent.predict(observation)
step_num += 1
for action in actions:
# Capture the timestamp before executing the action
action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
logger.info("Step %d: %s", step_num, action)
observation, reward, done, info = env.step(action)
logger.info("Reward: %.2f", reward)
logger.info("Done: %s", done)
logger.info("Info: %s", info)
# Save screenshot and trajectory information
with open(os.path.join(example_trajectory_dir, f"step_{step_num}_{action_timestamp}.png"), "wb") as _f:
with open(observation['screenshot'], "rb") as __f:
screenshot = __f.read()
_f.write(screenshot)
with open(trajectory_recording_path, "a") as f:
f.write(json.dumps({
"step_num": step_num,
"action_timestamp": action_timestamp,
"action": action,
"reward": reward,
"done": done,
"info": info,
"screenshot_file": f"step_{step_num}_{action_timestamp}.png"
}))
f.write("\n")
if done:
logger.info("The episode is done.")
break
def stop_recording():
try:
env.controller.end_recording(os.path.join(example_trajectory_dir, "recording.mp4"))
except Exception as e:
print(f"An error occurred while stopping the recording: {e}")
try:
func_timeout.func_timeout(30, stop_recording)
except func_timeout.exceptions.FunctionTimedOut:
logger.info("Recording timed out.")
result = env.evaluate()
logger.info("Result: %.2f", result)
with open(trajectory_recording_path, "a") as f:
f.write(json.dumps({
"result": result
}))
f.write("\n")
# env.close()
logger.info("Environment closed.")
def main(example_class, example_id):
action_space = "pyautogui"
gpt4_model = "gpt-4-vision-preview"
gemini_model = "gemini-pro-vision"
with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
example = json.load(f)
example["snapshot"] = "exp_v5"
logger.info("TASK: %s/%s", example_class, example_id)
api_key = os.environ.get("OPENAI_API_KEY")
agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, max_tokens=1000, instruction=example['instruction'],
action_space=action_space, exp="som")
# api_key = os.environ.get("GENAI_API_KEY")
# agent = GeminiPro_Agent(api_key=api_key, model=gemini_model, instruction=example['instruction'], action_space=action_space)
root_trajectory_dir = "exp_trajectory"
example_trajectory_dir = os.path.join(root_trajectory_dir, "som", example_class, gpt4_model, example_id)
# example_trajectory_dir = os.path.join(root_trajectory_dir, "som", example_class, gemini_model, example_id)
os.makedirs(example_trajectory_dir, exist_ok=True)
run_one_example(example, agent, 15, example_trajectory_dir)
if __name__ == '__main__':
from tqdm import tqdm
# impress_list = [
# # "5d901039-a89c-4bfb-967b-bf66f4df075e",
# "550ce7e7-747b-495f-b122-acdc4d0b8e54",
# "455d3c66-7dc6-4537-a39a-36d3e9119df7",
# "af23762e-2bfd-4a1d-aada-20fa8de9ce07",
# "c59742c0-4323-4b9d-8a02-723c251deaa0",
# "ef9d12bd-bcee-4ba0-a40e-918400f43ddf",
# "9ec204e4-f0a3-42f8-8458-b772a6797cab",
# "0f84bef9-9790-432e-92b7-eece357603fb",
# "ce88f674-ab7a-43da-9201-468d38539e4a",
# "3b27600c-3668-4abd-8f84-7bcdebbccbdb",
# "a097acff-6266-4291-9fbd-137af7ecd439",
# "bf4e9888-f10f-47af-8dba-76413038b73c",
# "21760ecb-8f62-40d2-8d85-0cee5725cb72"
# ]
# for example_id in impress_list:
# main("libreoffice_impress", example_id)
vlc_list = [
"8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
"8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
"8f080098-ddb1-424c-b438-4e96e5e4786e",
"bba3381f-b5eb-4439-bd9e-80c22218d5a7",
"fba2c100-79e8-42df-ae74-b592418d54f4",
"efcf0d81-0835-4880-b2fd-d866e8bc2294",
"8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
"aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
"386dbd0e-0241-4a0a-b6a2-6704fba26b1c",
"9195653c-f4aa-453d-aa95-787f6ccfaae9",
"d06f0d4d-2cd5-4ede-8de9-598629438c6e",
"a5bbbcd5-b398-4c91-83d4-55e1e31bbb81",
"f3977615-2b45-4ac5-8bba-80c17dbe2a37",
"215dfd39-f493-4bc3-a027-8a97d72c61bf"
]
# for example_id in tqdm(vlc_list):
# try:
# main("vlc", example_id)
# except Exception as e:
# print(f"An error occurred while running the example: {e}")
# continue
chrome_list = [
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
"06fe7178-4491-4589-810f-2e2bc9502122",
"e1e75309-3ddb-4d09-92ec-de869c928143",
"35253b65-1c19-4304-8aa4-6884b8218fc0",
"2ad9387a-65d8-4e33-ad5b-7580065a27ca",
"7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
"44ee5668-ecd5-4366-a6ce-c1c9b8d4e938",
"2ae9ba84-3a0d-4d4c-8338-3a1478dc5fe3",
"480bcfea-d68f-4aaa-a0a9-2589ef319381",
"af630914-714e-4a24-a7bb-f9af687d3b91"
]
for example_id in tqdm(chrome_list):
try:
main("chrome", example_id)
except Exception as e:
print(f"An error occurred while running the example: {e}")
continue
vs_code_list = [
"0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
"53ad5833-3455-407b-bbc6-45b4c79ab8fb",
"eabc805a-bfcf-4460-b250-ac92135819f6",
"982d12a5-beab-424f-8d38-d2a48429e511",
"4e60007a-f5be-4bfc-9723-c39affa0a6d3",
"e2b5e914-ffe1-44d2-8e92-58f8c5d92bb2",
"9439a27b-18ae-42d8-9778-5f68f891805e",
"ea98c5d7-3cf9-4f9b-8ad3-366b58e0fcae",
"930fdb3b-11a8-46fe-9bac-577332e2640e",
"276cc624-87ea-4f08-ab93-f770e3790175",
"9d425400-e9b2-4424-9a4b-d4c7abac4140"
]
for example_id in tqdm(vs_code_list):
try:
main("vs_code", example_id)
except Exception as e:
print(f"An error occurred while running the example: {e}")
continue
thunderbird_list = [
"bb5e4c0d-f964-439c-97b6-bdb9747de3f4",
"7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
"12086550-11c0-466b-b367-1d9e75b3910e",
"06fe7178-4491-4589-810f-2e2bc9502122",
"6766f2b8-8a72-417f-a9e5-56fcaa735837",
"e1e75309-3ddb-4d09-92ec-de869c928143",
"3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5",
"35253b65-1c19-4304-8aa4-6884b8218fc0",
"d088f539-cab4-4f9a-ac92-9999fc3a656e",
"2ad9387a-65d8-4e33-ad5b-7580065a27ca",
"480bcfea-d68f-4aaa-a0a9-2589ef319381",
"030eeff7-b492-4218-b312-701ec99ee0cc",
"94760984-3ff5-41ee-8347-cf1af709fea0",
"99146c54-4f37-4ab8-9327-5f3291665e1e",
"c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
]
for example_id in tqdm(thunderbird_list):
try:
main("thunderbird", example_id)
except Exception as e:
print(f"An error occurred while running the example: {e}")
continue

72
lib_run_single.py Normal file
View File

@@ -0,0 +1,72 @@
import datetime
import json
import logging
import os
# import wandb
from wrapt_timeout_decorator import *
# Module-scoped logger for the experiment runner.
logger = logging.getLogger("desktopenv.experiment")
# Open the JSON file
with open("./settings.json", "r") as file:
    # Load the JSON data from the file
    data = json.load(file)
# Per-example wall-clock limit (seconds) consumed by the @timeout decorator
# on run_single_example below.
# NOTE(review): this file read happens at import time and raises if
# settings.json is absent from the CWD -- confirm callers always run from
# the repository root.
time_limit = data["time_limit"]
# Abort the whole example if it exceeds time_limit seconds
# (use_signals=False so the timeout also works off the main thread).
@timeout(time_limit, use_signals=False)
def run_single_example(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
    """Run one benchmark example, logging screenshots and a JSONL trajectory.

    Args:
        agent: Agent exposing ``reset()`` and ``predict(instruction, obs)``.
        env: Desktop environment exposing ``reset``/``step``/``evaluate`` and
            a ``controller`` for screen recording.
        example: Task configuration dict for ``env.reset``.
        max_steps: Maximum number of agent prediction rounds.
        instruction: Natural-language task instruction passed to the agent.
        args: Namespace providing ``sleep_after_execution``.
        example_result_dir: Directory receiving screenshots, ``traj.jsonl``,
            ``result.txt`` and ``recording.mp4``.
        scores: Mutable list; the final evaluation score is appended to it.
    """
    agent.reset()
    obs = env.reset(task_config=example)
    done = False
    step_idx = 0
    env.controller.start_recording()
    # str_table = wandb.Table(columns=["Screenshot", "A11T", "Modle Response", "Action", "Action timestamp", "Done"])
    while not done and step_idx < max_steps:
        response, actions = agent.predict(
            instruction,
            obs
        )
        for action in actions:
            # Capture the timestamp before executing the action
            # so the screenshot filename matches the trajectory entry.
            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
            logger.info("Step %d: %s", step_idx + 1, action)
            obs, reward, done, info = env.step(action, args.sleep_after_execution)
            logger.info("Reward: %.2f", reward)
            logger.info("Done: %s", done)
            # Save screenshot and trajectory information
            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
                      "wb") as _f:
                with open(obs['screenshot'], "rb") as __f:
                    screenshot = __f.read()
                _f.write(screenshot)
            # get a11tree and save to wandb
            # NOTE(review): the fetched tree is only consumed by the
            # commented-out wandb logging below; currently unused at runtime.
            thisrun_a11tree = env.controller.get_accessibility_tree()
            # str_table.add_data(wandb.Image(data_or_path=os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"), caption=f"step_{step_idx + 1}_{action_timestamp}"),
            #                    thisrun_a11tree,
            #                    response, action, action_timestamp, done)
            # run.log({"Reward": reward})
            # Append one JSON object per action to the trajectory log.
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({
                    "step_num": step_idx + 1,
                    "action_timestamp": action_timestamp,
                    "action": action,
                    "reward": reward,
                    "done": done,
                    "info": info,
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }))
                f.write("\n")
            if done:
                logger.info("The episode is done.")
                break
        step_idx += 1
    # run.log({"str_trajectory": str_table})
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
        f.write(f"{result}\n")
    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
    # run.log({"Result": result})

36
main.py
View File

@@ -47,38 +47,38 @@ def human_agent():
Runs the Gym environment with human input.
"""
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--path', type=str, required=True, help="Path to the virtual machine .vmx file.")
parser.add_argument('-s', '--snapshot', type=str, help="Name of the snapshot to restore.")
parser.add_argument('-p', '--path', type=str, default=r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu3\Ubuntu3.vmx", help="Path to the virtual machine .vmx file.")
parser.add_argument('-s', '--snapshot', type=str, default='init_state', help="Name of the snapshot to restore.")
parser.add_argument('-e', '--example', type=str, help="Path to the example json file.")
args = parser.parse_args(sys.argv[1:])
example_path = args.example if args.example is not None and os.path.exists(args.example) else \
'evaluation_examples/examples/libreoffice_writer/6a33f9b9-0a56-4844-9c3f-96ec3ffb3ba2.json'
with open(example_path, "r") as f:
'evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json'
with open(example_path, "r", encoding="utf-8") as f:
example = json.load(f)
# change to your customized snapshot
if args.snapshot is not None: example["snapshot"] = args.snapshot
if args.snapshot is not None:
example['snapshot'] = args.snapshot
assert os.path.exists(args.path), "The specified path to the .vmx file does not exist."
env = DesktopEnv(
path_to_vm=args.path,
action_space="computer_13",
task_config=example
snapshot_name=args.snapshot,
action_space="computer_13"
)
# reset the environment to certain snapshot
observation = env.reset()
logger.info('\x1b[32m[TASK INSTRUCTION]: \x1b[32;3m%s\x1b[0m', example["instruction"])
observation = env.reset(task_config=example)
done = False
logger.info('\x1b[32m[TASK INSTRUCTION]: \x1b[32;3m%s\x1b[0m', example["instruction"])
trajectory = [
# {
# "action_type": "MOVE_TO",
# "parameters": {
# "x": 754,
# "y": 1057
# }
# },
# {"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}}
{
"action_type": "MOVE_TO", #
"parameters": {
"x": 754,
"y": 1057
}
},
{"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}}
]
for i in range(len(trajectory)):

View File

@@ -26,7 +26,7 @@ def find_leaf_nodes(xlm_file_str):
state_ns = "uri:deskat:state.at-spi.gnome.org"
component_ns = "uri:deskat:component.at-spi.gnome.org"
def judge_node(node: ET, platform="ubuntu") -> bool:
def judge_node(node: ET, platform="ubuntu", check_image=False) -> bool:
keeps: bool = node.tag.startswith("document")\
or node.tag.endswith("item")\
or node.tag.endswith("button")\
@@ -55,23 +55,25 @@ def judge_node(node: ET, platform="ubuntu") -> bool:
or platform=="windows"\
and node.get("{{{:}}}visible".format(state_ns), "false")=="true"\
)\
and ( node.get("{{{:}}}enabled".format(state_ns), "false")=="true"\
or node.get("{{{:}}}editable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}expandable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}checkable".format(state_ns), "false")=="true"
)\
and (node.get("name", "") != "" or node.text is not None and len(node.text)>0)
and ( node.get("{{{:}}}enabled".format(state_ns), "false")=="true"\
or node.get("{{{:}}}editable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}expandable".format(state_ns), "false")=="true"\
or node.get("{{{:}}}checkable".format(state_ns), "false")=="true"
)\
and ( node.get("name", "") != "" or node.text is not None and len(node.text)>0\
or check_image and node.get("image", "false")=="true"
)
coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(component_ns), "(-1, -1)"))
sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format(component_ns), "(-1, -1)"))
keeps = keeps and coordinates[0]>0 and coordinates[1]>0 and sizes[0]>0 and sizes[1]>0
return keeps
def filter_nodes(root: ET, platform="ubuntu"):
def filter_nodes(root: ET, platform="ubuntu", check_image=False):
filtered_nodes = []
for node in root.iter():
if judge_node(node, platform):
if judge_node(node, platform, check_image):
filtered_nodes.append(node)
#print(ET.tostring(node, encoding="unicode"))
@@ -155,12 +157,12 @@ def print_nodes_with_indent(nodes, indent=0):
if __name__ == '__main__':
import json
with open('4.json', 'r', encoding='utf-8') as f:
xml_file_str = json.load(f)["AT"]
with open('selection_sorted(imaged).xml', 'r', encoding='utf-8') as f:
xml_file_str = f.read()
filtered_nodes = filter_nodes(ET.fromstring(xml_file_str))
print(len(filtered_nodes))
masks = draw_bounding_boxes( filtered_nodes, '4.png'
, '4.a.png'
masks = draw_bounding_boxes( filtered_nodes, 'selection_sorted(imaged).png'
, 'selection_sorted(imaged).ai.png'
)
# print(masks)

View File

@@ -5,10 +5,10 @@ import os
import re
import time
import uuid
import xml.etree.ElementTree as ET
from http import HTTPStatus
from io import BytesIO
from typing import Dict, List
import xml.etree.ElementTree as ET
import backoff
import dashscope
@@ -16,20 +16,13 @@ import google.generativeai as genai
import openai
import requests
from PIL import Image
from openai import (
APIConnectionError,
APIError,
RateLimitError
)
from google.api_core.exceptions import InvalidArgument
from mm_agents.accessibility_tree_wrap.heuristic_retrieve import find_leaf_nodes, filter_nodes, draw_bounding_boxes
from mm_agents.accessibility_tree_wrap.heuristic_retrieve import filter_nodes, draw_bounding_boxes
from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION, \
SYS_PROMPT_IN_A11Y_OUT_CODE, SYS_PROMPT_IN_A11Y_OUT_ACTION, \
SYS_PROMPT_IN_BOTH_OUT_CODE, SYS_PROMPT_IN_BOTH_OUT_ACTION, \
SYS_PROMPT_IN_SOM_A11Y_OUT_TAG, \
SYS_PROMPT_SEEACT, ACTION_DESCRIPTION_PROMPT_SEEACT, ACTION_GROUNDING_PROMPT_SEEACT
import logging
SYS_PROMPT_IN_SOM_OUT_TAG
logger = logging.getLogger("desktopenv.agent")
@@ -41,10 +34,10 @@ def encode_image(image_path):
def linearize_accessibility_tree(accessibility_tree):
#leaf_nodes = find_leaf_nodes(accessibility_tree)
# leaf_nodes = find_leaf_nodes(accessibility_tree)
filtered_nodes = filter_nodes(ET.fromstring(accessibility_tree))
linearized_accessibility_tree = "tag\tname\ttext\tposition\tsize\n"
linearized_accessibility_tree = "tag\tname\ttext\tposition (top-left x&y)\tsize (w&h)\n"
# Linearize the accessibility tree nodes into a table format
for node in filtered_nodes:
@@ -72,7 +65,8 @@ def tag_screenshot(screenshot, accessibility_tree):
uuid_str = str(uuid.uuid4())
os.makedirs("tmp/images", exist_ok=True)
tagged_screenshot_file_path = os.path.join("tmp/images", uuid_str + ".png")
nodes = filter_nodes(find_leaf_nodes(accessibility_tree))
# nodes = filter_nodes(find_leaf_nodes(accessibility_tree))
nodes = filter_nodes(ET.fromstring(accessibility_tree), check_image=True)
# Make tag screenshot
marks, drew_nodes = draw_bounding_boxes(nodes, screenshot, tagged_screenshot_file_path)
@@ -168,79 +162,66 @@ def parse_code_from_som_string(input_string, masks):
return actions
class GPT4v_Agent:
class PromptAgent:
def __init__(
self,
api_key,
instruction,
model="gpt-4-vision-preview",
max_tokens=500,
max_tokens=1500,
top_p=0.9,
temperature=0.5,
action_space="computer_13",
exp="screenshot_a11y_tree"
# exp can be in ["screenshot", "a11y_tree", "screenshot_a11y_tree", "som", "seeact"]
observation_type="screenshot_a11y_tree",
# observation_type can be in ["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"]
max_trajectory_length=3
):
self.instruction = instruction
self.model = model
self.max_tokens = max_tokens
self.top_p = top_p
self.temperature = temperature
self.action_space = action_space
self.exp = exp
self.max_trajectory_length = 3
self.headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
self.observation_type = observation_type
self.max_trajectory_length = max_trajectory_length
self.thoughts = []
self.actions = []
self.observations = []
if exp == "screenshot":
if observation_type == "screenshot":
if action_space == "computer_13":
self.system_message = SYS_PROMPT_IN_SCREENSHOT_OUT_ACTION
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_SCREENSHOT_OUT_CODE
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "a11y_tree":
elif observation_type == "a11y_tree":
if action_space == "computer_13":
self.system_message = SYS_PROMPT_IN_A11Y_OUT_ACTION
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_A11Y_OUT_CODE
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "both":
elif observation_type == "screenshot_a11y_tree":
if action_space == "computer_13":
self.system_message = SYS_PROMPT_IN_BOTH_OUT_ACTION
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_BOTH_OUT_CODE
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "som":
elif observation_type == "som":
if action_space == "computer_13":
raise ValueError("Invalid action space: " + action_space)
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_IN_SOM_A11Y_OUT_TAG
else:
raise ValueError("Invalid action space: " + action_space)
elif exp == "seeact":
if action_space == "computer_13":
raise ValueError("Invalid action space: " + action_space)
elif action_space == "pyautogui":
self.system_message = SYS_PROMPT_SEEACT
self.system_message = SYS_PROMPT_IN_SOM_OUT_TAG
else:
raise ValueError("Invalid action space: " + action_space)
else:
raise ValueError("Invalid experiment type: " + exp)
raise ValueError("Invalid experiment type: " + observation_type)
self.system_message = self.system_message + "\nYou are asked to complete the following task: {}".format(
self.instruction)
def predict(self, obs: Dict) -> List:
def predict(self, instruction: str, obs: Dict) -> List:
"""
Predict the next action(s) based on the current observation.
"""
system_message = self.system_message + "\nYou are asked to complete the following task: {}".format(instruction)
# Prepare the payload for the API call
messages = []
@@ -251,7 +232,7 @@ class GPT4v_Agent:
"content": [
{
"type": "text",
"text": self.system_message
"text": system_message
},
]
})
@@ -272,7 +253,7 @@ class GPT4v_Agent:
for previous_obs, previous_action, previous_thought in zip(_observations, _actions, _thoughts):
# {{{1
if self.exp == "both":
if self.observation_type == "screenshot_a11y_tree":
_screenshot = previous_obs["screenshot"]
_linearized_accessibility_tree = previous_obs["accessibility_tree"]
logger.debug("LINEAR AT: %s", _linearized_accessibility_tree)
@@ -294,18 +275,15 @@ class GPT4v_Agent:
}
]
})
elif self.exp in ["som", "seeact"]:
elif self.observation_type in ["som"]:
_screenshot = previous_obs["screenshot"]
_linearized_accessibility_tree = previous_obs["accessibility_tree"]
logger.debug("LINEAR AT: %s", _linearized_accessibility_tree)
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
_linearized_accessibility_tree)
"text": "Given the tagged screenshot as below. What's the next step that you will do to help with the task?"
},
{
"type": "image_url",
@@ -316,7 +294,7 @@ class GPT4v_Agent:
}
]
})
elif self.exp == "screenshot":
elif self.observation_type == "screenshot":
_screenshot = previous_obs["screenshot"]
messages.append({
@@ -335,7 +313,7 @@ class GPT4v_Agent:
}
]
})
elif self.exp == "a11y_tree":
elif self.observation_type == "a11y_tree":
_linearized_accessibility_tree = previous_obs["accessibility_tree"]
messages.append({
@@ -349,7 +327,7 @@ class GPT4v_Agent:
]
})
else:
raise ValueError("Invalid experiment type: " + self.exp) # 1}}}
raise ValueError("Invalid observation_type type: " + self.observation_type) # 1}}}
messages.append({
"role": "assistant",
@@ -362,11 +340,11 @@ class GPT4v_Agent:
})
# {{{1
if self.exp in ["screenshot", "both"]:
if self.observation_type in ["screenshot", "screenshot_a11y_tree"]:
base64_image = encode_image(obs["screenshot"])
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
if self.exp == "both":
if self.observation_type == "screenshot_a11y_tree":
self.observations.append({
"screenshot": base64_image,
"accessibility_tree": linearized_accessibility_tree
@@ -383,7 +361,7 @@ class GPT4v_Agent:
{
"type": "text",
"text": "Given the screenshot as below. What's the next step that you will do to help with the task?"
if self.exp == "screenshot"
if self.observation_type == "screenshot"
else "Given the screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
linearized_accessibility_tree)
},
@@ -396,7 +374,7 @@ class GPT4v_Agent:
}
]
})
elif self.exp == "a11y_tree":
elif self.observation_type == "a11y_tree":
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
self.observations.append({
@@ -414,15 +392,13 @@ class GPT4v_Agent:
}
]
})
elif self.exp == "som":
elif self.observation_type == "som":
# Add som to the screenshot
masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"])
base64_image = encode_image(tagged_screenshot)
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
self.observations.append({
"screenshot": base64_image,
"accessibility_tree": linearized_accessibility_tree
"screenshot": base64_image
})
messages.append({
@@ -430,35 +406,7 @@ class GPT4v_Agent:
"content": [
{
"type": "text",
"text": "Given the tagged screenshot and info from accessibility tree as below:\n{}\nWhat's the next step that you will do to help with the task?".format(
linearized_accessibility_tree)
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/png;base64,{base64_image}",
"detail": "high"
}
}
]
})
elif self.exp == "seeact":
# Add som to the screenshot
masks, drew_nodes, tagged_screenshot = tag_screenshot(obs["screenshot"], obs["accessibility_tree"])
base64_image = encode_image(tagged_screenshot)
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
self.observations.append({
"screenshot": base64_image,
"accessibility_tree": linearized_accessibility_tree
})
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": ACTION_DESCRIPTION_PROMPT_SEEACT.format(linearized_accessibility_tree)
"text": "Given the tagged screenshot as below. What's the next step that you will do to help with the task?"
},
{
"type": "image_url",
@@ -470,141 +418,244 @@ class GPT4v_Agent:
]
})
else:
raise ValueError("Invalid experiment type: " + self.exp) # 1}}}
with open("messages.json", "w") as f:
f.write(json.dumps(messages, indent=4))
raise ValueError("Invalid observation_type type: " + self.observation_type) # 1}}}
# with open("messages.json", "w") as f:
# f.write(json.dumps(messages, indent=4))
response = self.call_llm({
"model": self.model,
"messages": messages,
"max_tokens": self.max_tokens
"max_tokens": self.max_tokens,
"top_p": self.top_p,
"temperature": self.temperature
})
logger.debug("RESPONSE: %s", response)
if self.exp == "seeact":
messages.append({
"role": "assistant",
"content": [
{
"type": "text",
"text": response
}
]
})
messages.append({
"role": "user",
"content": [
{
"type": "text",
"text": "{}\n\nWhat's the next step that you will do to help with the task?".format(
ACTION_GROUNDING_PROMPT_SEEACT)
}
]
})
response = self.call_llm({
"model": self.model,
"messages": messages,
"max_tokens": self.max_tokens
})
print(response)
logger.info("RESPONSE: %s", response)
try:
actions = self.parse_actions(response, masks)
self.thoughts.append(response)
except Exception as e:
except ValueError as e:
print("Failed to parse action from response", e)
actions = None
self.thoughts.append("")
return actions
return response, actions
@backoff.on_exception(
backoff.expo,
(APIError, RateLimitError, APIConnectionError),
max_tries=10
# here you should add more model exceptions as you want,
# but you are forbidden to add "Exception", that is, a common type of exception
# because we want to catch this kind of Exception in the outside to ensure each example won't exceed the time limit
(openai.RateLimitError,
openai.BadRequestError,
openai.InternalServerError,
InvalidArgument),
max_tries=5
)
def call_llm(self, payload):
if self.model.startswith("gpt"):
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}"
}
logger.info("Generating content with GPT model: %s", self.model)
response = requests.post(
"https://api.openai.com/v1/chat/completions",
headers=self.headers,
headers=headers,
json=payload
)
if response.status_code != 200:
if response.json()['error']['code'] == "context_length_exceeded":
print("Context length exceeded. Retrying with a smaller context.")
payload["messages"] = payload["messages"][-1:]
logger.error("Context length exceeded. Retrying with a smaller context.")
payload["messages"] = [payload["messages"][0]] + payload["messages"][-1:]
retry_response = requests.post(
"https://api.openai.com/v1/chat/completions",
headers=self.headers,
headers=headers,
json=payload
)
if retry_response.status_code != 200:
print("Failed to call LLM: " + retry_response.text)
logger.error(
"Failed to call LLM even after attempt on shortening the history: " + retry_response.text)
return ""
print("Failed to call LLM: " + response.text)
logger.error("Failed to call LLM: " + response.text)
time.sleep(5)
return ""
else:
return response.json()['choices'][0]['message']['content']
elif self.model.startswith("mistral"):
print("call mistral")
elif self.model.startswith("claude"):
messages = payload["messages"]
max_tokens = payload["max_tokens"]
top_p = payload["top_p"]
temperature = payload["temperature"]
claude_messages = []
for i, message in enumerate(messages):
claude_message = {
"role": message["role"],
"content": []
}
assert len(message["content"]) in [1, 2], "One text, or one text with one image"
for part in message["content"]:
if part['type'] == "image_url":
image_source = {}
image_source["type"] = "base64"
image_source["media_type"] = "image/png"
image_source["data"] = part['image_url']['url'].replace("data:image/png;base64,", "")
claude_message['content'].append({"type": "image", "source": image_source})
if part['type'] == "text":
claude_message['content'].append({"type": "text", "text": part['text']})
claude_messages.append(claude_message)
# the claude not support system message in our endpoint, so we concatenate it at the first user message
if claude_messages[0]['role'] == "system":
claude_system_message_item = claude_messages[0]['content'][0]
claude_messages[1]['content'].insert(0, claude_system_message_item)
claude_messages.pop(0)
# headers = {
# "x-api-key": os.environ["ANTHROPIC_API_KEY"],
# "anthropic-version": "2023-06-01",
# "content-type": "application/json"
# }
headers = {
"Accept": "application / json",
"Authorization": "Bearer " + os.environ["ANTHROPIC_API_KEY"],
"User-Agent": "Apifox/1.0.0 (https://apifox.com)",
"Content-Type": "application/json"
}
payload = {
"model": self.model,
"max_tokens": max_tokens,
"messages": claude_messages,
"temperature": temperature,
"top_p": top_p
}
response = requests.post(
# "https://chat.claude.com/v1/chat/completions",
"https://api.aigcbest.top/v1/chat/completions",
headers=headers,
json=payload
)
if response.status_code != 200:
logger.error("Failed to call LLM: " + response.text)
time.sleep(5)
return ""
# else:
# return response.json()['content'][0]['text']
else:
return response.json()['choices'][0]['message']['content']
elif self.model.startswith("mistral"):
print("Call mistral")
messages = payload["messages"]
max_tokens = payload["max_tokens"]
top_p = payload["top_p"]
temperature = payload["temperature"]
misrtal_messages = []
for i, message in enumerate(messages):
mistral_message = {
"role": message["role"],
"content": []
"content": ""
}
for part in message["content"]:
mistral_message['content'] = part['text'] if part['type'] == "text" else None
mistral_message['content'] = part['text'] if part['type'] == "text" else ""
misrtal_messages.append(mistral_message)
# the mistral not support system message in our endpoint, so we concatenate it at the first user message
if misrtal_messages[0]['role'] == "system":
misrtal_messages[1]['content'] = misrtal_messages[0]['content'] + "\n" + misrtal_messages[1]['content']
misrtal_messages.pop(0)
# openai.api_base = "http://localhost:8000/v1"
# openai.api_key = "test"
# response = openai.ChatCompletion.create(
# messages=misrtal_messages,
# model="Mixtral-8x7B-Instruct-v0.1"
# )
from openai import OpenAI
TOGETHER_API_KEY = "d011650e7537797148fb6170ec1e0be7ae75160375686fae02277136078e90d2"
client = OpenAI(api_key=TOGETHER_API_KEY,
client = OpenAI(api_key=os.environ["TOGETHER_API_KEY"],
base_url='https://api.together.xyz',
)
logger.info("Generating content with Mistral model: %s", self.model)
response = client.chat.completions.create(
messages=misrtal_messages,
model="mistralai/Mixtral-8x7B-Instruct-v0.1",
max_tokens=1024
model=self.model,
max_tokens=max_tokens
)
try:
# return response['choices'][0]['message']['content']
return response.choices[0].message.content
except Exception as e:
print("Failed to call LLM: " + str(e))
return ""
elif self.model.startswith("THUDM"):
# THUDM/cogagent-chat-hf
print("Call CogAgent")
messages = payload["messages"]
max_tokens = payload["max_tokens"]
top_p = payload["top_p"]
temperature = payload["temperature"]
cog_messages = []
for i, message in enumerate(messages):
cog_message = {
"role": message["role"],
"content": []
}
for part in message["content"]:
if part['type'] == "image_url":
cog_message['content'].append(
{"type": "image_url", "image_url": {"url": part['image_url']['url']}})
if part['type'] == "text":
cog_message['content'].append({"type": "text", "text": part['text']})
cog_messages.append(cog_message)
# the cogagent not support system message in our endpoint, so we concatenate it at the first user message
if cog_messages[0]['role'] == "system":
cog_system_message_item = cog_messages[0]['content'][0]
cog_messages[1]['content'].insert(0, cog_system_message_item)
cog_messages.pop(0)
payload = {
"model": self.model,
"max_tokens": max_tokens,
"messages": cog_messages
}
base_url = "http://127.0.0.1:8000"
response = requests.post(f"{base_url}/v1/chat/completions", json=payload, stream=False)
if response.status_code == 200:
decoded_line = response.json()
content = decoded_line.get("choices", [{}])[0].get("message", "").get("content", "")
return content
else:
print("Failed to call LLM: ", response.status_code)
return ""
elif self.model.startswith("gemini"):
def encoded_img_to_pil_img(data_str):
base64_str = data_str.replace("data:image/png;base64,", "")
@@ -615,6 +666,8 @@ class GPT4v_Agent:
messages = payload["messages"]
max_tokens = payload["max_tokens"]
top_p = payload["top_p"]
temperature = payload["temperature"]
gemini_messages = []
for i, message in enumerate(messages):
@@ -645,24 +698,45 @@ class GPT4v_Agent:
gemini_messages[1]['parts'][0] = gemini_messages[0]['parts'][0] + "\n" + gemini_messages[1]['parts'][0]
gemini_messages.pop(0)
print(gemini_messages)
# since the gemini-pro-vision donnot support multi-turn message
if self.model == "gemini-pro-vision":
message_history_str = ""
for message in gemini_messages:
message_history_str += "<|" + message['role'] + "|>\n" + message['parts'][0] + "\n"
gemini_messages = [{"role": "user", "parts": [message_history_str, gemini_messages[-1]['parts'][1]]}]
# gemini_messages[-1]['parts'][1].save("output.png", "PNG")
# print(gemini_messages)
api_key = os.environ.get("GENAI_API_KEY")
assert api_key is not None, "Please set the GENAI_API_KEY environment variable"
genai.configure(api_key=api_key)
logger.info("Generating content with Gemini model: %s", self.model)
response = genai.GenerativeModel(self.model).generate_content(
gemini_messages,
generation_config={
"max_output_tokens": max_tokens
"candidate_count": 1,
"max_output_tokens": max_tokens,
"top_p": top_p,
"temperature": temperature
},
safety_settings={
"harassment": "block_none",
"hate": "block_none",
"sex": "block_none",
"danger": "block_none"
}
)
try:
return response.text
except Exception as e:
logger.error("Meet exception when calling Gemini API, " + str(e))
return ""
elif self.model.startswith("qwen"):
messages = payload["messages"]
max_tokens = payload["max_tokens"]
top_p = payload["top_p"]
temperature = payload["temperature"]
qwen_messages = []
@@ -673,13 +747,16 @@ class GPT4v_Agent:
}
assert len(message["content"]) in [1, 2], "One text, or one text with one image"
for part in message["content"]:
qwen_message['content'].append({"image": part['image_url']['url']}) if part['type'] == "image_url" else None
qwen_message['content'].append({"image": part['image_url']['url']}) if part[
'type'] == "image_url" else None
qwen_message['content'].append({"text": part['text']}) if part['type'] == "text" else None
qwen_messages.append(qwen_message)
response = dashscope.MultiModalConversation.call(model='qwen-vl-plus',
messages=messages)
response = dashscope.MultiModalConversation.call(
model='qwen-vl-plus',
messages=messages, # todo: add the hyperparameters
)
# The response status_code is HTTPStatus.OK indicate success,
# otherwise indicate request is failed, you can get error code
# and message from code and message.
@@ -698,7 +775,7 @@ class GPT4v_Agent:
def parse_actions(self, response: str, masks=None):
if self.exp in ["screenshot", "a11y_tree", "both"]:
if self.observation_type in ["screenshot", "a11y_tree", "screenshot_a11y_tree"]:
# parse from the response
if self.action_space == "computer_13":
actions = parse_actions_from_string(response)
@@ -710,7 +787,7 @@ class GPT4v_Agent:
self.actions.append(actions)
return actions
elif self.exp in ["som", "seeact"]:
elif self.observation_type in ["som"]:
# parse from the response
if self.action_space == "computer_13":
raise ValueError("Invalid action space: " + self.action_space)
@@ -722,3 +799,8 @@ class GPT4v_Agent:
self.actions.append(actions)
return actions
def reset(self):
self.thoughts = []
self.actions = []
self.observations = []

View File

@@ -1,401 +0,0 @@
# --------------------------------------------------------
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Xueyan Zou (xueyan@cs.wisc.edu)
# --------------------------------------------------------
# Define Test/Trainer/Saving
PIPELINE: XDecoderPipeline
TRAINER: xdecoder
SAVE_DIR: '../../data/output/test'
base_path: "./"
# Resume Logistic
RESUME: false
WEIGHT: false
RESUME_FROM: ''
EVAL_AT_START: False
# Logging and Debug
WANDB: False
LOG_EVERY: 100
FIND_UNUSED_PARAMETERS: false
# Speed up training
FP16: false
PORT: '36873'
# misc
LOADER:
JOINT: False
KEY_DATASET: 'coco'
##################
# Task settings
##################
VERBOSE: true
MODEL:
NAME: seem_model_v1
HEAD: xdecoder_head
MASK_ON: false
KEYPOINT_ON: false
LOAD_PROPOSALS: false
DIM_PROJ: 512
TEXT:
ARCH: vlpencoder
NAME: transformer
TOKENIZER: clip
CONTEXT_LENGTH: 77 # 77
WIDTH: 512
HEADS: 8
LAYERS: 12 # 6
AUTOGRESSIVE: True
BACKBONE:
NAME: focal
PRETRAINED: ''
LOAD_PRETRAINED: false
FOCAL:
PRETRAIN_IMG_SIZE: 224
PATCH_SIZE: 4
EMBED_DIM: 192
DEPTHS: [2, 2, 18, 2]
FOCAL_LEVELS: [4, 4, 4, 4]
FOCAL_WINDOWS: [3, 3, 3, 3]
DROP_PATH_RATE: 0.3
MLP_RATIO: 4.0
DROP_RATE: 0.0
PATCH_NORM: True
USE_CONV_EMBED: True
SCALING_MODULATOR: True
USE_CHECKPOINT: False
USE_POSTLN: true
USE_POSTLN_IN_MODULATION: false
USE_LAYERSCALE: True
OUT_FEATURES: ["res2", "res3", "res4", "res5"]
OUT_INDICES: [0, 1, 2, 3]
ENCODER:
NAME: transformer_encoder_fpn
IGNORE_VALUE: 255
NUM_CLASSES: 133
LOSS_WEIGHT: 1.0
CONVS_DIM: 512
MASK_DIM: 512
NORM: "GN"
IN_FEATURES: ["res2", "res3", "res4", "res5"]
DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"]
COMMON_STRIDE: 4
TRANSFORMER_ENC_LAYERS: 6
DECODER:
NAME: seem_v1
TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder"
MASK:
ENABLED: True
DETECTION: False
SPATIAL:
ENABLED: True
MAX_ITER: 1
GROUNDING:
ENABLED: True
MAX_LEN: 5
TEXT_WEIGHT: 2.0
CLASS_WEIGHT: 0.5
RETRIEVAL:
ENABLED: False
LVIS:
ENABLED: True
THRES: 0.7
OPENIMAGE:
ENABLED: False
NEGATIVE_SAMPLES: 5
GROUNDING:
ENABLED: False
MAX_LEN: 5
CAPTION:
ENABLED: False
PHRASE_PROB: 0.5
SIM_THRES: 0.95
DEEP_SUPERVISION: True
NO_OBJECT_WEIGHT: 0.1
GCLASS_WEIGHT: 0.4
GMASK_WEIGHT: 1.0
GDICE_WEIGHT: 1.0
SCLASS_WEIGHT: 0.4
SMASK_WEIGHT: 1.0
SDICE_WEIGHT: 1.0
OCLASS_WEIGHT: 0.4
OMASK_WEIGHT: 1.0
ODICE_WEIGHT: 1.0
CLASS_WEIGHT: 2.0
MASK_WEIGHT: 5.0
DICE_WEIGHT: 5.0
BBOX_WEIGHT: 5.0
GIOU_WEIGHT: 2.0
CAPTION_WEIGHT: 2.0
COST_SPATIAL:
CLASS_WEIGHT: 5.0
MASK_WEIGHT: 2.0
DICE_WEIGHT: 2.0
HIDDEN_DIM: 512
NUM_OBJECT_QUERIES: 101
NHEADS: 8
DROPOUT: 0.0
DIM_FEEDFORWARD: 2048
MAX_SPATIAL_LEN: [512, 512, 512, 512]
# ENC_LAYERS: 0
PRE_NORM: False
ENFORCE_INPUT_PROJ: False
SIZE_DIVISIBILITY: 32
TRAIN_NUM_POINTS: 12544
OVERSAMPLE_RATIO: 3.0
IMPORTANCE_SAMPLE_RATIO: 0.75
DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query
TOP_GROUNDING_LAYERS: 10
TOP_CAPTION_LAYERS: 10
TOP_SPATIAL_LAYERS: 10
TOP_OPENIMAGE_LAYERS: 10
TEST:
SEMANTIC_ON: True
INSTANCE_ON: True
PANOPTIC_ON: True
OVERLAP_THRESHOLD: 0.8
OBJECT_MASK_THRESHOLD: 0.8
SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false
# Spatial sampler
STROKE_SAMPLER:
MAX_CANDIDATE: 1
CANDIDATE_PROBS: [0.25, 0.25, 0.25, 0.25] # for training only
CANDIDATE_NAMES: ["Point", "Polygon", "Scribble", "Circle"]
DILATION: 3
CIRCLE:
NUM_STROKES: 5
STROKE_PRESET: ['object_like', 'object_like_middle', 'object_like_small']
STROKE_PROB: [0.33, 0.33, 0.33]
SCRIBBLE:
NUM_STROKES: 5
STROKE_PRESET: ['rand_curve', 'rand_curve_small']
STROKE_PROB: [0.5, 0.5]
POINT:
NUM_POINTS: 20
POLYGON:
MAX_POINTS: 9
EVAL:
MODE: 'best' # best/random/best_random
NEGATIVE: False
MAX_ITER: 20
IOU_ITER: 1
GROUNDING: False
# Multi-modal Architecture, order matters
ATTENTION_ARCH:
VARIABLE:
queries: ['object', 'grounding', 'spatial']
tokens: ['grounding', 'spatial']
memories: ['spatial']
SELF_ATTENTION:
queries:
object: ['queries_object']
grounding: ['queries_grounding', 'tokens_grounding']
spatial: ['queries_spatial', 'tokens_spatial', 'memories_spatial']
tokens:
grounding: ['queries_grounding', 'tokens_grounding']
spatial: ['tokens_spatial']
memories:
spatial: ['memories_spatial']
CROSS_ATTENTION:
queries:
object: True
grounding: True
spatial: True
memories:
spatial: True
tokens:
grounding: False
spatial: False
MASKING: ['tokens_spatial', 'tokens_grounding']
DUPLICATION:
queries:
grounding: 'queries_object'
spatial: 'queries_object'
SPATIAL_MEMORIES: 32
QUERY_NUMBER: 3
DATASETS:
TRAIN: ["coco_2017_train_panoptic_filtrefgumdval_with_sem_seg_caption_grounding_lvis",]
# TRAIN: ["coco_2017_train_panoptic_with_sem_seg_caption_grounding",]
TEST: ["coco_2017_val_panoptic_with_sem_seg", "pascalvoc_val_Point", "refcocog_val_umd"] # to evaluate instance and semantic performance as well
# TEST: ["pascalvoc_val_Point"] # [pascalvoc, openimage600, ade600, davis, cocomini], [Point, Scribble, Polygon, Circle, Box]
# TEST: ["cocomini_val_Point", "cocomini_val_Circle", "cocomini_val_Scribble", "cocomini_val_Polygon", "cocomini_val_Box"] # [pascalvoc, openimage600, ade600, davis, cocomini], [Point, Scribble, Polygon, Circle, Box]
# TEST: ["ade600_val_Point", "ade600_val_Circle", "ade600_val_Scribble", "ade600_val_Polygon", "ade600_val_Box"] # [pascalvoc, openimage600, ade600, davis, cocomini], [Point, Scribble, Polygon, Circle, Box]
# TEST: ["openimage600_val_Point", "openimage600_val_Circle", "openimage600_val_Scribble", "openimage600_val_Polygon", "openimage600_val_Box"] # [pascalvoc, openimage600, ade600, davis, cocomini], [Point, Scribble, Polygon, Circle, Box]
CLASS_CONCAT: false
SIZE_DIVISIBILITY: 32
PROPOSAL_FILES_TRAIN: []
INPUT:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
TRAIN:
ASPECT_RATIO_GROUPING: true
BATCH_SIZE_TOTAL: 4
BATCH_SIZE_PER_GPU: 4
SHUFFLE: true
TEST:
DETECTIONS_PER_IMAGE: 100
NAME: coco_eval
IOU_TYPE: ['bbox', 'segm']
USE_MULTISCALE: false
BATCH_SIZE_TOTAL: 8
MODEL_FILE: ''
AUG:
ENABLED: False
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 8
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True
COCO:
INPUT:
MIN_SIZE_TRAIN: 800
MAX_SIZE_TRAIN: 1333
MIN_SIZE_TRAIN_SAMPLING: 'choice'
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
IMAGE_SIZE: 1024
MIN_SCALE: 0.1
MAX_SCALE: 2.0
DATASET_MAPPER_NAME: "coco_interactive"
IGNORE_VALUE: 255
COLOR_AUG_SSD: False
SIZE_DIVISIBILITY: 32
RANDOM_FLIP: "horizontal"
MASK_FORMAT: "polygon"
FORMAT: "RGB"
CROP:
ENABLED: True
DATASET:
DATASET: 'coco'
# Validation dataset
ADE20K:
INPUT:
MIN_SIZE_TRAIN: 640
MIN_SIZE_TRAIN_SAMPLING: "choice"
MIN_SIZE_TEST: 640
MAX_SIZE_TRAIN: 2560
MAX_SIZE_TEST: 2560
MASK_FORMAT: "polygon"
CROP:
ENABLED: True
TYPE: "absolute"
SIZE: (640, 640)
SINGLE_CATEGORY_MAX_AREA: 1.0
COLOR_AUG_SSD: True
SIZE_DIVISIBILITY: 640 # used in dataset mapper
DATASET_MAPPER_NAME: "mask_former_panoptic"
FORMAT: "RGB"
DATASET:
DATASET: 'ade'
SBD:
INPUT:
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 1
VOC:
INPUT:
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 8
DAVIS:
INPUT:
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 8
VOS:
INPUT:
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 1
REF:
INPUT:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
MIN_SIZE_TEST: 512
MAX_SIZE_TEST: 1024
FORMAT: "RGB"
SPATIAL: False
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 4
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 8
# Detectron2 training config for optimizer and lr scheduler
SOLVER:
BASE_LR: 0.0001
STEPS: [0.88889, 0.96296]
MAX_ITER: 1
GAMMA: 0.1
WARMUP_FACTOR: 1.0
WARMUP_ITERS: 10
WARMUP_METHOD: "linear"
WEIGHT_DECAY: 0.05
OPTIMIZER: "ADAMW"
LR_SCHEDULER_NAME: "WarmupMultiStepLR"
LR_MULTIPLIER:
backbone: 0.1
lang_encoder: 0.1
FIX_PARAM:
backbone: True
lang_encoder: True
pixel_decoder: True
WEIGHT_DECAY_NORM: 0.0
WEIGHT_DECAY_EMBED: 0.0
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: "full_model"
CLIP_VALUE: 5.0 # 0.01
NORM_TYPE: 2.0
MAX_NUM_EPOCHS: 50

View File

@@ -1,524 +0,0 @@
# ------------------------------------------------------------------------
# Semantic SAM
# Copyright (c) MicroSoft, Inc. and its affiliates.
# Modified from OpenSeed https://github.com/IDEA-Research/OpenSeed by Feng Li.
# ------------------------------------------------------------------------
##################
# Task settings
##################
WEIGHT: ''
PORT: 53711
VERBOSE: true
OUTPUT_DIR: '../../data/output/test'
# misc
LOADER:
JOINT: True
KEY_DATASET: 'coco'
# model
MODEL:
NAME: interactive_mask_dino
HEAD: general_head
MASK_ON: false
KEYPOINT_ON: false
LOAD_PROPOSALS: false
DIM_PROJ: 512
BACKBONE_DIM: 768
BACKGROUND: False
WEIGHTS: ''
TEXT:
ARCH: noencoder # no language encoder for training only sa-1b data
NAME: transformer
TOKENIZER: clip
CONTEXT_LENGTH: 18 # 77
WIDTH: 512
HEADS: 8
LAYERS: 12 # 6
AUTOGRESSIVE: True
BACKBONE:
NAME: swin
PRETRAINED: 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'
LOAD_PRETRAINED: true
SWIN:
PRETRAIN_IMG_SIZE: 384
PATCH_SIZE: 4
EMBED_DIM: 192
DEPTHS: [ 2, 2, 18, 2 ]
NUM_HEADS: [ 6, 12, 24, 48 ]
WINDOW_SIZE: 12
MLP_RATIO: 4.0
QKV_BIAS: true
QK_SCALE: ~
DROP_RATE: 0.0
ATTN_DROP_RATE: 0.0
DROP_PATH_RATE: 0.3
APE: false
PATCH_NORM: true
USE_CHECKPOINT: false
OUT_FEATURES: [ 'res2', 'res3', 'res4', 'res5' ]
ENCODER:
NAME: encoder_deform
IGNORE_VALUE: 255
NUM_CLASSES: 1
LOSS_WEIGHT: 1.0
CONVS_DIM: 256
MASK_DIM: 256
NORM: "GN"
IN_FEATURES: [ "res2", "res3", "res4", "res5" ]
DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: [ "res3", "res4", "res5" ]
COMMON_STRIDE: 4
TRANSFORMER_ENC_LAYERS: 6
TOTAL_NUM_FEATURE_LEVELS: 4
NUM_FEATURE_LEVELS: 3
FEATURE_ORDER: "low2high"
DECODER:
NAME: interactive_mask_dino
TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder"
MASK: True
BOX: True
PART: True
GROUNDING:
ENABLED: False
MAX_LEN: 5
TEXT_WEIGHT: 2.0
CLASS_WEIGHT: 0.5
CAPTION:
ENABLED: False
PHRASE_PROB: 0.0
SIM_THRES: 0.95
CAPTIONING:
ENABLED: False
STEP: 50
RETRIEVAL:
ENABLED: False
DIM_IMG: 768
ENSEMBLE: True
OPENIMAGE:
ENABLED: False
NEGATIVE_SAMPLES: 5
GROUNDING:
ENABLED: False
MAX_LEN: 5
DEEP_SUPERVISION: True
NO_OBJECT_WEIGHT: 0.1
CLASS_WEIGHT: 4.0
MASK_WEIGHT: 5.0
DICE_WEIGHT: 5.0
BOX_WEIGHT: 5.0
GIOU_WEIGHT: 2.0
IOU_WEIGHT: 1.0
COST_CLASS_WEIGHT: 4.0
COST_DICE_WEIGHT: 5.0
COST_MASK_WEIGHT: 5.0
COST_BOX_WEIGHT: 5.0
COST_GIOU_WEIGHT: 2.0
HIDDEN_DIM: 256
NUM_OBJECT_QUERIES: 0
NHEADS: 8
DROPOUT: 0.0
DIM_FEEDFORWARD: 2048
ENC_LAYERS: 0
PRE_NORM: False
ENFORCE_INPUT_PROJ: False
SIZE_DIVISIBILITY: 32
DEC_LAYERS: 9 # 9 decoder layers, add one for the loss on learnable query
TRAIN_NUM_POINTS: 12544
OVERSAMPLE_RATIO: 3.0
IMPORTANCE_SAMPLE_RATIO: 0.75
TWO_STAGE: False
INITIALIZE_BOX_TYPE: 'no'
DN: seg
DN_NOISE_SCALE: 0.4
DN_NUM: 100
INITIAL_PRED: False
LEARN_TGT: False
TOTAL_NUM_FEATURE_LEVELS: 4
SEMANTIC_CE_LOSS: False
PANO_BOX_LOSS: False
COCO: False
O365: False
SAM: True
PASCAL: False
RE_POINT: True
NUM_INTERACTIVE_TOKENS: 6
MAX_NUM_INSTANCE: 60
TEST:
SEMANTIC_ON: True
INSTANCE_ON: True
PANOPTIC_ON: True
BOX_INTERACTIVE: False
CLASSIFICATION_ON: False
OVERLAP_THRESHOLD: 0.8
OBJECT_MASK_THRESHOLD: 0.25
SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false
TEST_FOUCUS_ON_BOX: False
PANO_TRANSFORM_EVAL: True
PANO_TEMPERATURE: 0.06
TEST:
EVAL_PERIOD: 500000
PRECISE_BN:
NUM_ITER: 1
ENABLED: False
AUG:
ENABLED: False
SAM:
INPUT:
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
IMAGE_SIZE: 1024
MIN_SCALE: 0.99
MAX_SCALE: 1.01
DATASET_MAPPER_NAME: "sam"
IGNORE_VALUE: 255
COLOR_AUG_SSD: False
SIZE_DIVISIBILITY: 32
RANDOM_FLIP: "horizontal"
MASK_FORMAT: "polygon"
FORMAT: "RGB"
CROP:
ENABLED: True
DATASET:
DATASET: 'sam'
TEST:
DETECTIONS_PER_IMAGE: 100
NAME: coco_eval
IOU_TYPE: ['bbox', 'segm']
USE_MULTISCALE: false
BATCH_SIZE_TOTAL: 8
MODEL_FILE: ''
AUG:
ENABLED: False
TRAIN:
BATCH_SIZE_TOTAL: 1
BATCH_SIZE_PER_GPU: 1
SHUFFLE: true
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 4
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True
COCO:
INPUT:
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
IMAGE_SIZE: 1024
MIN_SCALE: 0.1
MAX_SCALE: 2.0
DATASET_MAPPER_NAME: "coco_interactive_panoptic_lsj"
IGNORE_VALUE: 255
COLOR_AUG_SSD: False
SIZE_DIVISIBILITY: 32
RANDOM_FLIP: "horizontal"
MASK_FORMAT: "polygon"
FORMAT: "RGB"
CROP:
ENABLED: True
DATASET:
DATASET: 'coco'
TEST:
DETECTIONS_PER_IMAGE: 100
NAME: coco_eval
IOU_TYPE: ['bbox', 'segm']
USE_MULTISCALE: false
BATCH_SIZE_TOTAL: 1
MODEL_FILE: ''
AUG:
ENABLED: False
TRAIN:
BATCH_SIZE_TOTAL: 1
BATCH_SIZE_PER_GPU: 1
SHUFFLE: true
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 2
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True
VLP:
INPUT:
IMAGE_SIZE: 224
DATASET_MAPPER_NAME: "vlpretrain"
IGNORE_VALUE: 255
COLOR_AUG_SSD: False
SIZE_DIVISIBILITY: 32
MASK_FORMAT: "polygon"
FORMAT: "RGB"
CROP:
ENABLED: True
TRAIN:
BATCH_SIZE_TOTAL: 2
BATCH_SIZE_PER_GPU: 2
TEST:
BATCH_SIZE_TOTAL: 256
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 16
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True
INPUT:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
DATASETS:
TRAIN: ["sam_train"]
# interactive segmentation evaluation.
TEST: ["coco_2017_val_panoptic_with_sem_seg_interactive_jointboxpoint"]
# TEST: ["sam_minival"]
CLASS_CONCAT: false
SIZE_DIVISIBILITY: 32
PROPOSAL_FILES_TRAIN: []
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 16
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True
# Detectron2 training config for optimizer and lr scheduler
SOLVER:
BASE_LR_END: 0.0
MOMENTUM: 0.9
NESTEROV: False
CHECKPOINT_PERIOD: 5000
IMS_PER_BATCH: 1
REFERENCE_WORLD_SIZE: 0
BIAS_LR_FACTOR: 1.0
WEIGHT_DECAY_BIAS: None
# original
BASE_LR: 0.0001
STEPS: [327778, 355092]
MAX_ITER: 368750
GAMMA: 0.1
WARMUP_FACTOR: 1.0
WARMUP_ITERS: 10
WARMUP_METHOD: "linear"
WEIGHT_DECAY: 0.05
OPTIMIZER: "ADAMW"
LR_SCHEDULER_NAME: "WarmupMultiStepLR"
LR_MULTIPLIER:
backbone: 0.1
lang_encoder: 0.1
WEIGHT_DECAY_NORM: 0.0
WEIGHT_DECAY_EMBED: 0.0
CLIP_GRADIENTS:
ENABLED: True
CLIP_TYPE: "full_model"
CLIP_VALUE: 0.01
NORM_TYPE: 2.0
AMP:
ENABLED: True
# Evaluation Dataset
ADE20K:
INPUT:
MIN_SIZE_TRAIN: [320, 384, 448, 512, 576, 640, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280]
MIN_SIZE_TRAIN_SAMPLING: "choice"
MIN_SIZE_TEST: 640
MAX_SIZE_TRAIN: 2560
MAX_SIZE_TEST: 2560
MASK_FORMAT: "polygon"
CROP:
ENABLED: True
TYPE: "absolute"
SIZE: [640, 640]
SINGLE_CATEGORY_MAX_AREA: 1.0
IGNORE_VALUE: 255
COLOR_AUG_SSD: True
SIZE_DIVISIBILITY: 640 # used in dataset mapper
DATASET_MAPPER_NAME: "mask_former_panoptic"
FORMAT: "RGB"
DATASET:
DATASET: 'ade'
TRAIN:
ASPECT_RATIO_GROUPING: true
BATCH_SIZE_TOTAL: 16
BATCH_SIZE_PER_GPU: 2
SHUFFLE: true
TEST:
DETECTIONS_PER_IMAGE: 100
NAME: coco_eval
IOU_TYPE: ['bbox', 'segm']
USE_MULTISCALE: false
BATCH_SIZE_TOTAL: 8
MODEL_FILE: ''
AUG:
ENABLED: False
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 8
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True
#ADE20K:
# INPUT:
# MIN_SIZE_TRAIN: 640
# MIN_SIZE_TRAIN_SAMPLING: "choice"
# MIN_SIZE_TEST: 640
# MAX_SIZE_TRAIN: 2560
# MAX_SIZE_TEST: 2560
# MASK_FORMAT: "polygon"
# CROP:
# ENABLED: True
# TYPE: "absolute"
# SIZE: (640, 640)
# SINGLE_CATEGORY_MAX_AREA: 1.0
# COLOR_AUG_SSD: True
# SIZE_DIVISIBILITY: 640 # used in dataset mapper
# DATASET_MAPPER_NAME: "mask_former_panoptic"
# FORMAT: "RGB"
# DATASET:
# DATASET: 'ade'
# TEST:
# BATCH_SIZE_TOTAL: 8
REF:
INPUT:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
MIN_SIZE_TEST: 512
MAX_SIZE_TEST: 1024
FORMAT: "RGB"
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 8
SUN:
INPUT:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
MIN_SIZE_TEST: 512
MAX_SIZE_TEST: 1024
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 8
SCAN:
INPUT:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
MIN_SIZE_TEST: 512
MAX_SIZE_TEST: 1024
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 8
BDD:
INPUT:
PIXEL_MEAN: [123.675, 116.280, 103.530]
PIXEL_STD: [58.395, 57.120, 57.375]
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 0
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: False
TEST:
BATCH_SIZE_TOTAL: 8
CITY:
INPUT:
MIN_SIZE_TRAIN: [ 512, 614, 716, 819, 921, 1024, 1126, 1228, 1331, 1433, 1536, 1638, 1740, 1843, 1945, 2048 ]
MIN_SIZE_TRAIN_SAMPLING: "choice"
MIN_SIZE_TEST: 1024
MAX_SIZE_TRAIN: 4096
MAX_SIZE_TEST: 2048
CROP:
ENABLED: True
TYPE: "absolute"
SIZE: [ 512, 1024 ]
SINGLE_CATEGORY_MAX_AREA: 1.0
IGNORE_VALUE: 255
COLOR_AUG_SSD: True
SIZE_DIVISIBILITY: -1
FORMAT: "RGB"
DATASET_MAPPER_NAME: "mask_former_panoptic"
MASK_FORMAT: "polygon"
TEST:
EVAL_PERIOD: 5000
BATCH_SIZE_TOTAL: 1
AUG:
ENABLED: False
MIN_SIZES: [ 512, 768, 1024, 1280, 1536, 1792 ]
MAX_SIZE: 4096
FLIP: True
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: True
NUM_WORKERS: 2
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True
TRAIN:
ASPECT_RATIO_GROUPING: true
BATCH_SIZE_TOTAL: 2
BATCH_SIZE_PER_GPU: 2
SHUFFLE: true
PSACAL_PART:
INPUT:
MIN_SIZE_TEST: 800
MAX_SIZE_TEST: 1333
IMAGE_SIZE: 1024
MIN_SCALE: 0.1
MAX_SCALE: 2.0
DATASET_MAPPER_NAME: "pascal_part_lsj"
IGNORE_VALUE: 255
COLOR_AUG_SSD: False
SIZE_DIVISIBILITY: 32
RANDOM_FLIP: "horizontal"
MASK_FORMAT: "polygon"
FORMAT: "RGB"
CROP:
ENABLED: True
MODEL:
MASK_ON: True
KEYPOINT_ON: False
LOAD_PROPOSALS: False
# DATASET:
# DATASET: 'coco'
TEST:
DETECTIONS_PER_IMAGE: 100
NAME: coco_eval
IOU_TYPE: ['bbox', 'segm']
USE_MULTISCALE: false
BATCH_SIZE_TOTAL: 8
MODEL_FILE: ''
AUG:
ENABLED: False
TRAIN:
BATCH_SIZE_TOTAL: 1
BATCH_SIZE_PER_GPU: 1
SHUFFLE: true
DATALOADER:
FILTER_EMPTY_ANNOTATIONS: False
NUM_WORKERS: 2
LOAD_PROPOSALS: False
SAMPLER_TRAIN: "TrainingSampler"
ASPECT_RATIO_GROUPING: True

View File

@@ -0,0 +1,405 @@
import os
import gc
import time
import base64
from contextlib import asynccontextmanager
from typing import List, Literal, Union, Tuple, Optional
import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger
from pydantic import BaseModel, Field
from sse_starlette.sse import EventSourceResponse
from transformers import AutoModelForCausalLM, LlamaTokenizer, PreTrainedModel, PreTrainedTokenizer, \
TextIteratorStreamer
from PIL import Image
from io import BytesIO
# Model/tokenizer locations are overridable via environment variables.
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/cogvlm-chat-hf')
TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", 'lmsys/vicuna-7b-v1.5')
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
# Decide whether to load the model 4-bit quantized: forced on via the
# QUANT_ENABLED env var, otherwise enabled automatically when the visible GPU
# has less than 40 GB of memory.
if os.environ.get('QUANT_ENABLED'):
    QUANT_ENABLED = True
elif torch.cuda.is_available():
    with torch.cuda.device(DEVICE):
        __, total_bytes = torch.cuda.mem_get_info()
        total_gb = total_bytes / (1 << 30)
        QUANT_ENABLED = total_gb < 40
else:
    # Fix: the original called torch.cuda.mem_get_info() even on CPU-only
    # hosts, which raises at import time; quantization is moot without a GPU.
    QUANT_ENABLED = False
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    FastAPI lifespan hook: run the application, then release cached CUDA
    memory on shutdown so GPU allocations are returned promptly.
    """
    yield
    if not torch.cuda.is_available():
        return
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()
# Wire in the lifespan handler so GPU memory is released on shutdown.
app = FastAPI(lifespan=lifespan)
# Fully permissive CORS: any origin, method, or header may call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class ModelCard(BaseModel):
    """
    Metadata describing one served model (id, owner, creation time), shaped
    like an entry of OpenAI's /v1/models response.
    """
    id: str
    object: str = "model"
    # Creation timestamp defaults to "now" (seconds since epoch).
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "owner"
    root: Optional[str] = None
    parent: Optional[str] = None
    permission: Optional[list] = None
class ModelList(BaseModel):
    """Response envelope for the /v1/models endpoint."""
    object: str = "list"
    data: List[ModelCard] = []
class ImageUrl(BaseModel):
    """Image reference; only base64 `data:image/...` URIs are decoded downstream."""
    url: str
class TextContent(BaseModel):
    """A plain-text part of a multi-part chat message."""
    type: Literal["text"]
    text: str
class ImageUrlContent(BaseModel):
    """An image part of a multi-part chat message."""
    type: Literal["image_url"]
    image_url: ImageUrl
# One part of a multi-part message: plain text or an image URL
# (discriminated by the `type` field).
ContentItem = Union[TextContent, ImageUrlContent]
class ChatMessageInput(BaseModel):
    """Incoming chat message; content is either a plain string or typed parts."""
    role: Literal["user", "assistant", "system"]
    content: Union[str, List[ContentItem]]
    name: Optional[str] = None
class ChatMessageResponse(BaseModel):
    """A single assistant message returned in a non-streaming completion."""
    role: Literal["assistant"]
    # Was annotated `str = None`, which misstates the type of the default;
    # make the optionality explicit (backward-compatible for callers).
    content: Optional[str] = None
    name: Optional[str] = None
class DeltaMessage(BaseModel):
    """Incremental piece of a message, used in streaming chunks."""
    role: Optional[Literal["user", "assistant", "system"]] = None
    content: Optional[str] = None
class ChatCompletionRequest(BaseModel):
    """Request body for /v1/chat/completions (OpenAI-compatible subset)."""
    model: str
    messages: List[ChatMessageInput]
    temperature: Optional[float] = 0.8
    top_p: Optional[float] = 0.8
    max_tokens: Optional[int] = None  # server falls back to 1024 when unset
    stream: Optional[bool] = False
    # Additional parameters
    repetition_penalty: Optional[float] = 1.0
class ChatCompletionResponseChoice(BaseModel):
    """One completed choice in a non-streaming response."""
    index: int
    message: ChatMessageResponse
class ChatCompletionResponseStreamChoice(BaseModel):
    """One incremental choice in a streaming chunk."""
    index: int
    delta: DeltaMessage
class UsageInfo(BaseModel):
    """Token accounting attached to a completion response."""
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0
class ChatCompletionResponse(BaseModel):
    """Envelope for both full completions and streaming chunks."""
    model: str
    object: Literal["chat.completion", "chat.completion.chunk"]
    choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]]
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))
    usage: Optional[UsageInfo] = None
@app.get("/v1/models", response_model=ModelList)
async def list_models():
    """
    List the models this server exposes.

    Returns a single-entry list so OpenAI-compatible clients can discover
    the deployed model id.
    """
    # Replace the id when deploying a different checkpoint (e.g. cogagent-chat-18b).
    return ModelList(data=[ModelCard(id="cogvlm-chat-17b")])
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    """
    Serve one chat completion, either streamed as SSE chunks or as a single
    full response, depending on `request.stream`.
    """
    global model, tokenizer
    # Reject empty conversations and ones that already end with the model's turn.
    if not request.messages or request.messages[-1].role == "assistant":
        raise HTTPException(status_code=400, detail="Invalid request")
    gen_params = dict(
        messages=request.messages,
        temperature=request.temperature,
        top_p=request.top_p,
        max_tokens=request.max_tokens or 1024,
        echo=False,
        stream=request.stream,
    )
    if request.stream:
        # Streaming path: hand the async generator to the SSE response.
        return EventSourceResponse(predict(request.model, gen_params), media_type="text/event-stream")
    # Non-streaming path: run generation to completion.
    response = generate_cogvlm(model, tokenizer, gen_params)
    message = ChatMessageResponse(
        role="assistant",
        content=response["text"],
    )
    logger.debug(f"==== message ====\n{message}")
    # Accumulate token usage reported by the generator.
    usage = UsageInfo()
    task_usage = UsageInfo.model_validate(response["usage"])
    for usage_field, usage_amount in task_usage.model_dump().items():
        setattr(usage, usage_field, getattr(usage, usage_field) + usage_amount)
    choice = ChatCompletionResponseChoice(index=0, message=message)
    return ChatCompletionResponse(model=request.model, choices=[choice], object="chat.completion", usage=usage)
async def predict(model_id: str, params: dict):
    """
    Stream a completion as serialized chat.completion.chunk payloads.

    Yields one opening chunk announcing the assistant role, one chunk per
    newly generated text fragment, and a final empty-delta chunk marking the
    end of the stream.
    """
    global model, tokenizer

    def chunk_json(delta: DeltaMessage) -> str:
        # Package a delta into one serialized chat.completion.chunk payload.
        payload = ChatCompletionResponse(
            model=model_id,
            choices=[ChatCompletionResponseStreamChoice(index=0, delta=delta)],
            object="chat.completion.chunk",
        )
        return "{}".format(payload.model_dump_json(exclude_unset=True))

    # Opening chunk: announce the assistant role before any content.
    yield chunk_json(DeltaMessage(role="assistant"))
    sent_so_far = ""
    for update in generate_stream_cogvlm(model, tokenizer, params):
        full_text = update["text"]
        # The generator yields the full text each time; forward only the suffix.
        fragment = full_text[len(sent_so_far):]
        sent_so_far = full_text
        yield chunk_json(DeltaMessage(content=fragment, role="assistant"))
    # Closing chunk with an empty delta marks the end of the stream.
    yield chunk_json(DeltaMessage())
def generate_cogvlm(model: PreTrainedModel, tokenizer: PreTrainedTokenizer, params: dict):
    """
    Generate a complete (non-streaming) response with the CogVLM model.

    Drains the streaming generator and returns its final item, which carries
    the full generated text plus token-usage accounting.
    """
    # Fix: initialize so an empty stream returns None instead of raising
    # UnboundLocalError on `response`.
    response = None
    for response in generate_stream_cogvlm(model, tokenizer, params):
        pass
    return response
def process_history_and_images(messages: List[ChatMessageInput]) -> Tuple[
    Optional[str], Optional[List[Tuple[str, str]]], Optional[List[Image.Image]]]:
    """
    Split an OpenAI-style message list into CogVLM inputs.

    Args:
        messages(List[ChatMessageInput]): the conversation so far.
    return: A tuple of three elements:
        - The trailing user query as a string.
        - The prior conversation as (user_text, assistant_text) pairs.
        - Every base64 JPEG/PNG image found, decoded to RGB PIL images.
    """
    history: List[Tuple[str, str]] = []
    images: List[Image.Image] = []
    query = ''
    last_index = len(messages) - 1
    for idx, msg in enumerate(messages):
        content = msg.content
        if isinstance(content, list):
            # Multi-part content: gather the text pieces and decode any images.
            text = ' '.join(part.text for part in content if isinstance(part, TextContent))
            for part in content:
                if not isinstance(part, ImageUrlContent):
                    continue
                url = part.image_url.url
                for prefix in ("data:image/jpeg;base64,", "data:image/png;base64,"):
                    if url.startswith(prefix):
                        raw = base64.b64decode(url.split(prefix)[1])
                        images.append(Image.open(BytesIO(raw)).convert('RGB'))
                        break
        else:
            text = content
        role = msg.role
        if role == 'user':
            if idx == last_index:
                # The final user message is the query the model must answer.
                query = text
            else:
                history.append((text, ''))
        elif role == 'assistant':
            if history:
                if history[-1][1] != '':
                    assert False, f"the last query is answered. answer again. {history[-1][0]}, {history[-1][1]}, {text}"
                # Attach the reply to the most recent unanswered user turn.
                history[-1] = (history[-1][0], text)
            else:
                assert False, f"assistant reply before user"
        else:
            assert False, f"unrecognized role: {role}"
    return query, history, images
@torch.inference_mode()
def generate_stream_cogvlm(model: PreTrainedModel, tokenizer: PreTrainedTokenizer, params: dict):
    """
    Generate a response with CogVLM, yielding the accumulated text as it grows.

    Each yielded dict has the full ``text`` so far plus a ``usage`` dict;
    the final yield carries the complete generation.
    """
    # Sampling parameters with OpenAI-style defaults.
    messages = params["messages"]
    temperature = float(params.get("temperature", 1.0))
    repetition_penalty = float(params.get("repetition_penalty", 1.0))
    top_p = float(params.get("top_p", 1.0))
    max_new_tokens = int(params.get("max_tokens", 256))
    query, history, image_list = process_history_and_images(messages)
    logger.debug(f"==== request ====\n{query}")
    # NOTE(review): only the most recent image is passed to the model;
    # earlier images in the conversation are ignored here.
    input_by_model = model.build_conversation_input_ids(tokenizer, query=query, history=history,
                                                        images=[image_list[-1]])
    # `torch_type` is a module-level global assigned in the __main__ block —
    # this function assumes the script entry point has run. TODO confirm.
    inputs = {
        'input_ids': input_by_model['input_ids'].unsqueeze(0).to(DEVICE),
        'token_type_ids': input_by_model['token_type_ids'].unsqueeze(0).to(DEVICE),
        'attention_mask': input_by_model['attention_mask'].unsqueeze(0).to(DEVICE),
        'images': [[input_by_model['images'][0].to(DEVICE).to(torch_type)]],
    }
    if 'cross_images' in input_by_model and input_by_model['cross_images']:
        inputs['cross_images'] = [[input_by_model['cross_images'][0].to(DEVICE).to(torch_type)]]
    input_echo_len = len(inputs["input_ids"][0])
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer,
        timeout=60.0,
        skip_prompt=True,
        skip_special_tokens=True
    )
    gen_kwargs = {
        "repetition_penalty": repetition_penalty,
        "max_new_tokens": max_new_tokens,
        # temperature <= 1e-5 is treated as greedy decoding.
        "do_sample": True if temperature > 1e-5 else False,
        "top_p": top_p if temperature > 1e-5 else 0,
        'streamer': streamer,
    }
    if temperature > 1e-5:
        gen_kwargs["temperature"] = temperature
    # NOTE(review): total_len is never updated below, so the reported
    # completion_tokens/total_tokens are not meaningful.
    total_len = 0
    generated_text = ""
    with torch.no_grad():
        # generate() runs to completion here; the streamer's unbounded queue
        # buffers the text, and the loop below drains it afterwards.
        model.generate(**inputs, **gen_kwargs)
        for next_text in streamer:
            generated_text += next_text
            yield {
                "text": generated_text,
                "usage": {
                    "prompt_tokens": input_echo_len,
                    "completion_tokens": total_len - input_echo_len,
                    "total_tokens": total_len,
                },
            }
    # Final yield with the complete generation.
    ret = {
        "text": generated_text,
        "usage": {
            "prompt_tokens": input_echo_len,
            "completion_tokens": total_len - input_echo_len,
            "total_tokens": total_len,
        },
    }
    yield ret
    gc.collect()
    torch.cuda.empty_cache()
if __name__ == "__main__":
    # The tokenizer (vicuna/LLaMA) is loaded separately from the model weights.
    tokenizer = LlamaTokenizer.from_pretrained(
        TOKENIZER_PATH,
        trust_remote_code=True)
    # Prefer bfloat16 on GPUs with compute capability >= 8.0; otherwise float16.
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
        torch_type = torch.bfloat16
    else:
        torch_type = torch.float16
    print("========Use torch type as:{} with device:{}========\n\n".format(torch_type, DEVICE))
    if 'cuda' in DEVICE:
        if QUANT_ENABLED:
            # 4-bit quantized load for GPUs with limited memory.
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_PATH,
                load_in_4bit=True,
                trust_remote_code=True,
                torch_dtype=torch_type,
                low_cpu_mem_usage=True
            ).eval()
        else:
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_PATH,
                load_in_4bit=False,
                trust_remote_code=True,
                torch_dtype=torch_type,
                low_cpu_mem_usage=True
            ).to(DEVICE).eval()
    else:
        # CPU fallback: full-precision float32.
        model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True).float().to(DEVICE).eval()
    uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)

View File

@@ -0,0 +1,7 @@
## Deploy CogAgent as a server
```
python CogAgent.py
```
The CogAgent LLM will then be served at http://127.0.0.1:8000.

View File

@@ -1,13 +0,0 @@
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
from .ms_deform_attn_func import MSDeformAttnFunction

View File

@@ -1,72 +0,0 @@
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import torch
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
# The compiled CUDA extension is mandatory: fail fast with build
# instructions when the op has not been compiled.
try:
    import MultiScaleDeformableAttention as MSDA
except ModuleNotFoundError as e:
    info_string = (
        "\n\nPlease compile MultiScaleDeformableAttention CUDA op with the following commands:\n"
        "\t`cd mask2former/modeling/pixel_decoder/ops`\n"
        "\t`sh make.sh`\n"
    )
    raise ModuleNotFoundError(info_string)
class MSDeformAttnFunction(Function):
    """autograd Function wrapping the compiled multi-scale deformable attention op."""
    @staticmethod
    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
        # Stash the im2col step so backward can pass the same value to the op.
        ctx.im2col_step = im2col_step
        output = MSDA.ms_deform_attn_forward(
            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
        return output
    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
        grad_value, grad_sampling_loc, grad_attn_weight = \
            MSDA.ms_deform_attn_backward(
                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
        # No gradients for spatial shapes, level start indices, or im2col_step.
        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights):
    # for debug and test only,
    # need to use cuda version instead
    """
    Pure-PyTorch reference for multi-scale deformable attention.

    value: (bs, sum(h*w), heads, head_dim) flattened multi-level features.
    value_spatial_shapes: list of (h, w) per level.
    sampling_locations: (bs, queries, heads, levels, points, 2) in [0, 1].
    attention_weights: (bs, queries, heads, levels, points).
    Returns (bs, queries, heads*head_dim).
    """
    bs, _, n_heads, head_dim = value.shape
    _, n_queries, _, n_levels, n_points, _ = sampling_locations.shape
    # Split the flattened values back into one chunk per feature level.
    per_level_values = value.split([h * w for h, w in value_spatial_shapes], dim=1)
    # grid_sample expects coordinates in [-1, 1]; locations arrive in [0, 1].
    grids = 2 * sampling_locations - 1
    sampled = []
    for level, (h, w) in enumerate(value_spatial_shapes):
        # (bs, h*w, heads, dim) -> (bs*heads, dim, h, w)
        level_value = per_level_values[level].flatten(2).transpose(1, 2).reshape(bs * n_heads, head_dim, h, w)
        # (bs, queries, heads, points, 2) -> (bs*heads, queries, points, 2)
        level_grid = grids[:, :, :, level].transpose(1, 2).flatten(0, 1)
        # (bs*heads, dim, queries, points)
        sampled.append(F.grid_sample(level_value, level_grid,
                                     mode='bilinear', padding_mode='zeros', align_corners=False))
    # (bs, queries, heads, levels, points) -> (bs*heads, 1, queries, levels*points)
    weights = attention_weights.transpose(1, 2).reshape(bs * n_heads, 1, n_queries, n_levels * n_points)
    # Weighted sum over every (level, point) sample, then merge the head dim.
    out = (torch.stack(sampled, dim=-2).flatten(-2) * weights).sum(-1).view(bs, n_heads * head_dim, n_queries)
    return out.transpose(1, 2).contiguous()

Some files were not shown because too many files have changed in this diff Show More