Merge branch 'main' into zdy
This commit is contained in:
@@ -20,7 +20,7 @@ todo
|
|||||||
- [x] Set up a pipeline and build agents implementation (zero-shot) for the task
|
- [x] Set up a pipeline and build agents implementation (zero-shot) for the task
|
||||||
- [x] Start to design on which tasks inside the DesktopENv to focus on, start to wrap up the environment to be public
|
- [x] Start to design on which tasks inside the DesktopENv to focus on, start to wrap up the environment to be public
|
||||||
- [x] Start to annotate the examples for ~~training~~ and testing
|
- [x] Start to annotate the examples for ~~training~~ and testing
|
||||||
- [ ] Error handling during file passing and file opening, etc.
|
- [x] Error handling during file passing and file opening, etc.
|
||||||
- [ ] Add accessibility tree from the OS into the observation space
|
- [x] Add accessibility tree from the OS into the observation space
|
||||||
- [ ] Add pre-process and post-process action support for benchmarking setup and evaluation
|
- [ ] Add pre-process and post-process action support for benchmarking setup and evaluation
|
||||||
- [ ] Multiprocess support, this can enable the reinforcement learning to be more efficient
|
- [ ] Multiprocess support, this can enable the reinforcement learning to be more efficient
|
||||||
@@ -197,8 +197,10 @@ class PythonController:
|
|||||||
if "text" not in parameters:
|
if "text" not in parameters:
|
||||||
raise Exception(f"Unknown parameters: {parameters}")
|
raise Exception(f"Unknown parameters: {parameters}")
|
||||||
# deal with special ' and \ characters
|
# deal with special ' and \ characters
|
||||||
text = parameters["text"].replace("\\", "\\\\").replace("'", "\\'")
|
# text = parameters["text"].replace("\\", "\\\\").replace("'", "\\'")
|
||||||
self.execute_python_command(f"pyautogui.typewrite('{text}')")
|
# self.execute_python_command(f"pyautogui.typewrite('{text}')")
|
||||||
|
text = parameters["text"]
|
||||||
|
self.execute_python_command("pyautogui.typewrite({:})".format(repr(text)))
|
||||||
|
|
||||||
elif action_type == "PRESS":
|
elif action_type == "PRESS":
|
||||||
if "key" not in parameters:
|
if "key" not in parameters:
|
||||||
@@ -237,6 +239,9 @@ class PythonController:
|
|||||||
keys_para_rep = "', '".join(keys)
|
keys_para_rep = "', '".join(keys)
|
||||||
self.execute_python_command(f"pyautogui.hotkey('{keys_para_rep}')")
|
self.execute_python_command(f"pyautogui.hotkey('{keys_para_rep}')")
|
||||||
|
|
||||||
|
elif action_type in ['WAIT', 'FAIL', 'DONE']:
|
||||||
|
pass
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Unknown action type: {action_type}")
|
raise Exception(f"Unknown action type: {action_type}")
|
||||||
|
|
||||||
@@ -280,3 +285,31 @@ class PythonController:
|
|||||||
else:
|
else:
|
||||||
logger.error("Failed to get wallpaper. Status code: %d", response.status_code)
|
logger.error("Failed to get wallpaper. Status code: %d", response.status_code)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_vm_desktop_path(self):
    """
    Ask the VM-side HTTP server for the path of the desktop folder.

    Sends a POST request to ``<http_server>/desktop_path``.

    Returns:
        The ``"desktop_path"`` field of the JSON reply when the server answers
        with HTTP 200; otherwise ``None`` (the status code is logged).
    """
    reply = requests.post(self.http_server + "/desktop_path")

    # Guard clause: anything but 200 is reported and mapped to None.
    if reply.status_code != 200:
        logger.error("Failed to get desktop path. Status code: %d", reply.status_code)
        return None

    logger.info("Desktop path downloaded successfully")
    return reply.json()["desktop_path"]
|
||||||
|
|
||||||
|
def get_vm_directory_tree(self, path):
    """
    Ask the VM-side HTTP server for the directory tree rooted at *path*.

    Sends ``{"path": path}`` as a JSON body in a POST request to
    ``<http_server>/list_directory``.

    Returns:
        The ``"directory_tree"`` field of the JSON reply when the server
        answers with HTTP 200; otherwise ``None`` (the status code is logged).
    """
    reply = requests.post(
        self.http_server + "/list_directory",
        headers={'Content-Type': 'application/json'},
        data=json.dumps({"path": path}),
    )

    # Guard clause: anything but 200 is reported and mapped to None.
    if reply.status_code != 200:
        logger.error("Failed to get directory tree. Status code: %d", reply.status_code)
        return None

    logger.info("Directory tree downloaded successfully")
    return reply.json()["directory_tree"]
|
||||||
@@ -1,18 +1,18 @@
|
|||||||
import json
|
import json
|
||||||
import time
|
import logging
|
||||||
import os.path
|
import os.path
|
||||||
|
import time
|
||||||
import traceback
|
import traceback
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from typing import Dict, List
|
|
||||||
from typing import Any, Union, Optional
|
from typing import Any, Union, Optional
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
from requests_toolbelt.multipart.encoder import MultipartEncoder
|
from requests_toolbelt.multipart.encoder import MultipartEncoder
|
||||||
|
|
||||||
from desktop_env.evaluators.metrics.utils import compare_urls
|
from desktop_env.evaluators.metrics.utils import compare_urls
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger("desktopenv.setup")
|
logger = logging.getLogger("desktopenv.setup")
|
||||||
|
|
||||||
|
|
||||||
@@ -20,6 +20,7 @@ class SetupController:
|
|||||||
def __init__(self, vm_ip: str, cache_dir: str):
|
def __init__(self, vm_ip: str, cache_dir: str):
|
||||||
self.vm_ip: str = vm_ip
|
self.vm_ip: str = vm_ip
|
||||||
self.http_server: str = f"http://{vm_ip}:5000"
|
self.http_server: str = f"http://{vm_ip}:5000"
|
||||||
|
self.http_server_setup_root: str = f"http://{vm_ip}:5000/setup"
|
||||||
self.cache_dir: str = cache_dir
|
self.cache_dir: str = cache_dir
|
||||||
|
|
||||||
def reset_cache_dir(self, cache_dir: str):
|
def reset_cache_dir(self, cache_dir: str):
|
||||||
@@ -57,31 +58,31 @@ class SetupController:
|
|||||||
# can add other setup steps
|
# can add other setup steps
|
||||||
|
|
||||||
# ZDY_COMMENT: merged with launch
|
# ZDY_COMMENT: merged with launch
|
||||||
#def _command_setup(self, command: str):
|
# def _command_setup(self, command: str):
|
||||||
#"""
|
# """
|
||||||
#Directly send a command into the virtual machine os for setting up.
|
# Directly send a command into the virtual machine os for setting up.
|
||||||
#"""
|
# """
|
||||||
#payload = json.dumps({"command": command})
|
# payload = json.dumps({"command": command})
|
||||||
#headers = {
|
# headers = {
|
||||||
#'Content-Type': 'application/json'
|
# 'Content-Type': 'application/json'
|
||||||
#}
|
# }
|
||||||
#timeout = 5
|
# timeout = 5
|
||||||
#timout_whitelist = ["vlc"]
|
# timout_whitelist = ["vlc"]
|
||||||
#
|
#
|
||||||
#try:
|
# try:
|
||||||
#
|
#
|
||||||
#response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
|
# response = requests.post(self.http_server + "/execute", headers=headers, data=payload, timeout=timeout)
|
||||||
#if response.status_code == 200:
|
# if response.status_code == 200:
|
||||||
#print("Command executed successfully:", response.text)
|
# print("Command executed successfully:", response.text)
|
||||||
#else:
|
# else:
|
||||||
#print("Failed to execute command. Status code:", response.status_code)
|
# print("Failed to execute command. Status code:", response.status_code)
|
||||||
#except requests.exceptions.Timeout as e:
|
# except requests.exceptions.Timeout as e:
|
||||||
#if command in timout_whitelist:
|
# if command in timout_whitelist:
|
||||||
#print("Command executed successfully:", command)
|
# print("Command executed successfully:", command)
|
||||||
#else:
|
# else:
|
||||||
#print("An error occurred while trying to execute the command:", e)
|
# print("An error occurred while trying to execute the command:", e)
|
||||||
#except requests.exceptions.RequestException as e:
|
# except requests.exceptions.RequestException as e:
|
||||||
#print("An error occurred while trying to execute the command:", e)
|
# print("An error occurred while trying to execute the command:", e)
|
||||||
|
|
||||||
def _download_setup(self, files: List[Dict[str, str]]):
|
def _download_setup(self, files: List[Dict[str, str]]):
|
||||||
"""
|
"""
|
||||||
@@ -224,9 +225,14 @@ class SetupController:
|
|||||||
except requests.exceptions.RequestException as e:
|
except requests.exceptions.RequestException as e:
|
||||||
logger.error("An error occurred while trying to send the request: %s", e)
|
logger.error("An error occurred while trying to send the request: %s", e)
|
||||||
|
|
||||||
def _execute_setup( self, command: List[str]
|
def _execute_setup(
|
||||||
, stdout: str = "", stderr: str = ""
|
self,
|
||||||
, shell: bool = False, until: Optional[Dict[str, Any]] = None):
|
command: List[str],
|
||||||
|
stdout: str = "",
|
||||||
|
stderr: str = "",
|
||||||
|
shell: bool = False,
|
||||||
|
until: Optional[Dict[str, Any]] = None
|
||||||
|
):
|
||||||
if not command:
|
if not command:
|
||||||
raise Exception("Empty comman to launch.")
|
raise Exception("Empty comman to launch.")
|
||||||
|
|
||||||
@@ -248,10 +254,10 @@ class SetupController:
|
|||||||
if stderr:
|
if stderr:
|
||||||
with open(os.path.join(self.cache_dir, stderr), "w") as f:
|
with open(os.path.join(self.cache_dir, stderr), "w") as f:
|
||||||
f.write(results["error"])
|
f.write(results["error"])
|
||||||
logger.info( "Command executed successfully: %s -> %s"
|
logger.info("Command executed successfully: %s -> %s"
|
||||||
, " ".join(command)
|
, " ".join(command)
|
||||||
, response.text
|
, response.text
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
logger.error("Failed to launch application. Status code: %s", response.text)
|
logger.error("Failed to launch application. Status code: %s", response.text)
|
||||||
results = None
|
results = None
|
||||||
@@ -263,13 +269,13 @@ class SetupController:
|
|||||||
results = None
|
results = None
|
||||||
nb_failings += 1
|
nb_failings += 1
|
||||||
|
|
||||||
if len(until)==0:
|
if len(until) == 0:
|
||||||
terminates = True
|
terminates = True
|
||||||
elif results is not None:
|
elif results is not None:
|
||||||
terminates = "returncode" in until and results["returncode"]==until["returncode"]\
|
terminates = "returncode" in until and results["returncode"] == until["returncode"] \
|
||||||
or "stdout" in until and until["stdout"] in results["output"]\
|
or "stdout" in until and until["stdout"] in results["output"] \
|
||||||
or "stderr" in until and until["stderr"] in results["error"]
|
or "stderr" in until and until["stderr"] in results["error"]
|
||||||
terminates = terminates or nb_failings>=5
|
terminates = terminates or nb_failings >= 5
|
||||||
if not terminates:
|
if not terminates:
|
||||||
time.sleep(0.3)
|
time.sleep(0.3)
|
||||||
|
|
||||||
@@ -292,6 +298,25 @@ class SetupController:
|
|||||||
# TODO
|
# TODO
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
def _activate_window_setup(self, window_name: str):
    """
    Ask the VM-side server to activate the window called *window_name*.

    Sends ``{"window_name": window_name}`` as a JSON body in a POST request to
    ``<http_server>/setup/activate_window``. The outcome is only logged:
    a non-200 reply and any ``requests`` error are reported through the
    module logger and do not raise.

    Args:
        window_name: name of the window to activate; must be non-empty.

    Raises:
        Exception: if *window_name* is empty.
    """
    if not window_name:
        raise Exception(f"Setup Activate Window - Invalid window name ({window_name}).")

    payload = json.dumps({"window_name": window_name})
    headers = {
        'Content-Type': 'application/json'
    }

    # send request to server to activate the window
    try:
        response = requests.post(self.http_server + "/setup" + "/activate_window", headers=headers, data=payload)
        if response.status_code == 200:
            logger.info("Command executed successfully: %s", response.text)
        else:
            # window_name is passed as a lazy %-argument: interpolating it into
            # the format string would let a '%' in the name break formatting.
            logger.error("Failed to activate window %s. Status code: %s", window_name, response.text)
    except requests.exceptions.RequestException as e:
        logger.error("An error occurred while trying to send the request: %s", e)
|
||||||
|
|
||||||
# Chrome setup
|
# Chrome setup
|
||||||
def _chrome_open_tabs_setup(self, urls_to_open: List[str]):
|
def _chrome_open_tabs_setup(self, urls_to_open: List[str]):
|
||||||
host = self.vm_ip
|
host = self.vm_ip
|
||||||
|
|||||||
@@ -186,5 +186,18 @@ ACTION_SPACE = [
|
|||||||
"optional": False,
|
"optional": False,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
############################################################################################################
|
||||||
|
{
|
||||||
|
"action_type": "WAIT",
|
||||||
|
"note": "wait until the next action",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "FAIL",
|
||||||
|
"note": "decide the task can not be performed",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "DONE",
|
||||||
|
"note": "decide the task is done",
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,28 +1,30 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import tempfile
|
||||||
import time
|
import time
|
||||||
|
from typing import Callable, Any, Optional
|
||||||
# import uuid
|
# import uuid
|
||||||
# import platform
|
# import platform
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
from typing import Callable, Any, Optional
|
|
||||||
import tempfile
|
|
||||||
|
|
||||||
import gymnasium as gym
|
import gymnasium as gym
|
||||||
# import requests
|
|
||||||
|
|
||||||
from desktop_env.controllers.python import PythonController
|
from desktop_env.controllers.python import PythonController
|
||||||
from desktop_env.controllers.setup import SetupController
|
from desktop_env.controllers.setup import SetupController
|
||||||
# from desktop_env.evaluators import eval_funcs
|
# from desktop_env.evaluators import eval_funcs
|
||||||
from desktop_env.evaluators import metrics, getters
|
from desktop_env.evaluators import metrics, getters
|
||||||
|
|
||||||
import logging
|
# import requests
|
||||||
|
|
||||||
logger = logging.getLogger("desktopenv.env")
|
logger = logging.getLogger("desktopenv.env")
|
||||||
|
|
||||||
Metric = Callable[[Any, Any], float]
|
Metric = Callable[[Any, Any], float]
|
||||||
Getter = Callable[[gym.Env, Dict[str, Any]], Any]
|
Getter = Callable[[gym.Env, Dict[str, Any]], Any]
|
||||||
|
|
||||||
|
|
||||||
def _execute_command(command: List[str]) -> None:
|
def _execute_command(command: List[str]) -> None:
|
||||||
if command[:4] == ["vmrun", "-T", "ws", "start"]:
|
if command[:4] == ["vmrun", "-T", "ws", "start"]:
|
||||||
p = subprocess.Popen(command)
|
p = subprocess.Popen(command)
|
||||||
@@ -84,8 +86,8 @@ class DesktopEnv(gym.Env):
|
|||||||
self.setup_controller = SetupController(vm_ip=self.vm_ip, cache_dir=self.cache_dir)
|
self.setup_controller = SetupController(vm_ip=self.vm_ip, cache_dir=self.cache_dir)
|
||||||
|
|
||||||
# Meta info of the VM, move to the reset() function
|
# Meta info of the VM, move to the reset() function
|
||||||
self.vm_platform: str = "" # self.controller.get_vm_platform()
|
self.vm_platform: str = "" # self.controller.get_vm_platform()
|
||||||
self.vm_screen_size = None # self.controller.get_vm_screen_size()
|
self.vm_screen_size = None # self.controller.get_vm_screen_size()
|
||||||
|
|
||||||
# mode: human or machine
|
# mode: human or machine
|
||||||
assert action_space in ["computer_13", "pyautogui"]
|
assert action_space in ["computer_13", "pyautogui"]
|
||||||
@@ -164,7 +166,7 @@ class DesktopEnv(gym.Env):
|
|||||||
self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
|
self.evaluator["expected"]["type"])) if "expected" in self.evaluator else None
|
||||||
self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
|
self.metric_options: Dict[str, Any] = self.evaluator.get("options", {})
|
||||||
|
|
||||||
def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None):
|
def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]:
|
||||||
logger.info("Resetting environment...")
|
logger.info("Resetting environment...")
|
||||||
|
|
||||||
logger.info("Switching task...")
|
logger.info("Switching task...")
|
||||||
@@ -202,11 +204,27 @@ class DesktopEnv(gym.Env):
|
|||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
logger.info("Environment setup complete.")
|
logger.info("Environment setup complete.")
|
||||||
|
|
||||||
observation = self._get_obs()
|
observation = {"screenshot": self._get_obs()}
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
def step(self, action, pause=0.5):
|
def step(self, action, pause=0.5):
|
||||||
self._step_no += 1
|
self._step_no += 1
|
||||||
|
self.action_history.append(action)
|
||||||
|
|
||||||
|
reward = 0 # todo: Define reward calculation for each example
|
||||||
|
done = False # todo: Define episode termination condition for each example
|
||||||
|
info = {}
|
||||||
|
|
||||||
|
# handle the special actions
|
||||||
|
if action in ['WAIT', 'FAIL', 'DONE']:
|
||||||
|
if action == 'WAIT':
|
||||||
|
time.sleep(pause)
|
||||||
|
elif action == 'FAIL':
|
||||||
|
done = True
|
||||||
|
info = {"fail": True}
|
||||||
|
elif action == 'DONE':
|
||||||
|
done = True
|
||||||
|
info = {"done": True}
|
||||||
|
|
||||||
# fixme: add reminding logic here, decide if the action is valid for the current action_space
|
# fixme: add reminding logic here, decide if the action is valid for the current action_space
|
||||||
if self.action_space == "computer_13":
|
if self.action_space == "computer_13":
|
||||||
@@ -215,18 +233,14 @@ class DesktopEnv(gym.Env):
|
|||||||
elif self.action_space == "pyautogui":
|
elif self.action_space == "pyautogui":
|
||||||
# the set of all possible python commands insides `pyautogui`
|
# the set of all possible python commands insides `pyautogui`
|
||||||
self.controller.execute_python_command(action)
|
self.controller.execute_python_command(action)
|
||||||
self.action_history.append(action)
|
|
||||||
|
|
||||||
# todo: maybe for the better here we need to add a logic to wait until the rendering is done
|
|
||||||
time.sleep(pause)
|
|
||||||
observation = {
|
observation = {
|
||||||
"screenshot": self._get_obs(),
|
"screenshot": self._get_obs(),
|
||||||
|
"accessibility_tree": self.controller.get_accessibility_tree(),
|
||||||
"terminal": self.controller.get_terminal_output(),
|
"terminal": self.controller.get_terminal_output(),
|
||||||
"instruction": self.instruction
|
"instruction": self.instruction
|
||||||
}
|
}
|
||||||
reward = 0 # todo: Define reward calculation for each example
|
|
||||||
done = False # todo: Define episode termination condition for each example
|
|
||||||
info = {}
|
|
||||||
return observation, reward, done, info
|
return observation, reward, done, info
|
||||||
|
|
||||||
def evaluate(self):
|
def evaluate(self):
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
|
from .chrome import get_default_search_engine, get_cookie_data, get_bookmarks, get_open_tabs_info, get_pdf_from_url, \
|
||||||
|
get_shortcuts_on_desktop
|
||||||
from .file import get_cloud_file, get_vm_file, get_cache_file
|
from .file import get_cloud_file, get_vm_file, get_cache_file
|
||||||
|
from .general import get_vm_command_line
|
||||||
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
|
from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper
|
||||||
from .misc import get_rule, get_accessibility_tree
|
from .misc import get_rule, get_accessibility_tree
|
||||||
|
from .replay import get_replay
|
||||||
from .vlc import get_vlc_playing_info, get_vlc_config
|
from .vlc import get_vlc_playing_info, get_vlc_config
|
||||||
from .chrome import get_default_search_engine, get_bookmarks, get_open_tabs_info
|
from .vscode import get_vscode_config
|
||||||
|
|||||||
@@ -46,6 +46,10 @@ def get_default_search_engine(env, config: Dict[str, str]):
|
|||||||
|
|
||||||
|
|
||||||
def get_cookie_data(env, config: Dict[str, str]):
|
def get_cookie_data(env, config: Dict[str, str]):
|
||||||
|
"""
|
||||||
|
Get the cookies from the Chrome browser.
|
||||||
|
Assume the cookies are stored in the default location, not encrypted and not large in size.
|
||||||
|
"""
|
||||||
os_type = env.vm_platform
|
os_type = env.vm_platform
|
||||||
if os_type == 'Windows':
|
if os_type == 'Windows':
|
||||||
chrome_cookie_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
|
chrome_cookie_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
|
||||||
@@ -61,21 +65,23 @@ def get_cookie_data(env, config: Dict[str, str]):
|
|||||||
else:
|
else:
|
||||||
raise Exception('Unsupported operating system')
|
raise Exception('Unsupported operating system')
|
||||||
|
|
||||||
# todo: add a new controller function to connect the cookie database
|
|
||||||
#############
|
|
||||||
try:
|
try:
|
||||||
conn = sqlite3.connect(chrome_cookie_file_path)
|
content = env.controller.get_file(chrome_cookie_file_path)
|
||||||
|
_path = os.path.join(env.cache_dir, config["dest"])
|
||||||
|
|
||||||
|
with open(_path, "wb") as f:
|
||||||
|
f.write(content)
|
||||||
|
|
||||||
|
conn = sqlite3.connect(_path)
|
||||||
cursor = conn.cursor()
|
cursor = conn.cursor()
|
||||||
|
|
||||||
# Query to check for OpenAI cookies
|
# Query to check for OpenAI cookies
|
||||||
cursor.execute("SELECT * FROM cookies")
|
cursor.execute("SELECT * FROM cookies")
|
||||||
cookies = cursor.fetchall()
|
cookies = cursor.fetchall()
|
||||||
|
|
||||||
return cookies
|
return cookies
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error: {e}")
|
logger.error(f"Error: {e}")
|
||||||
return None
|
return None
|
||||||
#############
|
|
||||||
|
|
||||||
|
|
||||||
def get_bookmarks(env, config: Dict[str, str]):
|
def get_bookmarks(env, config: Dict[str, str]):
|
||||||
@@ -94,17 +100,12 @@ def get_bookmarks(env, config: Dict[str, str]):
|
|||||||
else:
|
else:
|
||||||
raise Exception('Unsupported operating system')
|
raise Exception('Unsupported operating system')
|
||||||
|
|
||||||
try:
|
content = env.controller.get_file(preference_file_path)
|
||||||
content = env.controller.get_file(preference_file_path)
|
if not content:
|
||||||
# make content json variable
|
return []
|
||||||
data = json.load(content)
|
data = json.loads(content)
|
||||||
|
bookmarks = data.get('roots', {})
|
||||||
bookmarks = data.get('roots', {})
|
return bookmarks
|
||||||
return bookmarks
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
# todo: move this to the main.py
|
# todo: move this to the main.py
|
||||||
@@ -190,3 +191,83 @@ def get_active_tab_info(env, config: Dict[str, str]):
|
|||||||
|
|
||||||
browser.close()
|
browser.close()
|
||||||
return active_tab_info
|
return active_tab_info
|
||||||
|
|
||||||
|
|
||||||
|
def get_pdf_from_url(env, config: Dict[str, str]) -> str:
    """
    Open a URL in the VM's Chrome and save the page as a PDF in the cache.

    Config:
        path (str): URL of the page to open
        dest (str): file name, relative to ``env.cache_dir``, of the PDF to write

    Returns:
        The local path of the written PDF file.
    """
    target_url = config["path"]
    pdf_path = os.path.join(env.cache_dir, config["dest"])

    vm_host = env.vm_ip
    debug_port = 9222  # fixme: this port is hard-coded, need to be changed from config file
    cdp_endpoint = f"http://{vm_host}:{debug_port}"

    with sync_playwright() as playwright:
        # Attach to the already-running Chrome through its remote-debugging endpoint.
        chrome = playwright.chromium.connect_over_cdp(cdp_endpoint)
        tab = chrome.new_page()
        tab.goto(target_url)
        tab.pdf(path=pdf_path)
        chrome.close()

    return pdf_path
|
||||||
|
|
||||||
|
|
||||||
|
# fixme: needs to be changed (maybe through post-processing) since it's not working
|
||||||
|
def get_chrome_saved_address(env, config: Dict[str, str]):
    """
    Return the HTML of Chrome's saved-addresses settings page in the VM.

    Connects over CDP to the Chrome instance at ``http://<env.vm_ip>:9222``,
    opens ``chrome://settings/addresses`` in a new page and returns the
    result of ``page.content()``.

    Args:
        env: environment object; only ``env.vm_ip`` is read.
        config: unused by this getter.

    Returns:
        The HTML content of the settings page.
    """
    # Use the VM address of the environment rather than a fixed debugging IP,
    # in line with the other CDP-based getters.
    host = env.vm_ip
    port = 9222  # fixme: this port is hard-coded, need to be changed from config file

    remote_debugging_url = f"http://{host}:{port}"
    with sync_playwright() as p:
        # connect to remote Chrome instance
        browser = p.chromium.connect_over_cdp(remote_debugging_url)

        page = browser.new_page()

        # Navigate to Chrome's settings page for autofill
        page.goto("chrome://settings/addresses")

        # Get the HTML content of the page
        content = page.content()

        browser.close()

    return content
|
||||||
|
|
||||||
|
|
||||||
|
def get_shortcuts_on_desktop(env, config: Dict[str, str]):
    """
    Collect the shortcut files found on the VM's desktop.

    The shortcut file extension is chosen from ``env.vm_platform``
    ('Windows' -> '.lnk', 'Darwin' -> '.webloc', 'Linux' -> '.desktop').
    The desktop listing comes from the controller's ``get_vm_desktop_path``
    and ``get_vm_directory_tree``; each matching file is then fetched with
    ``get_file`` and decoded as UTF-8.

    Args:
        env: environment object; ``env.vm_platform`` and ``env.controller`` are used.
        config: unused by this getter.

    Returns:
        A dict mapping shortcut file name to its decoded content. An empty
        list is returned for an unsupported platform (kept from the original
        behaviour), and an empty dict when the desktop listing is unavailable.
    """
    # Windows shortcuts are typically .url or .lnk files, macOS's are .webloc
    # files, Linux (Ubuntu, etc.) ones are typically .desktop files.
    extension_by_os = {
        'Windows': '.lnk',
        'Darwin': '.webloc',
        'Linux': '.desktop',
    }

    os_name = env.vm_platform
    shortcut_extension = extension_by_os.get(os_name)
    if shortcut_extension is None:
        logger.error(f"Unsupported operating system: {os_name}")
        return []

    # Get the path to the desktop folder and list its content
    desktop_path = env.controller.get_vm_desktop_path()
    desktop_directory_tree = env.controller.get_vm_directory_tree(desktop_path)
    if not desktop_directory_tree:
        # The listing request failed; there is nothing to inspect.
        logger.error("Failed to list the desktop directory: %s", desktop_path)
        return {}

    shortcut_names = [entry['name'] for entry in desktop_directory_tree.get('children') or []
                      if entry['name'].endswith(shortcut_extension)]

    short_cuts = {}
    for shortcut_name in shortcut_names:
        # repr() yields a valid Python literal even when the file name
        # contains quotes or backslashes, so the generated command stays valid.
        command = "import os; print(os.path.join(os.path.expanduser('~'), 'Desktop', {:}))".format(
            repr(shortcut_name))
        vm_path = env.controller.execute_python_command(command)['output'].strip()
        # NOTE(review): .lnk files are presumably binary, so the UTF-8 decode
        # may fail on Windows - confirm.
        short_cuts[shortcut_name] = env.controller.get_file(vm_path).decode('utf-8')

    return short_cuts
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ def get_vm_file(env, config: Dict[str, str]) -> Optional[str]:
|
|||||||
file = env.controller.get_file(config["path"])
|
file = env.controller.get_file(config["path"])
|
||||||
if file is None:
|
if file is None:
|
||||||
return None
|
return None
|
||||||
#raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
|
# raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
|
||||||
with open(_path, "wb") as f:
|
with open(_path, "wb") as f:
|
||||||
f.write(file)
|
f.write(file)
|
||||||
|
|
||||||
|
|||||||
@@ -1,23 +1,19 @@
|
|||||||
|
import logging
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
import os
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
logger = logging.getLogger("desktopenv.getters.general")
|
||||||
|
|
||||||
def get_string(env, config: Dict[str, str]) -> str:
|
|
||||||
"""
|
|
||||||
Config:
|
|
||||||
string (str)
|
|
||||||
"""
|
|
||||||
|
|
||||||
return config["string"]
|
def get_vm_command_line(env, config: Dict[str, str]):
|
||||||
|
vm_ip = env.vm_ip
|
||||||
|
port = 5000
|
||||||
|
command = config["command"]
|
||||||
|
|
||||||
def get_command_line(env, config: Dict[str, str]) -> str:
|
response = requests.post(f"http://{vm_ip}:{port}/execute", json={"command": command})
|
||||||
"""
|
|
||||||
Config:
|
if response.status_code == 200:
|
||||||
string (str)
|
return response.json()["output"]
|
||||||
"""
|
else:
|
||||||
|
logger.error("Failed to get vm command line. Status code: %d", response.status_code)
|
||||||
f = os.popen(config["command"])
|
return None
|
||||||
|
|
||||||
return f.read()
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import TypeVar
|
from typing import TypeVar
|
||||||
#from typing import Dict, List
|
|
||||||
|
|
||||||
logger = logging.getLogger("desktopenv.getters.misc")
|
logger = logging.getLogger("desktopenv.getters.misc")
|
||||||
|
|
||||||
@@ -13,6 +12,7 @@ def get_rule(env, config: R) -> R:
|
|||||||
"""
|
"""
|
||||||
return config["rules"]
|
return config["rules"]
|
||||||
|
|
||||||
|
|
||||||
def get_accessibility_tree(env, *args) -> str:
|
def get_accessibility_tree(env, *args) -> str:
|
||||||
accessibility_tree: str = env.controller.get_accessibility_tree()
|
accessibility_tree: str = env.controller.get_accessibility_tree()
|
||||||
logger.debug("AT@eval: %s", accessibility_tree)
|
logger.debug("AT@eval: %s", accessibility_tree)
|
||||||
|
|||||||
@@ -0,0 +1,20 @@
|
|||||||
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
def get_replay(env, trajectory: List[Dict[str, Any]]) -> None:
    """
    Replay a list of keyboard actions in the VM through pyautogui.

    Each action is a dict with a ``"type"`` and a ``"param"``:

    * ``"hotkey"``    - ``param`` is a list of key names pressed together
    * ``"typewrite"`` - ``param`` is the text to type
    * ``"press"``     - ``param`` is the name of a single key

    Every action is translated into a pyautogui call and sent with
    ``env.controller.execute_python_command``. The whole trajectory is
    translated before anything is sent, so an invalid action aborts the
    replay without executing a partial sequence.

    Raises:
        ValueError: if an action has an unknown ``"type"``.
    """
    # fixme: need to be combined with the accessibility tree to activate the selection of the target window

    def quote(value):
        # repr() produces a valid Python literal even when the value contains
        # quotes or backslashes; str() keeps non-string params quoted as before.
        return repr(str(value))

    def parse(action):
        kind = action["type"]
        if kind == "hotkey":
            keys = ", ".join(quote(key) for key in action["param"])
            return f"pyautogui.hotkey({keys})"
        if kind == "typewrite":
            return f"pyautogui.typewrite({quote(action['param'])})"
        if kind == "press":
            return f"pyautogui.press({quote(action['param'])})"
        raise ValueError(f"Unknown replay action type: {kind}")

    commands = [parse(action) for action in trajectory]
    for command in commands:
        env.controller.execute_python_command(command)
|
||||||
|
|||||||
34
desktop_env/evaluators/getters/vscode.py
Normal file
34
desktop_env/evaluators/getters/vscode.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from .file import get_vm_file
|
||||||
|
from .replay import get_replay
|
||||||
|
|
||||||
|
logger = logging.getLogger("desktopenv.getters.vscode")
|
||||||
|
|
||||||
|
|
||||||
|
def get_vscode_config(env, config: Dict[str, Any]) -> str:
    """
    Run a VS Code command-palette command in the VM, then fetch a file from it.

    The command palette is opened with <modifier>+shift+p, the command text is
    typed and confirmed with enter (all replayed through ``get_replay``).
    Afterwards the file at ``config["path"]`` is fetched with ``get_vm_file``.

    Config:
        vscode_extension_command (str): text typed into the command palette
        path (str): path of the file on the VM to fetch afterwards
        dest (str): destination file name handed to ``get_vm_file``

    Returns:
        Whatever ``get_vm_file`` returns for the given path/dest.
    """
    vscode_extension_command = config["vscode_extension_command"]

    # fixme: depends on how we config and install the vscode in virtual machine, need to be aligned and double-checked

    # The desktop-shortcut getter compares env.vm_platform against "Darwin" for
    # macOS, so accept that value here as well; "MacOS" is kept for
    # backward compatibility.
    modifier = "command" if env.vm_platform in ("Darwin", "MacOS") else "ctrl"

    trajectory = [
        {"type": "hotkey", "param": [modifier, "shift", "p"]},
        {"type": "typewrite", "param": vscode_extension_command},
        {"type": "press", "param": "enter"}
    ]

    get_replay(env, trajectory)

    return get_vm_file(env, {
        "path": config["path"],
        "dest": config["dest"]
    })
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
from .chrome import is_expected_tabs, is_expected_bookmarks
|
from .chrome import is_expected_tabs, is_expected_bookmarks, compare_pdfs, is_cookie_deleted, is_shortcut_on_desktop
|
||||||
from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
|
from .docs import compare_font_names, compare_subscript_contains, has_page_numbers_in_footers
|
||||||
from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
|
from .docs import find_default_font, contains_page_break, compare_docx_files, compare_docx_tables, compare_line_spacing, \
|
||||||
compare_insert_equation
|
compare_insert_equation
|
||||||
@@ -13,4 +13,5 @@ from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, co
|
|||||||
from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
|
from .gimp import increase_saturation, decrease_brightness, check_file_exists, compare_triangle_positions
|
||||||
from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3
|
from .general import check_csv, check_accessibility_tree, check_list, run_sqlite3
|
||||||
from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
|
from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter
|
||||||
|
from .vscode import compare_text_file, compare_config, compare_answer, is_extension_installed
|
||||||
|
from .impress import check_slide_numbers_color, compare_pptx_files, check_for_two_lines
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
import rapidfuzz.fuzz as fuzz
|
||||||
|
|
||||||
from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls
|
from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls
|
||||||
|
|
||||||
logger = logging.getLogger("desktopenv.metrics.chrome")
|
logger = logging.getLogger("desktopenv.metrics.chrome")
|
||||||
@@ -22,18 +26,72 @@ def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> f
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def is_expected_bookmarks(bookmarks: Dict[str, Any], rule: Dict[str, Any]) -> float:
    """
    Checks if the expected bookmarks are in Chrome.

    Args:
        bookmarks (Dict[str, Any]): bookmark tree; only
            bookmarks['bookmark_bar']['children'] is read, and every child is
            expected to carry a 'type' key ('folder' or 'url') together with
            'name' (folders) or 'url' (websites)
        rule (Dict[str, Any]): dict like
            {"type": "bookmark_bar_folders_names", "names": [...]}
            or
            {"type": "bookmark_bar_websites_urls", "urls": [...]}

    Return:
        float: 1. if the set found on the bookmark bar equals the expected set,
            0. otherwise (also 0. when `bookmarks` is empty or None)

    Raises:
        TypeError: if rule['type'] is not one of the supported types
    """
    if not bookmarks:
        return 0.

    # Both supported rules do the same thing: pick one field from the bookmark
    # bar children of a given kind and compare it, order-insensitively, with
    # the expected values.
    if rule['type'] == "bookmark_bar_folders_names":
        wanted_kind, field, expected_key = 'folder', 'name', 'names'
    elif rule['type'] == "bookmark_bar_websites_urls":
        wanted_kind, field, expected_key = 'url', 'url', 'urls'
    else:
        raise TypeError(f"{rule['type']} not support yet!")

    bar_children = bookmarks['bookmark_bar']['children']
    found = {child[field] for child in bar_children if child['type'] == wanted_kind}
    return 1. if found == set(rule[expected_key]) else 0.
|
|
||||||
|
|
||||||
|
def compare_pdfs(pdf1_path, pdf2_path):
    """
    Compare two PDF files by the similarity of their extracted text.

    Args:
        pdf1_path: path to the first PDF
        pdf2_path: path to the second PDF

    Return:
        float: `fuzz.ratio` of the two stripped texts divided by 100;
            0. when either path is missing
    """
    # A missing path is scored as a failure instead of crashing inside
    # fitz.open, matching the `if not actual: return 0.` guard of the other metrics.
    if not pdf1_path or not pdf2_path:
        return 0.

    def extract_text_from_pdf(pdf_path):
        """Extract text from each page of the PDF."""
        with fitz.open(pdf_path) as pdf:
            # join once instead of growing a string page by page
            return "".join(page.get_text() for page in pdf).strip()

    text1 = extract_text_from_pdf(pdf1_path)
    text2 = extract_text_from_pdf(pdf2_path)

    return fuzz.ratio(text1, text2) / 100
|
||||||
|
|
||||||
|
|
||||||
|
def is_cookie_deleted(cookie_data, rule):
    """
    Check if the cookie is deleted.

    Args:
        cookie_data: iterable of cookie rows; element 1 of every row is used
            as the cookie's domain
        rule: dict like {"type": "domains", "domains": [...]}

    Return:
        float: 0. as soon as one remaining cookie domain matches (via
            `compare_urls`) one of the domains in the rule, 1. otherwise

    Raises:
        TypeError: if rule['type'] is anything other than 'domains'
    """
    if rule['type'] != 'domains':
        raise TypeError(f"{rule['type']} not support yet!")

    remaining_domains = [row[1] for row in cookie_data]
    still_present = any(
        compare_urls(target, remaining)
        for target in rule['domains']
        for remaining in remaining_domains
    )
    return 0. if still_present else 1.
|
||||||
|
|
||||||
|
|
||||||
|
def is_shortcut_on_desktop(shortcuts: Dict[str, str], rule):
    """
    Check if the shortcut is on the desktop.

    Args:
        shortcuts (Dict[str, str]): mapping of shortcut path to the text
            content of that shortcut file
        rule: dict like {"type": "name", "name": "<expected shortcut name>"}

    Return:
        float: 1. if any shortcut content contains the line
            "Name=<rule['name']>", 0. otherwise

    Raises:
        TypeError: for every rule type other than 'name' ('url' and 'id'
            included, which are not supported yet)
    """
    # fixme: if the name of the website changed in the future, this will not work; can be replaced with url
    if rule['type'] != 'name':
        raise TypeError(f"{rule['type']} not support yet!")

    name_found = any(
        "Name=" + rule['name'] + "\n" in content
        for content in shortcuts.values()
    )
    return 1. if name_found else 0.
|
||||||
|
|||||||
@@ -1,12 +1,14 @@
|
|||||||
import xml.etree.ElementTree as ET
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
from typing import List, Dict, Any
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger("desktopenv.metric.docs")
|
logger = logging.getLogger("desktopenv.metric.docs")
|
||||||
|
|
||||||
|
|
||||||
def find_default_font(config_file_path, rules):
|
def find_default_font(config_file_path, rules):
|
||||||
"""Find the default font in LibreOffice Writer."""
|
"""Find the default font in LibreOffice Writer."""
|
||||||
default_font = None
|
default_font = None
|
||||||
|
|||||||
@@ -1,4 +1,75 @@
|
|||||||
from pptx import Presentation
|
from pptx import Presentation
|
||||||
|
import os
|
||||||
|
|
||||||
|
def is_red_color(color):
    """
    Judge whether the color is pure red, i.e. RGB (255, 0, 0).

    Args:
        color: color object exposing an `rgb` attribute, or None

    Return:
        bool: True only when `color.rgb` equals (255, 0, 0)
    """
    if not color:
        return False
    # NOTE(review): a color without an explicit RGB value may have no usable
    # `.rgb` (python-pptx appears to raise AttributeError there - confirm);
    # such a color is simply not red.
    rgb = getattr(color, "rgb", None)
    return bool(rgb == (255, 0, 0))
|
||||||
|
|
||||||
|
def get_master_placeholder_color(prs):
    """
    Get the font color of the "<number>" placeholder on the slide masters.

    Walks every placeholder of every slide master and returns the font color
    of the first paragraph of the first text placeholder whose text is exactly
    "<number>" (presumably the slide-number field - confirm).

    Args:
        prs: presentation object exposing `slide_masters`

    Return:
        the `font.color` object of that paragraph, or None if no such
        placeholder with at least one paragraph exists
    """
    for master in prs.slide_masters:
        for placeholder in master.placeholders:
            if not placeholder.has_text_frame:
                continue
            if placeholder.text != "<number>":
                continue
            paragraphs = placeholder.text_frame.paragraphs
            if paragraphs:
                return paragraphs[0].font.color
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def check_slide_numbers_color(pptx_file_path):
    """
    Check whether the slide numbers of a presentation are rendered in red.

    The first shape whose text consists only of digits is taken as evidence
    that slide numbers are shown; the color itself is then read from the
    "<number>" placeholder of the slide masters.

    Args:
        pptx_file_path: path to the pptx file

    Return:
        int: 1 if a numeric text shape exists and the master placeholder color
            is red, 0 otherwise (including when no numeric text shape is found)
    """
    presentation = Presentation(pptx_file_path)

    for slide in presentation.slides:
        for shape in slide.shapes:
            # only shapes that carry text can hold a slide number
            if hasattr(shape, "text") and shape.text.isdigit():
                font_color = get_master_placeholder_color(presentation)
                return 1 if font_color is not None and is_red_color(font_color) else 0

    # No slide number on any slide: report a failed check explicitly instead of
    # falling off the end of the function and returning None.
    return 0
|
||||||
|
|
||||||
|
def compare_pptx_files(file1_path, file2_path):
    """
    Compare the text content of two pptx files.

    The presentations must have the same number of slides. Shapes are paired
    positionally per slide; a pair is compared only when both shapes have a
    `text` attribute, and pairing stops at the shorter shape list.

    Args:
        file1_path: path to the first pptx file
        file2_path: path to the second pptx file

    Return:
        int: 1 if the slide counts match and no compared shape pair differs
            in text, 0 otherwise
    """
    deck_a = Presentation(file1_path)
    deck_b = Presentation(file2_path)

    # compare the number of slides
    if len(deck_a.slides) != len(deck_b.slides):
        return 0

    # compare the text of positionally paired shapes on each slide
    texts_match = all(
        shape_a.text == shape_b.text
        for slide_a, slide_b in zip(deck_a.slides, deck_b.slides)
        for shape_a, shape_b in zip(slide_a.shapes, slide_b.shapes)
        if hasattr(shape_a, "text") and hasattr(shape_b, "text")
    )
    return 1 if texts_match else 0
|
||||||
|
|
||||||
|
def has_two_lines_on_page(slide):
    """
    Tell whether a slide holds at least two shapes whose `shape_type` is 1.

    Args:
        slide: slide object exposing `shapes`

    Return:
        bool: True as soon as the second matching shape is seen, False otherwise
    """
    still_needed = 2
    for shape in slide.shapes:
        # shape_type 1 is treated as a Line shape by the original author;
        # NOTE(review): in python-pptx's MSO_SHAPE_TYPE, 1 looks like AUTO_SHAPE - confirm
        if shape.shape_type != 1:
            continue
        still_needed -= 1
        if still_needed == 0:
            return True
    return False
|
||||||
|
|
||||||
|
def check_for_two_lines(prs):
    """
    Check whether any slide of a presentation passes `has_two_lines_on_page`.

    Args:
        prs: path to the pptx file (it is opened with `Presentation`)

    Return:
        int: 1 if at least one slide qualifies, 0 otherwise
    """
    deck = Presentation(prs)
    return 1 if any(has_two_lines_on_page(slide) for slide in deck.slides) else 0
|
||||||
|
|
||||||
|
def check_file_exists(directory, filename):
    """
    Check whether `filename` exists as a regular file inside `directory`.

    Args:
        directory: directory to look in
        filename: name of the file, joined onto `directory`

    Return:
        int: 1 if the joined path is an existing file, 0 otherwise
    """
    candidate = os.path.join(directory, filename)
    return int(os.path.isfile(candidate))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx"
|
path1 = "../../任务数据/LibreOffice Impress/Change_Color_Slide_Number_gold_textbox.pptx"
|
||||||
|
|||||||
@@ -1,37 +1,38 @@
|
|||||||
import lxml.cssselect
|
|
||||||
from lxml.etree import _Element as Element
|
|
||||||
import lxml.etree
|
|
||||||
import fnmatch
|
import fnmatch
|
||||||
|
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
import lxml.cssselect
|
||||||
|
import lxml.etree
|
||||||
|
from lxml.etree import _Element as Element
|
||||||
|
|
||||||
_libconf_namespaces = [("oor", "http://openoffice.org/2001/registry")]
|
_libconf_namespaces = [("oor", "http://openoffice.org/2001/registry")]
|
||||||
_libconf_ns_mapping = dict(_libconf_namespaces)
|
_libconf_ns_mapping = dict(_libconf_namespaces)
|
||||||
_setup_locale_selector = lxml.cssselect.CSSSelector( 'item[oor|path$=L10N]>prop[oor|name=ooSetupSystemLocale]>value'
|
_setup_locale_selector = lxml.cssselect.CSSSelector('item[oor|path$=L10N]>prop[oor|name=ooSetupSystemLocale]>value',
|
||||||
, namespaces=_libconf_ns_mapping
|
namespaces=_libconf_ns_mapping)
|
||||||
)
|
_locale_selector = lxml.cssselect.CSSSelector('item[oor|path$=L10N]>prop[oor|name=ooLocale]>value',
|
||||||
_locale_selector = lxml.cssselect.CSSSelector( 'item[oor|path$=L10N]>prop[oor|name=ooLocale]>value'
|
namespaces=_libconf_ns_mapping)
|
||||||
, namespaces=_libconf_ns_mapping
|
|
||||||
)
|
|
||||||
def check_libre_locale(config_file: str, rules: Dict[str, List[str]]) -> float:
    """
    Check the locale stored in a LibreOffice registry modifications file.

    The value of the `ooSetupSystemLocale` property is used when present;
    otherwise the value of the `ooLocale` property is used.

    Args:
        config_file (str): path to the xml configuration file
        rules (Dict[str, List[str]]): dict like
            {"locale_set": ["ru-*", "de-*"]}
            holding case-sensitive fnmatch patterns

    Return:
        float: 1. if the locale matches at least one pattern, 0. otherwise
    """
    root: Element = lxml.etree.parse(config_file).getroot()
    setup_nodes: List[Element] = _setup_locale_selector(root)
    fallback_nodes: List[Element] = _locale_selector(root)

    if len(setup_nodes) > 0:
        locale_value: str = setup_nodes[0].text
    else:
        locale_value: str = fallback_nodes[0].text

    for pattern in rules["locale_set"]:
        if fnmatch.fnmatchcase(locale_value, pattern):
            return 1.0
    return 0.0
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
path1 = "../../任务数据/LibreOffice Calc/registrymodifications.ru.xcu"
|
path1 = "../../任务数据/LibreOffice Calc/registrymodifications.ru.xcu"
|
||||||
print( check_libre_locale( path1, { "locale_set": [ "ru-*", "de-*", "fr-*"
|
print(check_libre_locale(path1, {"locale_set": ["ru-*", "de-*", "fr-*"
|
||||||
, "pt-*", "es-*", "it-*"
|
, "pt-*", "es-*", "it-*"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,13 +1,11 @@
|
|||||||
from pypdf import PdfReader
|
|
||||||
import operator
|
import operator
|
||||||
|
|
||||||
from typing import Dict
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from pypdf import PdfReader
|
||||||
|
|
||||||
|
|
||||||
def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float:
    """
    Compare the page count of a PDF against a reference value.

    Args:
        pdf_file (str): path to the pdf file
        rules (Dict[str, Any]): dict like
            {"relation": "eq", "ref_value": 3}
            where "relation" names a function of the `operator` module

    Return:
        float: the result of `operator.<relation>(page_count, ref_value)` as a float
    """
    page_count: int = len(PdfReader(pdf_file).pages)
    relation = getattr(operator, rules["relation"])
    return float(relation(page_count, rules["ref_value"]))
|
|
||||||
|
|||||||
@@ -1,18 +1,19 @@
|
|||||||
import pandas as pd
|
import logging
|
||||||
|
import operator
|
||||||
|
from numbers import Number
|
||||||
|
from typing import Any, Union
|
||||||
|
from typing import Dict, List
|
||||||
|
|
||||||
import openpyxl
|
import openpyxl
|
||||||
|
import pandas as pd
|
||||||
from openpyxl import Workbook
|
from openpyxl import Workbook
|
||||||
from openpyxl.worksheet.worksheet import Worksheet
|
from openpyxl.worksheet.worksheet import Worksheet
|
||||||
|
|
||||||
from .utils import load_charts, load_sparklines
|
from .utils import load_charts, load_sparklines
|
||||||
import operator
|
|
||||||
|
|
||||||
from typing import Dict, List
|
|
||||||
from typing import Any, Union
|
|
||||||
from numbers import Number
|
|
||||||
|
|
||||||
import logging
|
|
||||||
logger = logging.getLogger("desktopenv.metric.table")
|
logger = logging.getLogger("desktopenv.metric.table")
|
||||||
|
|
||||||
|
|
||||||
def compare_table(actual: str, expected: str, **options) -> float:
|
def compare_table(actual: str, expected: str, **options) -> float:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
@@ -44,28 +45,28 @@ def compare_table(actual: str, expected: str, **options) -> float:
|
|||||||
workbook1: Workbook = openpyxl.load_workbook(actual)
|
workbook1: Workbook = openpyxl.load_workbook(actual)
|
||||||
workbook2: Workbook = openpyxl.load_workbook(expected)
|
workbook2: Workbook = openpyxl.load_workbook(expected)
|
||||||
|
|
||||||
if ftr=="sparkline":
|
if ftr == "sparkline":
|
||||||
sp1 = load_sparklines(actual)
|
sp1 = load_sparklines(actual)
|
||||||
sp2 = load_sparklines(expected)
|
sp2 = load_sparklines(expected)
|
||||||
new_metric: bool = sp1 == sp2
|
new_metric: bool = sp1 == sp2
|
||||||
logger.debug("Sparkline Metric: {:}".format(new_metric))
|
logger.debug("Sparkline Metric: {:}".format(new_metric))
|
||||||
elif ftr=="chart":
|
elif ftr == "chart":
|
||||||
charts1 = load_charts(workbook1, **options)
|
charts1 = load_charts(workbook1, **options)
|
||||||
charts2 = load_charts(workbook2, **options)
|
charts2 = load_charts(workbook2, **options)
|
||||||
new_metric: bool = charts1 == charts2
|
new_metric: bool = charts1 == charts2
|
||||||
logger.debug("Chart Metric: {:}".format(new_metric))
|
logger.debug("Chart Metric: {:}".format(new_metric))
|
||||||
elif ftr=="number_format":
|
elif ftr == "number_format":
|
||||||
number_formats1: List[str] = [ c.number_format.lower()\
|
number_formats1: List[str] = [c.number_format.lower() \
|
||||||
for col in workbook1.active.iter_cols()\
|
for col in workbook1.active.iter_cols() \
|
||||||
for c in col\
|
for c in col \
|
||||||
if c.data_type=="n"
|
if c.data_type == "n"
|
||||||
]
|
]
|
||||||
number_formats2: List[str] = [ c.number_format.lower()\
|
number_formats2: List[str] = [c.number_format.lower() \
|
||||||
for col in workbook2.active.iter_cols()\
|
for col in workbook2.active.iter_cols() \
|
||||||
for c in col\
|
for c in col \
|
||||||
if c.data_type=="n"
|
if c.data_type == "n"
|
||||||
]
|
]
|
||||||
new_metric: bool = number_formats1==number_formats2
|
new_metric: bool = number_formats1 == number_formats2
|
||||||
logger.debug("Number Format Metric: {:}".format(new_metric))
|
logger.debug("Number Format Metric: {:}".format(new_metric))
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
|
raise NotImplementedError("Unsupported xlsx feature: {:}".format(ftr))
|
||||||
@@ -73,6 +74,7 @@ def compare_table(actual: str, expected: str, **options) -> float:
|
|||||||
|
|
||||||
return float(metric)
|
return float(metric)
|
||||||
|
|
||||||
|
|
||||||
def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
|
def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
|
||||||
if result is None:
|
if result is None:
|
||||||
return 0.
|
return 0.
|
||||||
@@ -114,6 +116,7 @@ def check_sheet_list(result: str, rules: List[Dict[str, Any]]) -> float:
|
|||||||
|
|
||||||
return float(passes)
|
return float(passes)
|
||||||
|
|
||||||
|
|
||||||
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
|
def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
|
||||||
if result is None:
|
if result is None:
|
||||||
return 0.
|
return 0.
|
||||||
@@ -121,16 +124,18 @@ def check_xlsx_freeze(result: str, rules: Dict[str, str]) -> float:
|
|||||||
worksheet: Worksheet = openpyxl.load_workbook(filename=result).active
|
worksheet: Worksheet = openpyxl.load_workbook(filename=result).active
|
||||||
return float(worksheet.freeze_panes == rules["position"])
|
return float(worksheet.freeze_panes == rules["position"])
|
||||||
|
|
||||||
|
|
||||||
def check_xlsx_zoom(result: str, rules: Dict[str, Union[str, Number]]) -> float:
    """
    Compare the zoom scale of the active worksheet against a reference value.

    Args:
        result (str): path to the xlsx file, or None
        rules (Dict[str, Union[str, Number]]): dict like
            {"relation": "lt", "ref_value": 100}
            where "relation" names a function of the `operator` module

    Return:
        float: 0. if `result` is None, otherwise the result of
            `operator.<relation>(zoom_scale, ref_value)` as a float
    """
    if result is None:
        return 0.

    active_sheet = openpyxl.load_workbook(filename=result).active
    zoom_scale: Number = active_sheet.sheet_view.zoomScale
    if not zoom_scale:
        # a missing zoomScale is treated as 100
        zoom_scale = 100.

    relation = getattr(operator, rules["relation"])
    return float(relation(zoom_scale, rules["ref_value"]))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# path1 = ""
|
# path1 = ""
|
||||||
@@ -168,51 +173,51 @@ if __name__ == '__main__':
|
|||||||
# ]
|
# ]
|
||||||
# print(check_sheet_list(path1, rule))
|
# print(check_sheet_list(path1, rule))
|
||||||
|
|
||||||
#path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
|
# path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
|
||||||
#path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
|
# path2 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold2.xlsx"
|
||||||
#print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"]))
|
# print(compare_table(path1, path2, features=["chart"], chart_props=["type", "direction"]))
|
||||||
|
|
||||||
#path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
|
# path1 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold.xlsx"
|
||||||
#path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
|
# path2 = "../../任务数据/LibreOffice Calc/Represent_in_millions_billions_gold3.xlsx"
|
||||||
#path1 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot.xlsx"
|
# path1 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot.xlsx"
|
||||||
#path2 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot_gold.xlsx"
|
# path2 = "../../任务数据/LibreOffice Calc/Set_Decimal_Separator_Dot_gold.xlsx"
|
||||||
#workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
# workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
||||||
#worksheet1: Worksheet = workbook1.active
|
# worksheet1: Worksheet = workbook1.active
|
||||||
#import itertools
|
# import itertools
|
||||||
#for col, r in itertools.product( ['A', 'B']
|
# for col, r in itertools.product( ['A', 'B']
|
||||||
#, range(1, 20)
|
# , range(1, 20)
|
||||||
#):
|
# ):
|
||||||
#position: str = "{:}{:d}".format(col, r)
|
# position: str = "{:}{:d}".format(col, r)
|
||||||
#print(worksheet1[position])
|
# print(worksheet1[position])
|
||||||
#print(worksheet1[position].value)
|
# print(worksheet1[position].value)
|
||||||
#print(worksheet1[position].number_format)
|
# print(worksheet1[position].number_format)
|
||||||
#workbook2: Workbook = openpyxl.load_workbook(filename=path2)
|
# workbook2: Workbook = openpyxl.load_workbook(filename=path2)
|
||||||
#worksheet2: Worksheet = workbook2.active
|
# worksheet2: Worksheet = workbook2.active
|
||||||
#for col, r in itertools.product( ['A', 'B']
|
# for col, r in itertools.product( ['A', 'B']
|
||||||
#, range(1, 20)
|
# , range(1, 20)
|
||||||
#):
|
# ):
|
||||||
#position: str = "{:}{:d}".format(col, r)
|
# position: str = "{:}{:d}".format(col, r)
|
||||||
#print(worksheet2[position])
|
# print(worksheet2[position])
|
||||||
#print(worksheet2[position].value)
|
# print(worksheet2[position].value)
|
||||||
#print(worksheet2[position].number_format)
|
# print(worksheet2[position].number_format)
|
||||||
#print(compare_table(path1, path2, features=["number_format"]))
|
# print(compare_table(path1, path2, features=["number_format"]))
|
||||||
|
|
||||||
#path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx"
|
# path1 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells_gold.xlsx"
|
||||||
#path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx"
|
# path2 = "../../任务数据/LibreOffice Calc/Zoom_Out_Oversized_Cells.xlsx"
|
||||||
#workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
# workbook1: Workbook = openpyxl.load_workbook(filename=path1)
|
||||||
#worksheet1: Worksheet = workbook1.active
|
# worksheet1: Worksheet = workbook1.active
|
||||||
#print(worksheet1.sheet_view.zoomScale)
|
# print(worksheet1.sheet_view.zoomScale)
|
||||||
#print(type(worksheet1.sheet_view.zoomScale))
|
# print(type(worksheet1.sheet_view.zoomScale))
|
||||||
#
|
#
|
||||||
#import os
|
# import os
|
||||||
#import os.path
|
# import os.path
|
||||||
#for wb in filter( lambda f: f.endswith(".xlsx")
|
# for wb in filter( lambda f: f.endswith(".xlsx")
|
||||||
#, os.listdir("../../任务数据/LibreOffice Calc/")
|
# , os.listdir("../../任务数据/LibreOffice Calc/")
|
||||||
#):
|
# ):
|
||||||
#path = os.path.join("../../任务数据/LibreOffice Calc/", wb)
|
# path = os.path.join("../../任务数据/LibreOffice Calc/", wb)
|
||||||
#print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale)
|
# print(wb, openpyxl.load_workbook(filename=path).active.sheet_view.zoomScale)
|
||||||
#print(check_zoom(path1, {"relation": "lt", "ref_value": 100}))
|
# print(check_zoom(path1, {"relation": "lt", "ref_value": 100}))
|
||||||
#print(check_zoom(path2, {"relation": "lt", "ref_value": 100}))
|
# print(check_zoom(path2, {"relation": "lt", "ref_value": 100}))
|
||||||
|
|
||||||
path1 = "../../任务数据/LibreOffice Calc/Padding_Decimals_In_Formular_gold.xlsx"
|
path1 = "../../任务数据/LibreOffice Calc/Padding_Decimals_In_Formular_gold.xlsx"
|
||||||
data_frame: pd.DataFrame = pd.read_excel(path1)
|
data_frame: pd.DataFrame = pd.read_excel(path1)
|
||||||
|
|||||||
@@ -1,16 +1,18 @@
|
|||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
|
||||||
def compare_text_file(actual: str, expected: str, **options) -> float:
|
def compare_text_file(actual: str, expected: str, **options) -> float:
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
actual (str): path to result xlsx
|
actual (str): path to result text file
|
||||||
expected (str): path to gold xlsx
|
expected (str): path to gold text file
|
||||||
options (Dict[str, List[str]]): dict like
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
Return:
|
Return:
|
||||||
float: the score
|
float: the score
|
||||||
"""
|
"""
|
||||||
|
if not actual:
|
||||||
|
return 0.
|
||||||
|
|
||||||
with open(actual) as f1:
|
with open(actual) as f1:
|
||||||
actual_text = f1.read()
|
actual_text = f1.read()
|
||||||
with open(expected) as f2:
|
with open(expected) as f2:
|
||||||
@@ -20,13 +22,46 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
|
|||||||
return 1.0
|
return 1.0
|
||||||
return 0.0
|
return 0.0
|
||||||
|
|
||||||
def compare_answer(actual: str, expected: str, **options) -> float:
|
|
||||||
|
|
||||||
if actual == expected:
|
def compare_config(actual: str, rules: Dict, **options) -> float:
    """
    Args:
        actual (str): path to the result config file
        rules (Dict): dict like
            {"expect": "<exact expected file content>"}
        options: accepted but not used

    Return:
        float: 1.0 if the file content equals rules['expect'] exactly,
            0.0 otherwise (0. when `actual` is empty or None)
    """
    if not actual:
        return 0.

    with open(actual) as config_file:
        content = config_file.read()

    return 1.0 if content == rules['expect'] else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def compare_answer(actual: str, rules: Dict, **options) -> float:
    """
    Args:
        actual (str): result string
        rules (Dict): dict like
            {"expect": "<gold string>"}
        options: accepted but not used

    Return:
        float: 1.0 if `actual` is non-empty and equals rules['expect'],
            0.0 otherwise
    """
    if actual and actual == rules['expect']:
        return 1.0

    # TODO: can use text embedding to get non-zero return
    return 0.0
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
print(compare_text_file("README.md", "README.md"))
|
def is_extension_installed(actual: str, rules: Dict, **options):
    """
    Check whether an expected string is (or is not) contained in the result.

    Args:
        actual (str): result string, e.g. a listing of installed extensions;
            None when no result could be obtained
        rules (Dict): dict like
            {"type": "contain", "expected": "<substring>"}
            or
            {"type": "not_contain", "expected": "<substring>"}
        options: accepted but not used

    Return:
        float: 1.0 if the containment condition holds, 0.0 otherwise
            (0.0 as well when `actual` is None)

    Raises:
        NotImplementedError: if rules['type'] is not supported
    """
    rule_type = rules['type']
    if rule_type not in ('contain', 'not_contain'):
        raise NotImplementedError(f"{rule_type} not support yet!")

    # A missing result cannot prove either condition; score it as a failure,
    # like the other metrics do, instead of raising TypeError on `in`.
    # An empty string is still a valid result and is evaluated normally.
    if actual is None:
        return 0.0

    contained = rules['expected'] in actual
    if rule_type == 'contain':
        return 1.0 if contained else 0.0
    return 0.0 if contained else 1.0
|
||||||
|
|||||||
@@ -71,3 +71,10 @@ You can use accerciser to check the accessibility tree on GNOME VM.
|
|||||||
```sh
|
```sh
|
||||||
sudo apt install accerciser
|
sudo apt install accerciser
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Additional Installation
|
||||||
|
Activating the window manager control requires the installation of `wmctrl`:
|
||||||
|
```bash
|
||||||
|
sudo apt install wmctrl
|
||||||
|
```
|
||||||
|
|||||||
@@ -3,29 +3,26 @@ import os
|
|||||||
import platform
|
import platform
|
||||||
import subprocess
|
import subprocess
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any, Optional
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
|
import Xlib
|
||||||
import lxml.etree
|
import lxml.etree
|
||||||
from lxml.etree import _Element
|
|
||||||
import pyatspi
|
import pyatspi
|
||||||
|
import pyautogui
|
||||||
|
import requests
|
||||||
|
from PIL import Image
|
||||||
|
from Xlib import display, X
|
||||||
|
from flask import Flask, request, jsonify, send_file, abort
|
||||||
|
from lxml.etree import _Element
|
||||||
from pyatspi import Accessible, StateType
|
from pyatspi import Accessible, StateType
|
||||||
|
from pyatspi import Action as ATAction
|
||||||
from pyatspi import Component, Document
|
from pyatspi import Component, Document
|
||||||
from pyatspi import Text as ATText
|
from pyatspi import Text as ATText
|
||||||
from pyatspi import Value as ATValue
|
from pyatspi import Value as ATValue
|
||||||
from pyatspi import Action as ATAction
|
|
||||||
|
|
||||||
from typing import List, Dict
|
|
||||||
from typing import Any, Optional
|
|
||||||
|
|
||||||
import Xlib
|
|
||||||
import pyautogui
|
|
||||||
from PIL import Image
|
|
||||||
from Xlib import display, X
|
|
||||||
from pyxcursor import Xcursor
|
from pyxcursor import Xcursor
|
||||||
|
|
||||||
import requests
|
|
||||||
from flask import Flask, request, jsonify, send_file, abort
|
|
||||||
from werkzeug.utils import secure_filename
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
pyautogui.PAUSE = 0
|
pyautogui.PAUSE = 0
|
||||||
@@ -141,22 +138,24 @@ def get_terminal_output():
|
|||||||
xpath = '//application[@name="gnome-terminal-server"]/frame[@st:active="true"]//terminal[@st:focused="true"]'
|
xpath = '//application[@name="gnome-terminal-server"]/frame[@st:active="true"]//terminal[@st:focused="true"]'
|
||||||
terminals: List[_Element] = desktop_xml.xpath(xpath, namespaces=_accessibility_ns_map)
|
terminals: List[_Element] = desktop_xml.xpath(xpath, namespaces=_accessibility_ns_map)
|
||||||
output = terminals[0].text.rstrip() if len(terminals) == 1 else None
|
output = terminals[0].text.rstrip() if len(terminals) == 1 else None
|
||||||
else: # windows and macos platform is not implemented currently
|
else: # windows and macos platform is not implemented currently
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
return jsonify({"output": output, "status": "success"})
|
return jsonify({"output": output, "status": "success"})
|
||||||
except:
|
except:
|
||||||
return jsonify({"output": None, "status": "error"})
|
return jsonify({"output": None, "status": "error"})
|
||||||
|
|
||||||
|
|
||||||
_accessibility_ns_map = { "st": "uri:deskat:state.at-spi.gnome.org"
|
_accessibility_ns_map = {"st": "uri:deskat:state.at-spi.gnome.org"
|
||||||
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
|
, "attr": "uri:deskat:attributes.at-spi.gnome.org"
|
||||||
, "cp": "uri:deskat:component.at-spi.gnome.org"
|
, "cp": "uri:deskat:component.at-spi.gnome.org"
|
||||||
, "doc": "uri:deskat:document.at-spi.gnome.org"
|
, "doc": "uri:deskat:document.at-spi.gnome.org"
|
||||||
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
|
, "docattr": "uri:deskat:attributes.document.at-spi.gnome.org"
|
||||||
, "txt": "uri:deskat:text.at-spi.gnome.org"
|
, "txt": "uri:deskat:text.at-spi.gnome.org"
|
||||||
, "val": "uri:deskat:value.at-spi.gnome.org"
|
, "val": "uri:deskat:value.at-spi.gnome.org"
|
||||||
, "act": "uri:deskat:action.at-spi.gnome.org"
|
, "act": "uri:deskat:action.at-spi.gnome.org"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _create_node(node: Accessible) -> _Element:
|
def _create_node(node: Accessible) -> _Element:
|
||||||
attribute_dict: Dict[str, Any] = {"name": node.name}
|
attribute_dict: Dict[str, Any] = {"name": node.name}
|
||||||
|
|
||||||
@@ -164,11 +163,11 @@ def _create_node(node: Accessible) -> _Element:
|
|||||||
states: List[StateType] = node.getState().get_states()
|
states: List[StateType] = node.getState().get_states()
|
||||||
for st in states:
|
for st in states:
|
||||||
state_name: str = StateType._enum_lookup[st]
|
state_name: str = StateType._enum_lookup[st]
|
||||||
attribute_dict[ "{{{:}}}{:}"\
|
attribute_dict["{{{:}}}{:}" \
|
||||||
.format( _accessibility_ns_map["st"]
|
.format(_accessibility_ns_map["st"]
|
||||||
, state_name.split("_", maxsplit=1)[1].lower()
|
, state_name.split("_", maxsplit=1)[1].lower()
|
||||||
)
|
)
|
||||||
] = "true"
|
] = "true"
|
||||||
# }}} States #
|
# }}} States #
|
||||||
|
|
||||||
# Attributes {{{ #
|
# Attributes {{{ #
|
||||||
@@ -177,11 +176,11 @@ def _create_node(node: Accessible) -> _Element:
|
|||||||
attribute_name: str
|
attribute_name: str
|
||||||
attribute_value: str
|
attribute_value: str
|
||||||
attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
|
attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
|
||||||
attribute_dict[ "{{{:}}}{:}"\
|
attribute_dict["{{{:}}}{:}" \
|
||||||
.format( _accessibility_ns_map["attr"]
|
.format(_accessibility_ns_map["attr"]
|
||||||
, attribute_name
|
, attribute_name
|
||||||
)
|
)
|
||||||
] = attribute_value
|
] = attribute_value
|
||||||
# }}} Attributes #
|
# }}} Attributes #
|
||||||
|
|
||||||
# Component {{{ #
|
# Component {{{ #
|
||||||
@@ -190,9 +189,12 @@ def _create_node(node: Accessible) -> _Element:
|
|||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_SCREEN))
|
attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map["cp"])] = str(
|
||||||
attribute_dict["{{{:}}}windowcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_WINDOW))
|
component.getPosition(pyatspi.XY_SCREEN))
|
||||||
attribute_dict["{{{:}}}parentcoord".format(_accessibility_ns_map["cp"])] = str(component.getPosition(pyatspi.XY_PARENT))
|
attribute_dict["{{{:}}}windowcoord".format(_accessibility_ns_map["cp"])] = str(
|
||||||
|
component.getPosition(pyatspi.XY_WINDOW))
|
||||||
|
attribute_dict["{{{:}}}parentcoord".format(_accessibility_ns_map["cp"])] = str(
|
||||||
|
component.getPosition(pyatspi.XY_PARENT))
|
||||||
attribute_dict["{{{:}}}size".format(_accessibility_ns_map["cp"])] = str(component.getSize())
|
attribute_dict["{{{:}}}size".format(_accessibility_ns_map["cp"])] = str(component.getSize())
|
||||||
# }}} Component #
|
# }}} Component #
|
||||||
|
|
||||||
@@ -209,11 +211,11 @@ def _create_node(node: Accessible) -> _Element:
|
|||||||
attribute_name: str
|
attribute_name: str
|
||||||
attribute_value: str
|
attribute_value: str
|
||||||
attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
|
attribute_name, attribute_value = attrbt.split(":", maxsplit=1)
|
||||||
attribute_dict[ "{{{:}}}{:}"\
|
attribute_dict["{{{:}}}{:}" \
|
||||||
.format( _accessibility_ns_map["docattr"]
|
.format(_accessibility_ns_map["docattr"]
|
||||||
, attribute_name
|
, attribute_name
|
||||||
)
|
)
|
||||||
] = attribute_value
|
] = attribute_value
|
||||||
# }}} Document #
|
# }}} Document #
|
||||||
|
|
||||||
# Text {{{ #
|
# Text {{{ #
|
||||||
@@ -223,13 +225,13 @@ def _create_node(node: Accessible) -> _Element:
|
|||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
# only text shown on current screen is available
|
# only text shown on current screen is available
|
||||||
#attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
|
# attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
|
||||||
text: str = text_obj.getText(0, text_obj.characterCount)
|
text: str = text_obj.getText(0, text_obj.characterCount)
|
||||||
# }}} Text #
|
# }}} Text #
|
||||||
|
|
||||||
# Selection {{{ #
|
# Selection {{{ #
|
||||||
try:
|
try:
|
||||||
node.querySelection()
|
node.querySelection()
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
@@ -256,34 +258,36 @@ def _create_node(node: Accessible) -> _Element:
|
|||||||
else:
|
else:
|
||||||
for i in range(action.nActions):
|
for i in range(action.nActions):
|
||||||
action_name: str = action.getName(i).replace(" ", "-")
|
action_name: str = action.getName(i).replace(" ", "-")
|
||||||
attribute_dict[ "{{{:}}}{:}_desc"\
|
attribute_dict["{{{:}}}{:}_desc" \
|
||||||
.format( _accessibility_ns_map["act"]
|
.format(_accessibility_ns_map["act"]
|
||||||
, action_name
|
, action_name
|
||||||
)
|
)
|
||||||
] = action.getDescription(i)
|
] = action.getDescription(i)
|
||||||
attribute_dict[ "{{{:}}}{:}_kb"\
|
attribute_dict["{{{:}}}{:}_kb" \
|
||||||
.format( _accessibility_ns_map["act"]
|
.format(_accessibility_ns_map["act"]
|
||||||
, action_name
|
, action_name
|
||||||
)
|
)
|
||||||
] = action.getKeyBinding(i)
|
] = action.getKeyBinding(i)
|
||||||
# }}} Action #
|
# }}} Action #
|
||||||
|
|
||||||
xml_node = lxml.etree.Element( node.getRoleName().replace(" ", "-")
|
xml_node = lxml.etree.Element(node.getRoleName().replace(" ", "-")
|
||||||
, attrib=attribute_dict
|
, attrib=attribute_dict
|
||||||
, nsmap=_accessibility_ns_map
|
, nsmap=_accessibility_ns_map
|
||||||
)
|
)
|
||||||
if "text" in locals() and len(text)>0:
|
if "text" in locals() and len(text) > 0:
|
||||||
xml_node.text = text
|
xml_node.text = text
|
||||||
for ch in node:
|
for ch in node:
|
||||||
xml_node.append(_create_node(ch))
|
xml_node.append(_create_node(ch))
|
||||||
return xml_node
|
return xml_node
|
||||||
|
|
||||||
|
|
||||||
@app.route("/accessibility", methods=["GET"])
def get_accessibility_tree():
    """Serve the desktop's accessibility tree as serialized XML.

    Fetches desktop 0 from the pyatspi registry, converts it into an lxml
    element with ``_create_node`` and returns the XML text under the
    ``"AT"`` key of a JSON response.
    """
    root: Accessible = pyatspi.Registry.getDesktop(0)
    xml_root: _Element = _create_node(root)
    serialized = lxml.etree.tostring(xml_root, encoding="unicode")
    return jsonify({"AT": serialized})
|
||||||
|
|
||||||
|
|
||||||
@app.route('/screen_size', methods=['POST'])
|
@app.route('/screen_size', methods=['POST'])
|
||||||
def get_screen_size():
|
def get_screen_size():
|
||||||
d = display.Display()
|
d = display.Display()
|
||||||
@@ -563,5 +567,43 @@ def open_file():
|
|||||||
return f"Failed to open {path}. Error: {e}", 500
|
return f"Failed to open {path}. Error: {e}", 500
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/setup/activate_window", methods=['POST'])
def activate_window():
    """Bring the window whose title matches ``window_name`` to the front.

    Expects a JSON body with a ``window_name`` string. On Windows and macOS
    the first window returned by ``pygetwindow.getWindowsWithTitle`` is
    activated (un-minimized first on macOS). On Linux ``wmctrl -a`` is
    launched without waiting for it, so a non-matching title is not
    reported there.

    Returns:
        A ``(message, status)`` tuple: 200 on success, 400 for a missing
        ``window_name`` or an unsupported OS, 404 when no window matches
        (Windows/macOS only), 500 when ``wmctrl`` cannot be started.
    """
    data = request.get_json(silent=True)
    window_name = data.get('window_name') if isinstance(data, dict) else None
    if not isinstance(window_name, str) or not window_name:
        # Without a usable title there is nothing to search for; handing
        # None to pygetwindow/wmctrl would only surface as an opaque 500.
        return "window_name is required.", 400

    os_name = platform.system()

    if os_name in ('Windows', 'Darwin'):
        import pygetwindow as gw
        try:
            # Only the lookup can legitimately come back empty.
            window = gw.getWindowsWithTitle(window_name)[0]
        except IndexError:
            return f"Window '{window_name}' not found.", 404
        if os_name == 'Darwin':
            # Un-minimize the window before bringing it to the front
            window.unminimize()
        window.activate()

    elif os_name == 'Linux':
        try:
            # Best effort: wmctrl runs in the background and its exit
            # status is not inspected.
            subprocess.Popen(["wmctrl", "-a", window_name])
        except FileNotFoundError:
            return "wmctrl is not installed.", 500

    else:
        return f"Operating system {os_name} not supported.", 400

    return f"Window '{window_name}' activated successfully", 200
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # Bind to every interface so the server can be reached from outside this
    # machine. NOTE(review): debug=True also enables Flask's debug mode --
    # confirm that is intended wherever this port is reachable.
    app.run(host="0.0.0.0", debug=True)
|
||||||
|
|||||||
@@ -36,7 +36,8 @@
|
|||||||
"expected": {
|
"expected": {
|
||||||
"type": "rule",
|
"type": "rule",
|
||||||
"rules": {
|
"rules": {
|
||||||
|
"type": "bookmark_bar_folders_names",
|
||||||
|
"names": ["Favorites"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,16 +3,50 @@
|
|||||||
"snapshot": "chrome",
|
"snapshot": "chrome",
|
||||||
"instruction": "Hey, I need a quick way back to this site. Could you whip up a shortcut on my desktop for me?",
|
"instruction": "Hey, I need a quick way back to this site. Could you whip up a shortcut on my desktop for me?",
|
||||||
"source": "https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome",
|
"source": "https://www.laptopmag.com/articles/how-to-create-desktop-shortcuts-for-web-pages-using-chrome",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"google-chrome",
|
||||||
|
"--remote-debugging-port=1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"socat",
|
||||||
|
"tcp-listen:9222,fork",
|
||||||
|
"tcp:localhost:1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "chrome_open_tabs",
|
||||||
|
"parameters": {
|
||||||
|
"urls_to_open": [
|
||||||
|
"https://www.mathsisfun.com/games/2048.html"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
"chrome"
|
"chrome"
|
||||||
],
|
],
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"func": "",
|
"func": "is_shortcut_on_desktop",
|
||||||
"result": {
|
"result": {
|
||||||
|
"type": "shortcuts_on_desktop"
|
||||||
},
|
},
|
||||||
"expected": {
|
"expected": {
|
||||||
|
"type": "rule",
|
||||||
|
"rules": {
|
||||||
|
"type": "name",
|
||||||
|
"name": "Play Puzzle Game 2048"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,18 +1,53 @@
|
|||||||
{
|
{
|
||||||
"id": "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
|
"id": "7a5a7856-f1b6-42a4-ade9-1ca81ca0f263",
|
||||||
"snapshot": "chrome",
|
"snapshot": "chrome",
|
||||||
"instruction": "Can you save this webpage I'm looking at to my bookmarks so I can come back to it later?",
|
"instruction": "Can you save this webpage I'm looking at to the bookmarks bar so I can come back to it later?",
|
||||||
"source": "https://www.youtube.com/watch?v=ZaZ8GcTxjXA",
|
"source": "https://www.youtube.com/watch?v=ZaZ8GcTxjXA",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"google-chrome",
|
||||||
|
"--remote-debugging-port=1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"socat",
|
||||||
|
"tcp-listen:9222,fork",
|
||||||
|
"tcp:localhost:1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "chrome_open_tabs",
|
||||||
|
"parameters": {
|
||||||
|
"urls_to_open": [
|
||||||
|
"https://blog.eleuther.ai/rotary-embeddings/",
|
||||||
|
"https://jalammar.github.io/illustrated-transformer/"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
"chrome"
|
"chrome"
|
||||||
],
|
],
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"func": "",
|
"func": "is_expected_bookmarks",
|
||||||
"result": {
|
"result": {
|
||||||
|
"type": "bookmarks"
|
||||||
},
|
},
|
||||||
"expected": {
|
"expected": {
|
||||||
|
"type": "rule",
|
||||||
|
"rules": {
|
||||||
|
"type": "bookmark_bar_websites_urls",
|
||||||
|
"urls": ["https://jalammar.github.io/illustrated-transformer/"]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,18 +1,54 @@
|
|||||||
{
|
{
|
||||||
"id": "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
|
"id": "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3",
|
||||||
"snapshot": "chrome",
|
"snapshot": "chrome",
|
||||||
"instruction": "Can you help me clean up my computer by getting rid of all the tracking things that websites like Amazon or eBay might have saved? I want to make sure my browsing is private and those sites don't remember me.",
|
"instruction": "Can you help me clean up my computer by getting rid of all the tracking things that Amazon might have saved? I want to make sure my browsing is private and those sites don't remember me.",
|
||||||
"source": "https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site",
|
"source": "https://support.google.com/chrome/answer/95647?hl=en&ref_topic=7438325&sjid=16867045591165135686-AP#zippy=%2Cdelete-cookies-from-a-site",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"google-chrome",
|
||||||
|
"--remote-debugging-port=1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"socat",
|
||||||
|
"tcp-listen:9222,fork",
|
||||||
|
"tcp:localhost:1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "chrome_open_tabs",
|
||||||
|
"parameters": {
|
||||||
|
"urls_to_open": [
|
||||||
|
"https://www.amazon.com",
|
||||||
|
"https://www.amazon.com/s?k=huggingface+transformers+book"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
"chrome"
|
"chrome"
|
||||||
],
|
],
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"func": "",
|
"func": "is_cookie_deleted",
|
||||||
"result": {
|
"result": {
|
||||||
|
"type": "cookie_data",
|
||||||
|
"dest": "Cookies"
|
||||||
},
|
},
|
||||||
"expected": {
|
"expected": {
|
||||||
|
"type": "rule",
|
||||||
|
"rules": {
|
||||||
|
"type": "domains",
|
||||||
|
"domains": [".amazon.com"]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,16 +3,50 @@
|
|||||||
"snapshot": "chrome",
|
"snapshot": "chrome",
|
||||||
"instruction": "Computer, can you turn the webpage I'm looking at into a PDF file and put it on my main screen, you know, the Desktop?",
|
"instruction": "Computer, can you turn the webpage I'm looking at into a PDF file and put it on my main screen, you know, the Desktop?",
|
||||||
"source": "https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php",
|
"source": "https://in5stepstutorials.com/google-chrome/save-web-page-as-pdf-in-chrome.php",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"google-chrome",
|
||||||
|
"--remote-debugging-port=1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"socat",
|
||||||
|
"tcp-listen:9222,fork",
|
||||||
|
"tcp:localhost:1337"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "chrome_open_tabs",
|
||||||
|
"parameters": {
|
||||||
|
"urls_to_open": [
|
||||||
|
"https://lilianweng.github.io/posts/2023-06-23-agent/"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
"chrome"
|
"chrome"
|
||||||
],
|
],
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"func": "",
|
"func": "compare_pdfs",
|
||||||
"result": {
|
"result": {
|
||||||
|
"type": "vm_file",
|
||||||
|
"path": "Desktop/LLM Powered Autonomous Agents _ Lil'Log.pdf",
|
||||||
|
"dest": "LLM Powered Autonomous Agents _ Lil'Log.pdf"
|
||||||
},
|
},
|
||||||
"expected": {
|
"expected": {
|
||||||
|
"type": "pdf_from_url",
|
||||||
|
"path": "https://lilianweng.github.io/posts/2023-06-23-agent/",
|
||||||
|
"dest": "LLM Powered Autonomous Agents _ Lil'Log_gold.pdf"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,12 +1,34 @@
|
|||||||
{
|
{
|
||||||
"id": "455d3c66-7dc6-4537-a39a-36d3e9119df7",
|
"id": "455d3c66-7dc6-4537-a39a-36d3e9119df7",
|
||||||
"snapshot": "libreoffice_impress",
|
"snapshot": "libreoffice_impress",
|
||||||
"instruction": "Could you help me export impress file to image jpg file?",
|
"instruction": "Could you help me export an Impress file to a .png image file and save it as res.png on the Desktop?",
|
||||||
"source": "https://stackoverflow.com/questions/75626383/how-export-libreoffice-impress-to-image",
|
"source": "https://stackoverflow.com/questions/75626383/how-export-libreoffice-impress-to-image",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "download",
|
||||||
|
"parameters": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"url": "https://drive.usercontent.google.com/download?id=12MxMjw28_t1nTLihlDpToKebjsSDsjwx&export=download&authuser=0&confirm=t&uuid=1ccc1da0-d7c7-494f-a0e3-59eb55f54e3f&at=APZUnTXvNIRMsF2cjZuFxmQzByhC:1705253210291",
|
||||||
|
"path": "Desktop/wssf-project-plan-on-a-page.pptx"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "open",
|
||||||
|
"parameters": {
|
||||||
|
"path": "Desktop/wssf-project-plan-on-a-page.pptx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
""
|
"libreoffice_impress"
|
||||||
],
|
],
|
||||||
"evaluator": "evaluation_dir"
|
"evaluator": {
|
||||||
}
|
"func": "check_file_exists",
|
||||||
|
"file_name": "res.png",
|
||||||
|
"directory": "/home/user/Desktop/"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,12 +1,37 @@
|
|||||||
{
|
{
|
||||||
"id": "550ce7e7-747b-495f-b122-acdc4d0b8e54",
|
"id": "550ce7e7-747b-495f-b122-acdc4d0b8e54",
|
||||||
"snapshot": "libreoffice_impress",
|
"snapshot": "libreoffice_impress",
|
||||||
"instruction": "Could you help me add a strike-through on this text",
|
"instruction": "I am checking our soccer club's to-do list for the last semester and adding a strike-through to the lines we have already accomplished. Could you help me add a strike-through to the first and second lines?",
|
||||||
"source": "https://superuser.com/questions/1211035/libreoffice-impress-animations-how-to-strikethrough-on-click?rq=1",
|
"source": "https://superuser.com/questions/1211035/libreoffice-impress-animations-how-to-strikethrough-on-click?rq=1",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "download",
|
||||||
|
"parameters": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"url": "https://drive.usercontent.google.com/download?id=1fw0baVZ15s0r1WGEBftgED2H0ljZgYtu&export=download&authuser=0&confirm=t&uuid=df03788a-81ef-4e55-b33a-2fba7ab28cb8&at=APZUnTXPb-sm88KNwmNeugbhPrzx:17052529805399",
|
||||||
|
"path": "Desktop/New_Club_Spring_2018_Training.pptx"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "open",
|
||||||
|
"parameters": {
|
||||||
|
"path": "Desktop/New_Club_Spring_2018_Training.pptx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
""
|
"libreoffice_impress"
|
||||||
],
|
],
|
||||||
"evaluator": "evaluation_dir"
|
"evaluator": {
|
||||||
}
|
"func": "check_for_two_lines",
|
||||||
|
"result": {
|
||||||
|
"type": "vm_file",
|
||||||
|
"path": "Desktop/New_Club_Spring_2018_Training.pptx",
|
||||||
|
"dest": "New_Club_Spring_2018_Training.pptx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,12 +1,42 @@
|
|||||||
{
|
{
|
||||||
"id": "5d901039-a89c-4bfb-967b-bf66f4df075e",
|
"id": "5d901039-a89c-4bfb-967b-bf66f4df075e",
|
||||||
"snapshot": "libreoffice_impress",
|
"snapshot": "libreoffice_impress",
|
||||||
"instruction": "Help me stretch the image to fill the entire page, keeping its proportion and centering the image",
|
"instruction": "I want to make this page my cover page. Could you help me stretch this image to fill the entire page, keeping its proportions and centering it?",
|
||||||
"source": "https://superuser.com/questions/986776/how-can-i-stretch-an-image-in-a-libreoffice-impress-presentation-to-fill-the-pag",
|
"source": "https://superuser.com/questions/986776/how-can-i-stretch-an-image-in-a-libreoffice-impress-presentation-to-fill-the-pag",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "download",
|
||||||
|
"parameters": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"url": "https://drive.usercontent.google.com/download?id=16K6TpGIRZpqOJUu-mtJQ_78kIwLcn-4D&export=download&authuser=0&confirm=t&uuid=945b6f33-53d2-4e87-ada9-efa8b938a499&at=APZUnTVw4fKyJPW0vAAJURruAJIP:1705250184439",
|
||||||
|
"path": "Desktop/CPD_Background_Investigation_Process.pptx"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "open",
|
||||||
|
"parameters": {
|
||||||
|
"path": "Desktop/CPD_Background_Investigation_Process.pptx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
""
|
"libreoffice_impress"
|
||||||
],
|
],
|
||||||
"evaluator": "evaluation_dir"
|
"evaluator": {
|
||||||
}
|
"func": "compare_pptx_files",
|
||||||
|
"expected": {
|
||||||
|
"type": "cloud_file",
|
||||||
|
"path": "https://drive.usercontent.google.com/download?id=1rsvFPyHYiIPh1c8Nj8say0NJCG2VIDr7&export=download&authuser=0&confirm=t&uuid=aac08a92-6595-47d8-84dc-8f1ab1df987f&at=APZUnTXIWCn5B0CpLttvG2bsr_a7:1705250423565",
|
||||||
|
"dest": "CPD_Background_Investigation_Process_Gold.docx"
|
||||||
|
},
|
||||||
|
"result": {
|
||||||
|
"type": "vm_file",
|
||||||
|
"path": "Desktop/CPD_Background_Investigation_Process.pptx",
|
||||||
|
"dest": "CPD_Background_Investigation_Process.pptx"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"id": "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
|
"id": "0ed39f63-6049-43d4-ba4d-5fa2fe04a951",
|
||||||
"snapshot": "vscode",
|
"snapshot": "vscode",
|
||||||
"instruction": "Could you help me find and replace \"text\" with \"test\" in this file?",
|
"instruction": "Please change all the places that say \"text\" to \"test\" in this document for me.",
|
||||||
"source": "https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code",
|
"source": "https://www.quora.com/How-do-you-find-and-replace-text-in-Visual-Studio-Code",
|
||||||
"config": [
|
"config": [
|
||||||
{
|
{
|
||||||
@@ -16,9 +16,15 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "open",
|
"type": "launch",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"path": "Desktop/vscode_replace_text.txt"
|
"command": ["code", "Desktop/vscode_replace_text.txt"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "activate_window",
|
||||||
|
"parameters": {
|
||||||
|
"window_name": "Visual Studio Code"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
|||||||
@@ -1,13 +1,50 @@
|
|||||||
{
|
{
|
||||||
"id": "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
"id": "53ad5833-3455-407b-bbc6-45b4c79ab8fb",
|
||||||
"snapshot": "vscode",
|
"snapshot": "vscode",
|
||||||
"instruction": "Could you help me open the project at /home/user/project?",
|
"instruction": "I'd like the \"project\" in the \"user\" folder under \"home\" to be opened with VS Code, please.",
|
||||||
"source": "https://www.youtube.com/watch?v=VqCgcpAypFQ",
|
"source": "https://www.youtube.com/watch?v=VqCgcpAypFQ",
|
||||||
"config": [
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"code"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"type": "command",
|
"type": "command",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"command": ["mkdir", "-p", "/home/user/project"]
|
"command": [
|
||||||
|
"mkdir",
|
||||||
|
"-p",
|
||||||
|
"/home/user/project/.vscode"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "download",
|
||||||
|
"parameters": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"url": "https://drive.usercontent.google.com/download?id=1akdsiRVdq6CUtT-FX8Dpf8ruPTq6DcFn&export=download&authuser=0&confirm=t&uuid=ce2fa96a-454e-43d9-bbe3-98553b7eed0d&at=APZUnTVw_YQ1URTvP34vrmKcw0b4:1705222451052",
|
||||||
|
"path": "/home/user/project/main.py"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://drive.usercontent.google.com/download?id=1BkwtqtAzv_K2CrTbJZ0HbMHBffzdD9vc&export=download&authuser=0&confirm=t&uuid=28f77090-deef-49a1-b156-91317881e75e&at=APZUnTXuaR6i_3t3Prslk535GaO5:1705222457290",
|
||||||
|
"path": "/home/user/project/README.md"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"url": "https://drive.usercontent.google.com/download?id=1ea_zF2tbcXOB8w9neBV-U5xI2nnPzIw_&export=download&authuser=0&confirm=t&uuid=9cf8c5bb-a880-475c-b80b-967a0c4fbea4&at=APZUnTUdjIj80F3Mbgi72eZDTZLO:1705222462443",
|
||||||
|
"path": "/home/user/project/.vscode/settings.json"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "activate_window",
|
||||||
|
"parameters": {
|
||||||
|
"window_name": "Visual Studio Code"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@@ -15,5 +52,27 @@
|
|||||||
"related_apps": [
|
"related_apps": [
|
||||||
"vscode"
|
"vscode"
|
||||||
],
|
],
|
||||||
"evaluator": "evaluation_dir"
|
"evaluator": {
|
||||||
|
"postconfig": [
|
||||||
|
{
|
||||||
|
"type": "activate_window",
|
||||||
|
"parameters": {
|
||||||
|
"window_name": "Visual Studio Code"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"func": "compare_config",
|
||||||
|
"expected": {
|
||||||
|
"type": "rule",
|
||||||
|
"rules": {
|
||||||
|
"expect": "project"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"result": {
|
||||||
|
"type": "vscode_config",
|
||||||
|
"vscode_extension_command": "OpenProject",
|
||||||
|
"path": "OpenProject.txt",
|
||||||
|
"dest": "OpenProject.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,12 +1,50 @@
|
|||||||
{
|
{
|
||||||
"id": "59ed65c7-e9a6-43db-833f-76d6730c0004",
|
"id": "59ed65c7-e9a6-43db-833f-76d6730c0004",
|
||||||
"snapshot": "vscode",
|
"snapshot": "vscode",
|
||||||
"instruction": "Could you help me start debugging with the breakpoint at line 15?",
|
"instruction": "Could you help me start debugging with the breakpoint at line 100?",
|
||||||
"source": "https://www.youtube.com/watch?v=7qZBwhSlfOo",
|
"source": "https://www.youtube.com/watch?v=7qZBwhSlfOo",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "download",
|
||||||
|
"parameters": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"url": "https://drive.usercontent.google.com/download?id=1eLlB7UqRjh55vm0SIxb96aU1WbbK3H3T&export=download&authuser=0&confirm=t&uuid=379d1cbf-cca1-454a-a5a6-c5389024f728&at=APZUnTWn4vJZhfvrdfYZ6byVfaSj:1705159150342",
|
||||||
|
"path": "Desktop/main.py"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": ["code", "Desktop/main.py"]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "activate_window",
|
||||||
|
"parameters": {
|
||||||
|
"window_name": "Visual Studio Code"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/",
|
"trajectory": "trajectories/",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
"vscode"
|
"vscode"
|
||||||
],
|
],
|
||||||
"evaluator": "evaluation_dir"
|
"evaluator": {
|
||||||
|
"func": "compare_config",
|
||||||
|
"expected": {
|
||||||
|
"type": "rule",
|
||||||
|
"rules": {
|
||||||
|
"expect": "100"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"result": {
|
||||||
|
"type": "vscode_config",
|
||||||
|
"vscode_extension_command": "GetBreakPoint",
|
||||||
|
"path": "GetBreakPoint.txt",
|
||||||
|
"dest": "GetBreakPoint.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,10 +3,39 @@
|
|||||||
"snapshot": "vscode",
|
"snapshot": "vscode",
|
||||||
"instruction": "Could you help me change the color theme to Dark?",
|
"instruction": "Could you help me change the color theme to Dark?",
|
||||||
"source": "https://www.youtube.com/watch?v=ORrELERGIHs",
|
"source": "https://www.youtube.com/watch?v=ORrELERGIHs",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"code"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "activate_window",
|
||||||
|
"parameters": {
|
||||||
|
"window_name": "Visual Studio Code"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/982d12a5-beab-424f-8d38-d2a48429e511",
|
"trajectory": "trajectories/982d12a5-beab-424f-8d38-d2a48429e511",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
"vscode"
|
"vscode"
|
||||||
],
|
],
|
||||||
"evaluator": "evaluation_dir"
|
"evaluator": {
|
||||||
|
"func": "compare_config",
|
||||||
|
"expected": {
|
||||||
|
"type": "rule",
|
||||||
|
"rules": {
|
||||||
|
"expect": "2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"result": {
|
||||||
|
"type": "vscode_config",
|
||||||
|
"vscode_extension_command": "GetColorTheme",
|
||||||
|
"path": "GetColorTheme.txt",
|
||||||
|
"dest": "GetColorTheme.txt"
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,20 +3,44 @@
|
|||||||
"snapshot": "vscode",
|
"snapshot": "vscode",
|
||||||
"instruction": "Help me install the extension Python.",
|
"instruction": "Help me install the extension Python.",
|
||||||
"source": "https://www.youtube.com/watch?v=VqCgcpAypFQ",
|
"source": "https://www.youtube.com/watch?v=VqCgcpAypFQ",
|
||||||
"config": [],
|
"config": [
|
||||||
|
{
|
||||||
|
"type": "launch",
|
||||||
|
"parameters": {
|
||||||
|
"command": [
|
||||||
|
"code"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "activate_window",
|
||||||
|
"parameters": {
|
||||||
|
"window_name": "Visual Studio Code"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"trajectory": "trajectories/eabc805a-bfcf-4460-b250-ac92135819f6",
|
"trajectory": "trajectories/eabc805a-bfcf-4460-b250-ac92135819f6",
|
||||||
"related_apps": [
|
"related_apps": [
|
||||||
"vscode"
|
"vscode"
|
||||||
],
|
],
|
||||||
"evaluator": {
|
"evaluator": {
|
||||||
"func": "compare_answer",
|
"func": "is_extension_installed",
|
||||||
"expected": {
|
|
||||||
"type": "string",
|
|
||||||
"string": "ms-python.python\n"
|
|
||||||
},
|
|
||||||
"result": {
|
"result": {
|
||||||
"type": "command_line",
|
"type": "vm_command_line",
|
||||||
"command": "code --list-extensions | grep ms-python.python"
|
"command": [
|
||||||
|
"code",
|
||||||
|
"--list-extensions",
|
||||||
|
"|",
|
||||||
|
"grep",
|
||||||
|
"ms-python.python"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"expected": {
|
||||||
|
"type": "rule",
|
||||||
|
"rules": {
|
||||||
|
"type": "contain",
|
||||||
|
"expected": "ms-python.python"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
104
experiment.py
Normal file
104
experiment.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
import datetime
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from desktop_env.envs.desktop_env import DesktopEnv
|
||||||
|
from mm_agents.gpt_4v_agent import GPT4v_Agent
|
||||||
|
|
||||||
|
#  Logger Configs {{{ # 
# The root logger accepts everything; each handler below applies its own
# level and (optionally) name filter.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# One timestamp shared by all log files of this run.
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

# FileHandler opens its file immediately and does not create missing parent
# directories, so make sure "logs" exists before building the handlers.
os.makedirs("logs", exist_ok=True)

file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")

file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)

# The same ANSI-coloured format is used for every handler, files included.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)

# stdout and the "sdebug" file only receive records from the "desktopenv"
# logger hierarchy; the other two files receive records from every logger.
stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))

logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
#  }}} Logger Configs # 

# Module-level logger for this script; its name falls under "desktopenv",
# so its records pass the filters above.
logger = logging.getLogger("desktopenv.experiment")
|
||||||
|
|
||||||
|
PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
|
||||||
|
|
||||||
|
|
||||||
|
def run_one_example(example, agent, max_steps=20, example_trajectory_dir="exp_trajectory"):
    """Run one task episode with ``agent`` in a new ``DesktopEnv`` and log the outcome.

    The environment is reset, then the agent is repeatedly asked for actions
    which are executed one by one until the environment reports ``done`` or
    the step budget is used up. Finally ``env.evaluate()`` is logged.

    Args:
        example: Task configuration. It is passed to ``DesktopEnv`` as
            ``task_config`` and its ``'instruction'`` entry is copied into
            every observation handed to the agent.
        agent: Object exposing ``action_space`` and ``predict(observation)``.
        max_steps: Maximum number of steps (executed actions or failed
            predictions) before the episode is cut off.
        example_trajectory_dir: Directory reserved for the trajectory
            recording; nothing is written there yet (see the todo below).
    """
    # Reserved for the trajectory dump described in the todo below; unused for now.
    trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
    env = DesktopEnv(
        path_to_vm=PATH_TO_VM,
        action_space=agent.action_space,
        task_config=example
    )
    # reset the environment to certain snapshot
    observation = env.reset()
    observation['instruction'] = example['instruction']
    done = False
    step_num = 0

    # todo: save the screenshots and actions to a folder
    while not done and step_num < max_steps:
        actions = agent.predict(observation)
        if not actions:
            # The agents return None when the model response cannot be parsed.
            # Charge the failed prediction to the step budget so the loop
            # cannot spin forever, then ask again.
            step_num += 1
            logger.warning("Step %d: agent returned no action", step_num)
            continue

        for action in actions:
            observation, reward, done, info = env.step(action)
            observation['instruction'] = example['instruction']
            step_num += 1
            logger.info("Step %d", step_num)
            logger.info("Action: %s", action)
            # Drop the accessibility tree before logging the observation;
            # tolerate observations that do not carry one.
            # NOTE(review): this also removes it from the observation passed
            # to the next agent.predict call - confirm that is intended.
            observation.pop("accessibility_tree", None)
            logger.info("Observation: %s", observation)
            logger.info("Reward: %.2f", reward)
            logger.info("Info: %s", info)

            logger.info("================================\n")

            if done:
                logger.info("The episode is done.")
                break
            if step_num >= max_steps:
                # Enforce the budget even when one prediction holds several actions.
                break

    result = env.evaluate()
    logger.info("Result: %.2f", result)

    # NOTE(review): the close call is disabled, yet the message below is still logged.
    # env.close()
    logger.info("Environment closed.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Which action space the agent uses and which annotated example to run.
    action_space = "pyautogui"
    example_class = "vlc"
    example_id = "8f080098-ddb1-424c-b438-4e96e5e4786e"

    # Load the task configuration and override the snapshot it starts from.
    example_path = f"evaluation_examples/examples/{example_class}/{example_id}.json"
    with open(example_path, "r") as config_file:
        example = json.load(config_file)
    example["snapshot"] = "chrome_setup"

    # The API key is taken from the environment (None when the variable is unset).
    agent = GPT4v_Agent(api_key=os.environ.get("OPENAI_API_KEY"), action_space=action_space)

    # One output directory per example: <root>/<class>/<id>.
    root_trajectory_dir = "exp_trajectory"
    example_trajectory_dir = os.path.join(root_trajectory_dir, example_class, example_id)
    os.makedirs(example_trajectory_dir, exist_ok=True)

    run_one_example(example, agent, 20, example_trajectory_dir)
|
||||||
23
main.py
23
main.py
@@ -1,10 +1,10 @@
|
|||||||
|
import datetime
|
||||||
import json
|
import json
|
||||||
from desktop_env.envs.desktop_env import DesktopEnv
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import datetime
|
|
||||||
|
from desktop_env.envs.desktop_env import DesktopEnv
|
||||||
|
|
||||||
# Logger Configs {{{ #
|
# Logger Configs {{{ #
|
||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
@@ -12,17 +12,18 @@ logger.setLevel(logging.DEBUG)
|
|||||||
|
|
||||||
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
|
datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
|
||||||
|
|
||||||
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)))
|
file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
|
||||||
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)))
|
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
|
||||||
stdout_handler = logging.StreamHandler(sys.stdout)
|
stdout_handler = logging.StreamHandler(sys.stdout)
|
||||||
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)))
|
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
|
||||||
|
|
||||||
file_handler.setLevel(logging.INFO)
|
file_handler.setLevel(logging.INFO)
|
||||||
debug_handler.setLevel(logging.DEBUG)
|
debug_handler.setLevel(logging.DEBUG)
|
||||||
stdout_handler.setLevel(logging.INFO)
|
stdout_handler.setLevel(logging.INFO)
|
||||||
sdebug_handler.setLevel(logging.DEBUG)
|
sdebug_handler.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
|
formatter = logging.Formatter(
|
||||||
|
fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s")
|
||||||
file_handler.setFormatter(formatter)
|
file_handler.setFormatter(formatter)
|
||||||
debug_handler.setFormatter(formatter)
|
debug_handler.setFormatter(formatter)
|
||||||
stdout_handler.setFormatter(formatter)
|
stdout_handler.setFormatter(formatter)
|
||||||
@@ -39,6 +40,7 @@ logger.addHandler(sdebug_handler)
|
|||||||
|
|
||||||
logger = logging.getLogger("desktopenv.main")
|
logger = logging.getLogger("desktopenv.main")
|
||||||
|
|
||||||
|
|
||||||
def human_agent():
|
def human_agent():
|
||||||
"""
|
"""
|
||||||
Runs the Gym environment with human input.
|
Runs the Gym environment with human input.
|
||||||
@@ -76,7 +78,8 @@ def human_agent():
|
|||||||
# }
|
# }
|
||||||
logger.info(trajectory[i])
|
logger.info(trajectory[i])
|
||||||
|
|
||||||
observation, reward, done, info = env.step(trajectory[i], pause=5)
|
observation, reward, done, info = env.step(trajectory[i])
|
||||||
|
observation.pop("accessibility_tree")
|
||||||
logger.info("Observation: %s", observation)
|
logger.info("Observation: %s", observation)
|
||||||
logger.info("Reward: %.2f", reward)
|
logger.info("Reward: %.2f", reward)
|
||||||
logger.info("Info: %s", info)
|
logger.info("Info: %s", info)
|
||||||
@@ -87,12 +90,14 @@ def human_agent():
|
|||||||
logger.info("The episode is done.")
|
logger.info("The episode is done.")
|
||||||
break
|
break
|
||||||
|
|
||||||
|
#input("PAUSING")
|
||||||
|
|
||||||
result = env.evaluate()
|
result = env.evaluate()
|
||||||
logger.info("Result: %.2f", result)
|
logger.info("Result: %.2f", result)
|
||||||
|
|
||||||
#input("PAUSING")
|
#input("PAUSING")
|
||||||
|
|
||||||
#env.close()
|
# env.close()
|
||||||
logger.info("Environment closed.")
|
logger.info("Environment closed.")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
84
mm_agents/gemini_agent.py
Normal file
84
mm_agents/gemini_agent.py
Normal file
@@ -0,0 +1,84 @@
|
|||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
import PIL.Image
|
||||||
|
import google.generativeai as genai
|
||||||
|
|
||||||
|
from mm_agents.gpt_4v_agent import parse_actions_from_string, parse_code_from_string
|
||||||
|
from mm_agents.gpt_4v_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION
|
||||||
|
from mm_agents.gpt_4v_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE
|
||||||
|
|
||||||
|
|
||||||
|
class GeminiPro_Agent:
    """Agent that asks a Gemini vision model for the next desktop action(s).

    The conversation is kept in ``self.trajectory`` as a list of
    ``{"role": ..., "parts": [...]}`` messages and is sent in full on every
    call to :meth:`predict`.
    """

    def __init__(self, api_key, model='gemini-pro-vision', max_tokens=300, action_space="computer_13"):
        """Configure the Gemini client and seed the trajectory with the system prompt.

        Args:
            api_key: API key handed to ``genai.configure``.
            model: Name of the Gemini model to instantiate.
            max_tokens: Upper bound on the number of generated tokens per response.
            action_space: ``"computer_13"`` or ``"pyautogui"``; selects the
                system prompt here and the response parser in ``parse_actions``.

        Raises:
            KeyError: If ``action_space`` is neither of the two supported values.
        """
        # genai.configure only accepts keyword arguments.
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model)
        self.max_tokens = max_tokens
        self.action_space = action_space

        # NOTE(review): the Gemini API documents the roles "user" and "model";
        # confirm that the "system" role used here (and "assistant" in
        # parse_actions) is accepted by the service.
        self.trajectory = [
            {
                "role": "system",
                "parts": [
                    {
                        "computer_13": SYS_PROMPT_ACTION,
                        "pyautogui": SYS_PROMPT_CODE
                    }[action_space]
                ]
            }
        ]

    def predict(self, obs: Dict):
        """
        Predict the next action(s) based on the current observation.

        Args:
            obs: Observation with a ``"screenshot"`` entry that
                ``PIL.Image.open`` can read and an ``"instruction"`` entry
                holding the task text.

        Returns:
            The parsed actions, or ``None`` when the response could not be
            read or parsed.
        """
        img = PIL.Image.open(obs["screenshot"])
        self.trajectory.append({
            "role": "user",
            "parts": ["To accomplish the task '{}' and given the current screenshot, what's the next step?".format(
                obs["instruction"]), img]
        })

        # Printable view of the conversation: the first part of every message,
        # with a placeholder wherever a screenshot is attached.
        traj_to_show = []
        for message in self.trajectory:
            traj_to_show.append(message["parts"][0])
            if len(message["parts"]) > 1:
                traj_to_show.append("screenshot_obs")

        print("Trajectory:", traj_to_show)

        # The output-length limit is part of the generation config;
        # generate_content has no max_tokens parameter.
        response = self.model.generate_content(
            self.trajectory,
            generation_config=genai.types.GenerationConfig(max_output_tokens=self.max_tokens)
        )

        response_text = None
        try:
            # response.text raises when the response carries no usable text.
            response_text = response.text
            actions = self.parse_actions(response_text)
        except Exception as error:
            # todo: add error handling
            print("Failed to parse action from response:",
                  response_text if response_text is not None else error)
            actions = None

        return actions

    def parse_actions(self, response: str):
        """Parse the model response into actions and record it in the trajectory.

        Example response for the ``computer_13`` action space::

            ```json
            {
              "action_type": "CLICK",
              "click_type": "RIGHT"
            }
            ```

        Args:
            response: Raw text returned by the model.

        Returns:
            Whatever the parser for the configured action space returns.

        Raises:
            ValueError: If ``self.action_space`` is not supported.
        """
        # parse from the response
        if self.action_space == "computer_13":
            actions = parse_actions_from_string(response)
        elif self.action_space == "pyautogui":
            actions = parse_code_from_string(response)
        else:
            raise ValueError("Unsupported action space: {}".format(self.action_space))

        # add action into the trajectory (only reached when parsing succeeded)
        self.trajectory.append({
            "role": "assistant",
            "parts": [response]
        })

        return actions
|
||||||
@@ -1,19 +0,0 @@
|
|||||||
import PIL.Image
|
|
||||||
import google.generativeai as genai
|
|
||||||
|
|
||||||
genai.configure(api_key="AIzaSyANsETKHVo-D8jZu1SnTSaQgLOJEDgnj9Q")
|
|
||||||
|
|
||||||
# for m in genai.list_models():
|
|
||||||
# if 'generateContent' in m.supported_generation_methods:
|
|
||||||
# print(m.name)
|
|
||||||
|
|
||||||
model = genai.GenerativeModel('gemini-pro-vision')
|
|
||||||
|
|
||||||
img = PIL.Image.open('image.jpg')
|
|
||||||
|
|
||||||
messages = [
|
|
||||||
{'role':'user',
|
|
||||||
'parts': ["Explain this image.", img]}
|
|
||||||
]
|
|
||||||
|
|
||||||
response = model.generate_content(messages)
|
|
||||||
@@ -1,12 +1,12 @@
|
|||||||
# fixme: Need to be rewrite on new action space
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import base64
|
import base64
|
||||||
from desktop_env.envs.desktop_env import Action, MouseClick
|
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from mm_agents.gpt_4v_prompt import SYS_PROMPT
|
|
||||||
|
from mm_agents.gpt_4v_prompt_action import SYS_PROMPT as SYS_PROMPT_ACTION
|
||||||
|
from mm_agents.gpt_4v_prompt_code import SYS_PROMPT as SYS_PROMPT_CODE
|
||||||
|
|
||||||
|
|
||||||
# Function to encode the image
|
# Function to encode the image
|
||||||
@@ -47,11 +47,26 @@ def parse_actions_from_string(input_string):
|
|||||||
raise ValueError("Invalid response format: " + input_string)
|
raise ValueError("Invalid response format: " + input_string)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_code_from_string(input_string):
    """Return the contents of every triple-backtick fenced snippet in ``input_string``.

    Both bare fences and fences opened with a language tag are recognised: a
    run of word characters followed by whitespace directly after the opening
    fence is dropped, everything else up to the next closing fence is kept.
    Snippets may span several lines.

    Returns:
        A list with one string per snippet, in order of appearance; empty
        when the input contains no fenced snippet.
    """
    # DOTALL lets "." cross line breaks; the lazy group stops at the first
    # closing fence so consecutive snippets are captured separately.
    fence = re.compile(r"```(?:\w+\s+)?(.*?)```", re.DOTALL)
    return [found.group(1) for found in fence.finditer(input_string)]
|
||||||
|
|
||||||
|
|
||||||
class GPT4v_Agent:
|
class GPT4v_Agent:
|
||||||
def __init__(self, api_key, instruction, model="gpt-4-vision-preview", max_tokens=300):
|
def __init__(self, api_key, model="gpt-4-vision-preview", max_tokens=300, action_space="computer_13"):
|
||||||
self.instruction = instruction
|
|
||||||
self.model = model
|
self.model = model
|
||||||
self.max_tokens = max_tokens
|
self.max_tokens = max_tokens
|
||||||
|
self.action_space = action_space
|
||||||
|
|
||||||
self.headers = {
|
self.headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
@@ -64,20 +79,27 @@ class GPT4v_Agent:
|
|||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": SYS_PROMPT
|
"text": {
|
||||||
|
"computer_13": SYS_PROMPT_ACTION,
|
||||||
|
"pyautogui": SYS_PROMPT_CODE
|
||||||
|
}[action_space]
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
def predict(self, obs):
|
def predict(self, obs: Dict):
|
||||||
base64_image = encode_image(obs)
|
"""
|
||||||
|
Predict the next action(s) based on the current observation.
|
||||||
|
"""
|
||||||
|
base64_image = encode_image(obs["screenshot"])
|
||||||
self.trajectory.append({
|
self.trajectory.append({
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
"text": "What's the next step for instruction '{}'?".format(self.instruction)
|
"text": "To accomplish the task '{}' and given the current screenshot, what's the next step?".format(
|
||||||
|
obs["instruction"])
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
@@ -87,12 +109,15 @@ class GPT4v_Agent:
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
})
|
})
|
||||||
|
|
||||||
traj_to_show = []
|
traj_to_show = []
|
||||||
for i in range(len(self.trajectory)):
|
for i in range(len(self.trajectory)):
|
||||||
traj_to_show.append(self.trajectory[i]["content"][0]["text"])
|
traj_to_show.append(self.trajectory[i]["content"][0]["text"])
|
||||||
if len(self.trajectory[i]["content"]) > 1:
|
if len(self.trajectory[i]["content"]) > 1:
|
||||||
traj_to_show.append("screenshot_obs")
|
traj_to_show.append("screenshot_obs")
|
||||||
|
|
||||||
print("Trajectory:", traj_to_show)
|
print("Trajectory:", traj_to_show)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": self.trajectory,
|
"messages": self.trajectory,
|
||||||
@@ -103,6 +128,7 @@ class GPT4v_Agent:
|
|||||||
try:
|
try:
|
||||||
actions = self.parse_actions(response.json()['choices'][0]['message']['content'])
|
actions = self.parse_actions(response.json()['choices'][0]['message']['content'])
|
||||||
except:
|
except:
|
||||||
|
# todo: add error handling
|
||||||
print("Failed to parse action from response:", response.json()['choices'][0]['message']['content'])
|
print("Failed to parse action from response:", response.json()['choices'][0]['message']['content'])
|
||||||
actions = None
|
actions = None
|
||||||
|
|
||||||
@@ -120,7 +146,10 @@ class GPT4v_Agent:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
# parse from the response
|
# parse from the response
|
||||||
actions = parse_actions_from_string(response)
|
if self.action_space == "computer_13":
|
||||||
|
actions = parse_actions_from_string(response)
|
||||||
|
elif self.action_space == "pyautogui":
|
||||||
|
actions = parse_code_from_string(response)
|
||||||
|
|
||||||
# add action into the trajectory
|
# add action into the trajectory
|
||||||
self.trajectory.append({
|
self.trajectory.append({
|
||||||
@@ -133,34 +162,4 @@ class GPT4v_Agent:
|
|||||||
]
|
]
|
||||||
})
|
})
|
||||||
|
|
||||||
# parse action
|
return actions
|
||||||
parsed_actions = []
|
|
||||||
for action in actions:
|
|
||||||
parsed_action = {}
|
|
||||||
action_type = Action[action['action_type']].value
|
|
||||||
parsed_action["action_type"] = action_type
|
|
||||||
|
|
||||||
if action_type == Action.CLICK.value or action_type == Action.MOUSE_DOWN.value or action_type == Action.MOUSE_UP.value:
|
|
||||||
parsed_action["click_type"] = MouseClick[action['click_type']].value
|
|
||||||
|
|
||||||
if action_type == Action.MOUSE_MOVE.value:
|
|
||||||
parsed_action["x"] = action["x"]
|
|
||||||
parsed_action["y"] = action["y"]
|
|
||||||
|
|
||||||
if action_type == Action.KEY.value:
|
|
||||||
parsed_action["key"] = action["key"] # handle the condition of single key and multiple keys
|
|
||||||
|
|
||||||
if action_type == Action.TYPE.value:
|
|
||||||
parsed_action["text"] = action["text"]
|
|
||||||
|
|
||||||
parsed_actions.append(parsed_action)
|
|
||||||
|
|
||||||
return parsed_actions
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
# OpenAI API Key
|
|
||||||
api_key = os.environ.get("OPENAI_API_KEY")
|
|
||||||
|
|
||||||
agent = GPT4v_Agent(api_key=api_key, instruction="Open Google Sheet")
|
|
||||||
print(agent.predict(obs="stackoverflow.png"))
|
|
||||||
|
|||||||
@@ -1,52 +0,0 @@
|
|||||||
You will act as an agent which follows my instructions and performs desktop computer tasks as instructed. You must have good knowledge of computers and a good internet connection.
|
|
||||||
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
|
|
||||||
Here is the description of the action space:
|
|
||||||
|
|
||||||
Firstly you need to predict the class of your action, select from one below:
|
|
||||||
- **MOUSE_MOVE**: move the mouse to a specific position
|
|
||||||
- **CLICK**: click on the screen
|
|
||||||
- **MOUSE_DOWN**: press the mouse button
|
|
||||||
- **MOUSE_UP**: release the mouse button
|
|
||||||
- **KEY**: press a key on the keyboard
|
|
||||||
- **KEY_DOWN**: press a key on the keyboard
|
|
||||||
- **KEY_UP**: release a key on the keyboard
|
|
||||||
- **TYPE**: type a string on the keyboard
|
|
||||||
|
|
||||||
Then you need to predict the parameters of your action:
|
|
||||||
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor
|
|
||||||
for example, format as:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"action_type": "MOUSE_MOVE",
|
|
||||||
"x": 1319.11,
|
|
||||||
"y": 65.06
|
|
||||||
}
|
|
||||||
```
|
|
||||||
- For [CLICK, MOUSE_DOWN, MOUSE_UP], you need to specify the click_type as well, select from [LEFT, MIDDLE, RIGHT, WHEEL_UP, WHEEL_DOWN], which means you click the left button, middle button, right button, wheel up or wheel down of your mouse:
|
|
||||||
for example, format as:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"action_type": "CLICK",
|
|
||||||
"click_type": "LEFT"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
- For [KEY, KEY_DOWN, KEY_UP, TYPE], you need to choose a(multiple) key(s) from the keyboard, select from [A-Z, 0-9, F1-F12, ESC, TAB, ENTER, SPACE, BACKSPACE, SHIFT, CTRL, ALT, UP, DOWN, LEFT, RIGHT, CAPSLOCK, NUMLOCK, SCROLLLOCK, INSERT, DELETE, HOME, END, PAGEUP, PAGEDOWN]:
|
|
||||||
for example, format as:
|
|
||||||
```
|
|
||||||
{
|
|
||||||
"action_type": "TYPE",
|
|
||||||
"text": [
|
|
||||||
"w",
|
|
||||||
"i",
|
|
||||||
"k",
|
|
||||||
"i",
|
|
||||||
"p",
|
|
||||||
"e",
|
|
||||||
"d",
|
|
||||||
"i",
|
|
||||||
"a"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
For every step, you should only return the action_type and the parameters of your action as a dict, without anything else.
|
|
||||||
@@ -1,19 +1,207 @@
|
|||||||
SYS_PROMPT = """
|
SYS_PROMPT = """
|
||||||
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
|
You will act as an agent which follow my instruction and perform desktop computer tasks as instructed. You must have good knowledge of computer and good internet connection.
|
||||||
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
|
For each step, you will get an observation of an image, which is the screenshot of the computer screen. And you will predict the action of the computer based on the image.
|
||||||
Here is the description of the action space:
|
|
||||||
|
|
||||||
Firstly you need to predict the class of your action, select from one below:
|
HERE is the description of the action space you need to predict, follow the format and choose the correct action type and parameters:
|
||||||
- **MOUSE_MOVE**: move the mouse to a specific position
|
ACTION_SPACE = [
|
||||||
- **CLICK**: click on the screen
|
{
|
||||||
- **MOUSE_DOWN**: press the mouse button
|
"action_type": "MOVE_TO",
|
||||||
- **MOUSE_UP**: release the mouse button
|
"note": "move the cursor to the specified position",
|
||||||
- **KEY**: press a key on the keyboard
|
"parameters": {
|
||||||
- **KEY_DOWN**: press a key on the keyboard
|
"x": {
|
||||||
- **KEY_UP**: release a key on the keyboard
|
"type": float,
|
||||||
- **TYPE**: type a string on the keyboard
|
"range": [0, X_MAX],
|
||||||
|
"optional": False,
|
||||||
Then you need to predict the parameters of your action:
|
},
|
||||||
|
"y": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, Y_MAX],
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "CLICK",
|
||||||
|
"note": "click the left button if the button not specified, otherwise click the specified button; click at the current position if x and y are not specified, otherwise click at the specified position",
|
||||||
|
"parameters": {
|
||||||
|
"button": {
|
||||||
|
"type": str,
|
||||||
|
"range": ["left", "right", "middle"],
|
||||||
|
"optional": True,
|
||||||
|
},
|
||||||
|
"x": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, X_MAX],
|
||||||
|
"optional": True,
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, Y_MAX],
|
||||||
|
"optional": True,
|
||||||
|
},
|
||||||
|
"num_clicks": {
|
||||||
|
"type": int,
|
||||||
|
"range": [1, 2, 3],
|
||||||
|
"optional": True,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_DOWN",
|
||||||
|
"note": "press the left button if the button not specified, otherwise press the specified button",
|
||||||
|
"parameters": {
|
||||||
|
"button": {
|
||||||
|
"type": str,
|
||||||
|
"range": ["left", "right", "middle"],
|
||||||
|
"optional": True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "MOUSE_UP",
|
||||||
|
"note": "release the left button if the button not specified, otherwise release the specified button",
|
||||||
|
"parameters": {
|
||||||
|
"button": {
|
||||||
|
"type": str,
|
||||||
|
"range": ["left", "right", "middle"],
|
||||||
|
"optional": True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "RIGHT_CLICK",
|
||||||
|
"note": "right click at the current position if x and y are not specified, otherwise right click at the specified position",
|
||||||
|
"parameters": {
|
||||||
|
"x": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, X_MAX],
|
||||||
|
"optional": True,
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, Y_MAX],
|
||||||
|
"optional": True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "DOUBLE_CLICK",
|
||||||
|
"note": "double click at the current position if x and y are not specified, otherwise double click at the specified position",
|
||||||
|
"parameters": {
|
||||||
|
"x": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, X_MAX],
|
||||||
|
"optional": True,
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, Y_MAX],
|
||||||
|
"optional": True,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "DRAG_TO",
|
||||||
|
"note": "drag the cursor to the specified position with the left button pressed",
|
||||||
|
"parameters": {
|
||||||
|
"x": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, X_MAX],
|
||||||
|
"optional": False,
|
||||||
|
},
|
||||||
|
"y": {
|
||||||
|
"type": float,
|
||||||
|
"range": [0, Y_MAX],
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "SCROLL",
|
||||||
|
"note": "scroll the mouse wheel up or down",
|
||||||
|
"parameters": {
|
||||||
|
"dx": {
|
||||||
|
"type": int,
|
||||||
|
"range": None,
|
||||||
|
"optional": False,
|
||||||
|
},
|
||||||
|
"dy": {
|
||||||
|
"type": int,
|
||||||
|
"range": None,
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "TYPING",
|
||||||
|
"note": "type the specified text",
|
||||||
|
"parameters": {
|
||||||
|
"text": {
|
||||||
|
"type": str,
|
||||||
|
"range": None,
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "PRESS",
|
||||||
|
"note": "press the specified key and release it",
|
||||||
|
"parameters": {
|
||||||
|
"key": {
|
||||||
|
"type": str,
|
||||||
|
"range": KEYBOARD_KEYS,
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "KEY_DOWN",
|
||||||
|
"note": "press the specified key",
|
||||||
|
"parameters": {
|
||||||
|
"key": {
|
||||||
|
"type": str,
|
||||||
|
"range": KEYBOARD_KEYS,
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "KEY_UP",
|
||||||
|
"note": "release the specified key",
|
||||||
|
"parameters": {
|
||||||
|
"key": {
|
||||||
|
"type": str,
|
||||||
|
"range": KEYBOARD_KEYS,
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "HOTKEY",
|
||||||
|
"note": "press the specified key combination",
|
||||||
|
"parameters": {
|
||||||
|
"keys": {
|
||||||
|
"type": list,
|
||||||
|
"range": [KEYBOARD_KEYS],
|
||||||
|
"optional": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
############################################################################################################
|
||||||
|
{
|
||||||
|
"action_type": "WAIT",
|
||||||
|
"note": "wait until the next action",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "FAIL",
|
||||||
|
"note": "decide the task can not be performed",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"action_type": "DONE",
|
||||||
|
"note": "decide the task is done",
|
||||||
|
}
|
||||||
|
]
|
||||||
|
Firstly you need to predict the class of your action, then you need to predict the parameters of your action:
|
||||||
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
|
- For MOUSE_MOVE, you need to predict the x and y coordinate of the mouse cursor, the left top corner of the screen is (0, 0), the right bottom corner of the screen is (1920, 1080)
|
||||||
for example, format as:
|
for example, format as:
|
||||||
```
|
```
|
||||||
@@ -48,7 +236,9 @@ for example, format as:
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
For every step, you should only return the action_type and the parameters of your action as a dict, without any other things. You MUST wrap the dict with backticks (\`).
|
REMEMBER:
|
||||||
You can predict multiple actions at one step, but you should only return one action for each step.
|
For every step, you should only return the action_type and the parameters of your action as a dict, without any other things.
|
||||||
|
You MUST wrap the dict with backticks (\`).
|
||||||
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
|
You MUST choose and ONLY CHOOSE from the action space above, otherwise your action will be considered as invalid and you will get a penalty.
|
||||||
|
You CAN predict multiple actions at one step, but you should only return one action for each step.
|
||||||
"""
|
"""
|
||||||
@@ -4,5 +4,8 @@ For each step, you will get an observation of an image, which is the screenshot
|
|||||||
|
|
||||||
You are required to use `pyautogui` to perform the action.
|
You are required to use `pyautogui` to perform the action.
|
||||||
Return one line or multiple lines of python code to perform the action each time, be time efficient.
|
Return one line or multiple lines of python code to perform the action each time, be time efficient.
|
||||||
Return `None` if you cannot perform the action.
|
|
||||||
|
When you think you have to wait for some time, return `WAIT`.
|
||||||
|
When you think the task can not be done, return `FAIL`.
|
||||||
|
When you think the task is done, return `DONE`.
|
||||||
"""
|
"""
|
||||||
@@ -29,3 +29,4 @@ opencv-python
|
|||||||
ImageHash
|
ImageHash
|
||||||
scikit-image
|
scikit-image
|
||||||
librosa
|
librosa
|
||||||
|
pymupdf
|
||||||
|
|||||||
Reference in New Issue
Block a user