Merge branch 'main' into zdy
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,6 +2,9 @@
|
|||||||
*.pth
|
*.pth
|
||||||
*.pt
|
*.pt
|
||||||
|
|
||||||
|
# Credential files
|
||||||
|
evaluation_examples/settings/googledrive/credentials.json
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|||||||
@@ -23,8 +23,11 @@ Please refer to [guidance](https://docs.google.com/document/d/1KBdeZwmZs2Vi_Wsnn
|
|||||||
2. Install the environment package, download the examples and the virtual machine image.
|
2. Install the environment package, download the examples and the virtual machine image.
|
||||||
For x86_64 Linux or Windows, you can install the environment package and download the examples and the virtual machine image by running the following commands:
|
For x86_64 Linux or Windows, you can install the environment package and download the examples and the virtual machine image by running the following commands:
|
||||||
```bash
|
```bash
|
||||||
pip install desktop-env
|
git clone https://github.com/xlang-ai/DesktopEnv
|
||||||
gdown xxxx
|
cd DesktopEnv
|
||||||
|
pip install -r requirements.txt
|
||||||
|
gdown https://drive.google.com/drive/folders/1HX5gcf7UeyR-2UmiA15Q9U-
|
||||||
|
Wr6E6Gio8 -O Ubuntu --folder
|
||||||
vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui
|
vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui
|
||||||
vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state"
|
vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state"
|
||||||
```
|
```
|
||||||
|
|||||||
@@ -58,7 +58,8 @@ class DesktopEnv(gym.Env):
|
|||||||
tmp_dir: str = "tmp",
|
tmp_dir: str = "tmp",
|
||||||
cache_dir: str = "cache",
|
cache_dir: str = "cache",
|
||||||
screen_size: Tuple[int] = (1920, 1080),
|
screen_size: Tuple[int] = (1920, 1080),
|
||||||
headless: bool = False
|
headless: bool = False,
|
||||||
|
require_a11y_tree: bool = True,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Args:
|
Args:
|
||||||
@@ -77,6 +78,7 @@ class DesktopEnv(gym.Env):
|
|||||||
self.cache_dir_base: str = cache_dir
|
self.cache_dir_base: str = cache_dir
|
||||||
self.vm_screen_size = screen_size # todo: add the logic to get the screen size from the VM
|
self.vm_screen_size = screen_size # todo: add the logic to get the screen size from the VM
|
||||||
self.headless = headless
|
self.headless = headless
|
||||||
|
self.require_a11y_tree = require_a11y_tree
|
||||||
|
|
||||||
os.makedirs(self.tmp_dir_base, exist_ok=True)
|
os.makedirs(self.tmp_dir_base, exist_ok=True)
|
||||||
|
|
||||||
@@ -248,7 +250,7 @@ class DesktopEnv(gym.Env):
|
|||||||
|
|
||||||
observation = {
|
observation = {
|
||||||
"screenshot": self._get_obs(),
|
"screenshot": self._get_obs(),
|
||||||
"accessibility_tree": self.controller.get_accessibility_tree(),
|
"accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
|
||||||
}
|
}
|
||||||
return observation
|
return observation
|
||||||
|
|
||||||
@@ -284,7 +286,7 @@ class DesktopEnv(gym.Env):
|
|||||||
|
|
||||||
observation = {
|
observation = {
|
||||||
"screenshot": self._get_obs(),
|
"screenshot": self._get_obs(),
|
||||||
"accessibility_tree": self.controller.get_accessibility_tree(),
|
"accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
|
||||||
# "terminal": self.controller.get_terminal_output(),
|
# "terminal": self.controller.get_terminal_output(),
|
||||||
"instruction": self.instruction
|
"instruction": self.instruction
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -77,6 +77,7 @@ from .general import (
|
|||||||
literal_match
|
literal_match
|
||||||
)
|
)
|
||||||
from .gimp import (
|
from .gimp import (
|
||||||
|
check_structure_sim_resized,
|
||||||
check_brightness_decrease_and_structure_sim,
|
check_brightness_decrease_and_structure_sim,
|
||||||
check_contrast_increase_and_structure_sim,
|
check_contrast_increase_and_structure_sim,
|
||||||
check_saturation_increase_and_structure_sim,
|
check_saturation_increase_and_structure_sim,
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"parameters": {
|
"parameters": {
|
||||||
"files": [
|
"files": [
|
||||||
{
|
{
|
||||||
"url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/tvf25tcbo8jda5qvbhlr5et7mb3l00qr/1709005500000/767b5ea4-1bdf-4b49-9fa0-c17b53e21f8a/108888117743638485671/ADt3v-PaaonAsKLjIRGjHf-MSjw2YlZrA_AiqizGDB9kBc9aOX8OpnU4AjTlh83sB2TPylr28DyOIJhAt4Wpnvm3DK8bVGBOM7JyLSyFtO_hXXbDtrF2DyWDuYy-9PqaxJuwgPfpXVnTuwOwYbZh5kebA99822_ymo383VWrpSaga6MjZXZFtGdl5r87fxwi5G7KgL_bQFo3QUWadawJzldqrwe6KRIIo0Zru0oIVazeM7LtjFV4WWLozAJ7ZJ3lS6qCKJltKN0wpg6Sdw1rS1VzDq_tYo0n2uR4zDll5cMMA8fW5AhU44PNxnWmGmivzJszfXA4Fn7I?j=767b5ea4-1bdf-4b49-9fa0-c17b53e21f8a&user=6816948370&i=0&authuser=0",
|
"url": "https://drive.usercontent.google.com/download?id=1k1d2UbXvp05gDdV669gNDnbdEv9SsAtN&export=download&authuser=0&confirm=t&uuid=c3d51b38-e061-4198-80cd-3cd251de8dae&at=APZUnTXaiHViMYwtweYPykye7N5u:1710938272734",
|
||||||
"path": "/home/user/Downloads/HW-8-main-20240207T164539Z-001.zip"
|
"path": "/home/user/Downloads/HW-8-main-20240207T164539Z-001.zip"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"parameters": {
|
"parameters": {
|
||||||
"files": [
|
"files": [
|
||||||
{
|
{
|
||||||
"url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/94gda7383revq68jl0c4fu852bb4a375/1709010000000/09ed1496-6945-4b34-b938-8e5f67e64d8f/108888117743638485671/ADt3v-NkzdbsoW3-0iDvDvlUAcCN3jRyAtBZH4ork--gAhv8JfYhMxiHDU7hr0GB-M8y8BSEArd4Z6becXlgNGuv7k50hOVsShmmQ22KgJkGimg6urK1fzkCG0VS_5cfdxRsjKQByRZmmvX675Zw5NQoRMgcJpTqcjIBr01BsSljkvtPU1wu_iVz_w1i2lk6TlTsNNIa3MRCK1zG4Fd7qySq5Tg6TzWhga1uewXlYGfQVwxyBlVX7rPuJBud2CB7UfZFQbd-2DftnZRA3zSYpDmfwc9NutAdmmuvGr6Fj9395yItzi5Vt6sUWHZfSykXy8DdHtsONn32?j=09ed1496-6945-4b34-b938-8e5f67e64d8f&user=6816948370&i=0&authuser=0",
|
"url": "https://drive.usercontent.google.com/download?id=1ITuXkSbTF0BcbTQ3v4A1qnSzbTPrP5ax&export=download&authuser=0&confirm=t&uuid=c6c45cbf-63bc-4cb0-b76c-5a663c0ed820&at=APZUnTVrE-pn_e6HGTp_Eg4ziQhi:1710938673095",
|
||||||
"path": "/home/user/Downloads/hw_python_oop-master.zip"
|
"path": "/home/user/Downloads/hw_python_oop-master.zip"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
{
|
{
|
||||||
"email": "xlang2024anonym@gmail.com",
|
"email": "xlang2024anonym@gmail.com",
|
||||||
"password": "q]wN~0iD>H:6"
|
"password": "Evt5LLj!VJ6Y!C$B"
|
||||||
}
|
}
|
||||||
@@ -1 +0,0 @@
|
|||||||
{"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-13T10:09:01Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
|
|
||||||
@@ -286,7 +286,6 @@
|
|||||||
"788b3701-3ec9-4b67-b679-418bfa726c22",
|
"788b3701-3ec9-4b67-b679-418bfa726c22",
|
||||||
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
|
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
|
||||||
"42d25c08-fb87-4927-8b65-93631280a26f",
|
"42d25c08-fb87-4927-8b65-93631280a26f",
|
||||||
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
|
|
||||||
"e8172110-ec08-421b-a6f5-842e6451911f",
|
"e8172110-ec08-421b-a6f5-842e6451911f",
|
||||||
"42f4d1c7-4521-4161-b646-0a8934e36081",
|
"42f4d1c7-4521-4161-b646-0a8934e36081",
|
||||||
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
|
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
|
||||||
|
|||||||
@@ -70,7 +70,6 @@
|
|||||||
"c2751594-0cd5-4088-be1b-b5f2f9ec97c4",
|
"c2751594-0cd5-4088-be1b-b5f2f9ec97c4",
|
||||||
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
|
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
|
||||||
"42d25c08-fb87-4927-8b65-93631280a26f",
|
"42d25c08-fb87-4927-8b65-93631280a26f",
|
||||||
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
|
|
||||||
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
|
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
|
||||||
"d68204bf-11c1-4b13-b48b-d303c73d4bf6",
|
"d68204bf-11c1-4b13-b48b-d303c73d4bf6",
|
||||||
"91190194-f406-4cd6-b3f9-c43fac942b22",
|
"91190194-f406-4cd6-b3f9-c43fac942b22",
|
||||||
|
|||||||
@@ -350,7 +350,7 @@ class PromptAgent:
|
|||||||
# {{{1
|
# {{{1
|
||||||
if self.observation_type in ["screenshot", "screenshot_a11y_tree"]:
|
if self.observation_type in ["screenshot", "screenshot_a11y_tree"]:
|
||||||
base64_image = encode_image(obs["screenshot"])
|
base64_image = encode_image(obs["screenshot"])
|
||||||
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
|
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) if self.observation_type == "screenshot_a11y_tree" else None
|
||||||
logger.debug("LINEAR AT: %s", linearized_accessibility_tree)
|
logger.debug("LINEAR AT: %s", linearized_accessibility_tree)
|
||||||
|
|
||||||
if self.observation_type == "screenshot_a11y_tree":
|
if self.observation_type == "screenshot_a11y_tree":
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
wget https://github.com/UX-Decoder/Semantic-SAM/releases/download/checkpoint/swinl_only_sam_many2many.pth
|
|
||||||
wget https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v1.pt
|
|
||||||
wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
|
|
||||||
@@ -801,7 +801,7 @@ You CAN predict multiple actions at one step, but you should only return one act
|
|||||||
SYS_PROMPT_IN_SOM_OUT_TAG = """
|
SYS_PROMPT_IN_SOM_OUT_TAG = """
|
||||||
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
|
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
|
||||||
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
|
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
|
||||||
For each step, you will get an observation of the desktop by 1) a screenshot with interact-able elements marked with numerical tags; and 2) accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the image and test information.
|
For each step, you will get an observation of the desktop by 1) a screenshot with interact-able elements marked with numerical tags; and 2) accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the image and text information.
|
||||||
|
|
||||||
You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot.
|
You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot.
|
||||||
You can replace x, y in the code with the tag of the element you want to operate with. such as:
|
You can replace x, y in the code with the tag of the element you want to operate with. such as:
|
||||||
|
|||||||
11
run.py
11
run.py
@@ -95,6 +95,10 @@ def config() -> argparse.Namespace:
|
|||||||
parser.add_argument("--max_tokens", type=int, default=1500)
|
parser.add_argument("--max_tokens", type=int, default=1500)
|
||||||
parser.add_argument("--stop_token", type=str, default=None)
|
parser.add_argument("--stop_token", type=str, default=None)
|
||||||
|
|
||||||
|
# example config
|
||||||
|
parser.add_argument("--domain", type=str, default="all")
|
||||||
|
parser.add_argument("--test_all_meta_path", type=str, default="evaluation_examples/test_all.json")
|
||||||
|
|
||||||
# logging related
|
# logging related
|
||||||
parser.add_argument("--result_dir", type=str, default="./results")
|
parser.add_argument("--result_dir", type=str, default="./results")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
@@ -141,10 +145,10 @@ def test(
|
|||||||
|
|
||||||
env = DesktopEnv(
|
env = DesktopEnv(
|
||||||
path_to_vm=args.path_to_vm,
|
path_to_vm=args.path_to_vm,
|
||||||
snapshot_name="Snapshot 35",
|
|
||||||
action_space=agent.action_space,
|
action_space=agent.action_space,
|
||||||
screen_size=(args.screen_width, args.screen_height),
|
screen_size=(args.screen_width, args.screen_height),
|
||||||
headless=args.headless,
|
headless=args.headless,
|
||||||
|
require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
|
||||||
)
|
)
|
||||||
|
|
||||||
for domain in tqdm(test_all_meta, desc="Domain"):
|
for domain in tqdm(test_all_meta, desc="Domain"):
|
||||||
@@ -265,9 +269,12 @@ if __name__ == '__main__':
|
|||||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||||
args = config()
|
args = config()
|
||||||
|
|
||||||
with open("evaluation_examples/test_all.json", "r", encoding="utf-8") as f:
|
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||||
test_all_meta = json.load(f)
|
test_all_meta = json.load(f)
|
||||||
|
|
||||||
|
if args.domain != "all":
|
||||||
|
test_all_meta = {args.domain: test_all_meta[args.domain]}
|
||||||
|
|
||||||
test_file_list = get_unfinished(
|
test_file_list = get_unfinished(
|
||||||
args.action_space,
|
args.action_space,
|
||||||
args.model,
|
args.model,
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
{
|
{
|
||||||
"time_limit": "600"
|
"time_limit": "1800"
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user