Merge branch 'main' into zdy
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,6 +2,9 @@
|
||||
*.pth
|
||||
*.pt
|
||||
|
||||
# Credential files
|
||||
evaluation_examples/settings/googledrive/credentials.json
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
@@ -23,8 +23,11 @@ Please refer to [guidance](https://docs.google.com/document/d/1KBdeZwmZs2Vi_Wsnn
|
||||
2. Install the environment package, download the examples and the virtual machine image.
|
||||
For x86_64 Linux or Windows, you can install the environment package and download the examples and the virtual machine image by running the following commands:
|
||||
```bash
|
||||
pip install desktop-env
|
||||
gdown xxxx
|
||||
git clone https://github.com/xlang-ai/DesktopEnv
|
||||
cd DesktopEnv
|
||||
pip install -r requirements.txt
|
||||
gdown https://drive.google.com/drive/folders/1HX5gcf7UeyR-2UmiA15Q9U-Wr6E6Gio8 -O Ubuntu --folder
|
||||
vmrun -T ws start "Ubuntu/Ubuntu.vmx" nogui
|
||||
vmrun -T ws snapshot "Ubuntu/Ubuntu.vmx" "init_state"
|
||||
```
|
||||
@@ -89,4 +92,4 @@ If you find this environment useful, please consider citing our work:
|
||||
journal={arXiv preprint arXiv:xxxx.xxxx},
|
||||
year={2024}
|
||||
}
|
||||
```
|
||||
```
|
||||
|
||||
@@ -58,7 +58,8 @@ class DesktopEnv(gym.Env):
|
||||
tmp_dir: str = "tmp",
|
||||
cache_dir: str = "cache",
|
||||
screen_size: Tuple[int] = (1920, 1080),
|
||||
headless: bool = False
|
||||
headless: bool = False,
|
||||
require_a11y_tree: bool = True,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
@@ -77,6 +78,7 @@ class DesktopEnv(gym.Env):
|
||||
self.cache_dir_base: str = cache_dir
|
||||
self.vm_screen_size = screen_size # todo: add the logic to get the screen size from the VM
|
||||
self.headless = headless
|
||||
self.require_a11y_tree = require_a11y_tree
|
||||
|
||||
os.makedirs(self.tmp_dir_base, exist_ok=True)
|
||||
|
||||
@@ -248,7 +250,7 @@ class DesktopEnv(gym.Env):
|
||||
|
||||
observation = {
|
||||
"screenshot": self._get_obs(),
|
||||
"accessibility_tree": self.controller.get_accessibility_tree(),
|
||||
"accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
|
||||
}
|
||||
return observation
|
||||
|
||||
@@ -284,7 +286,7 @@ class DesktopEnv(gym.Env):
|
||||
|
||||
observation = {
|
||||
"screenshot": self._get_obs(),
|
||||
"accessibility_tree": self.controller.get_accessibility_tree(),
|
||||
"accessibility_tree": self.controller.get_accessibility_tree() if self.require_a11y_tree else None,
|
||||
# "terminal": self.controller.get_terminal_output(),
|
||||
"instruction": self.instruction
|
||||
}
|
||||
|
||||
@@ -77,6 +77,7 @@ from .general import (
|
||||
literal_match
|
||||
)
|
||||
from .gimp import (
|
||||
check_structure_sim_resized,
|
||||
check_brightness_decrease_and_structure_sim,
|
||||
check_contrast_increase_and_structure_sim,
|
||||
check_saturation_increase_and_structure_sim,
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/tvf25tcbo8jda5qvbhlr5et7mb3l00qr/1709005500000/767b5ea4-1bdf-4b49-9fa0-c17b53e21f8a/108888117743638485671/ADt3v-PaaonAsKLjIRGjHf-MSjw2YlZrA_AiqizGDB9kBc9aOX8OpnU4AjTlh83sB2TPylr28DyOIJhAt4Wpnvm3DK8bVGBOM7JyLSyFtO_hXXbDtrF2DyWDuYy-9PqaxJuwgPfpXVnTuwOwYbZh5kebA99822_ymo383VWrpSaga6MjZXZFtGdl5r87fxwi5G7KgL_bQFo3QUWadawJzldqrwe6KRIIo0Zru0oIVazeM7LtjFV4WWLozAJ7ZJ3lS6qCKJltKN0wpg6Sdw1rS1VzDq_tYo0n2uR4zDll5cMMA8fW5AhU44PNxnWmGmivzJszfXA4Fn7I?j=767b5ea4-1bdf-4b49-9fa0-c17b53e21f8a&user=6816948370&i=0&authuser=0",
|
||||
"url": "https://drive.usercontent.google.com/download?id=1k1d2UbXvp05gDdV669gNDnbdEv9SsAtN&export=download&authuser=0&confirm=t&uuid=c3d51b38-e061-4198-80cd-3cd251de8dae&at=APZUnTXaiHViMYwtweYPykye7N5u:1710938272734",
|
||||
"path": "/home/user/Downloads/HW-8-main-20240207T164539Z-001.zip"
|
||||
}
|
||||
]
|
||||
@@ -62,4 +62,4 @@
|
||||
"dest": "settings.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
"parameters": {
|
||||
"files": [
|
||||
{
|
||||
"url": "https://drive-data-export.usercontent.google.com/download/aht97d3cgh8crp6c8nof1vt3tipaiqt5/94gda7383revq68jl0c4fu852bb4a375/1709010000000/09ed1496-6945-4b34-b938-8e5f67e64d8f/108888117743638485671/ADt3v-NkzdbsoW3-0iDvDvlUAcCN3jRyAtBZH4ork--gAhv8JfYhMxiHDU7hr0GB-M8y8BSEArd4Z6becXlgNGuv7k50hOVsShmmQ22KgJkGimg6urK1fzkCG0VS_5cfdxRsjKQByRZmmvX675Zw5NQoRMgcJpTqcjIBr01BsSljkvtPU1wu_iVz_w1i2lk6TlTsNNIa3MRCK1zG4Fd7qySq5Tg6TzWhga1uewXlYGfQVwxyBlVX7rPuJBud2CB7UfZFQbd-2DftnZRA3zSYpDmfwc9NutAdmmuvGr6Fj9395yItzi5Vt6sUWHZfSykXy8DdHtsONn32?j=09ed1496-6945-4b34-b938-8e5f67e64d8f&user=6816948370&i=0&authuser=0",
|
||||
"url": "https://drive.usercontent.google.com/download?id=1ITuXkSbTF0BcbTQ3v4A1qnSzbTPrP5ax&export=download&authuser=0&confirm=t&uuid=c6c45cbf-63bc-4cb0-b76c-5a663c0ed820&at=APZUnTVrE-pn_e6HGTp_Eg4ziQhi:1710938673095",
|
||||
"path": "/home/user/Downloads/hw_python_oop-master.zip"
|
||||
}
|
||||
]
|
||||
@@ -63,4 +63,4 @@
|
||||
"dest": "settings.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
{
|
||||
"email": "xlang2024anonym@gmail.com",
|
||||
"password": "q]wN~0iD>H:6"
|
||||
}
|
||||
"password": "Evt5LLj!VJ6Y!C$B"
|
||||
}
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
{"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "client_id": "786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com", "client_secret": "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", "refresh_token": "1//0eVpYfdSAjvbCCgYIARAAGA4SNwF-L9IrAgL6KVceiEVTjtQdmPki2I3m8ejP3lzTLL2Wa3-rdrYfU7eYeKDVCS5KRxa_xCE_pPY", "token_expiry": "2024-03-13T10:09:01Z", "token_uri": "https://oauth2.googleapis.com/token", "user_agent": null, "revoke_uri": "https://oauth2.googleapis.com/revoke", "id_token": null, "id_token_jwt": null, "token_response": {"access_token": "ya29.a0Ad52N382_JIl2nZBNpJCgoU3HXk2Kz7CArVYn_PGI8pXFucAozry1Vmp5QolzGrnl4UChZswJDOgcdPm5Ew-NbdHPX95wxknoG1oJKqjWYtjl3mw433hiGtriuKWKnXcz1NUf8ewqqq458tJLLDhbbZFW7eZRQrdJzmrGAaCgYKAZ4SARISFQHGX2Mik2MQ5qx0goIypVyzbcUmYw0173", "expires_in": 3599, "scope": "https://www.googleapis.com/auth/drive", "token_type": "Bearer"}, "scopes": ["https://www.googleapis.com/auth/drive"], "token_info_uri": "https://oauth2.googleapis.com/tokeninfo", "invalid": false, "_class": "OAuth2Credentials", "_module": "oauth2client.client"}
|
||||
@@ -286,7 +286,6 @@
|
||||
"788b3701-3ec9-4b67-b679-418bfa726c22",
|
||||
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
|
||||
"42d25c08-fb87-4927-8b65-93631280a26f",
|
||||
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
|
||||
"e8172110-ec08-421b-a6f5-842e6451911f",
|
||||
"42f4d1c7-4521-4161-b646-0a8934e36081",
|
||||
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
|
||||
|
||||
@@ -70,7 +70,6 @@
|
||||
"c2751594-0cd5-4088-be1b-b5f2f9ec97c4",
|
||||
"48c46dc7-fe04-4505-ade7-723cba1aa6f6",
|
||||
"42d25c08-fb87-4927-8b65-93631280a26f",
|
||||
"bb7db4c2-30b5-4be7-8dd7-b8c4ec7d3108",
|
||||
"3c8f201a-009d-4bbe-8b65-a6f8b35bb57f",
|
||||
"d68204bf-11c1-4b13-b48b-d303c73d4bf6",
|
||||
"91190194-f406-4cd6-b3f9-c43fac942b22",
|
||||
|
||||
@@ -350,7 +350,7 @@ class PromptAgent:
|
||||
# {{{1
|
||||
if self.observation_type in ["screenshot", "screenshot_a11y_tree"]:
|
||||
base64_image = encode_image(obs["screenshot"])
|
||||
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"])
|
||||
linearized_accessibility_tree = linearize_accessibility_tree(accessibility_tree=obs["accessibility_tree"]) if self.observation_type == "screenshot_a11y_tree" else None
|
||||
logger.debug("LINEAR AT: %s", linearized_accessibility_tree)
|
||||
|
||||
if self.observation_type == "screenshot_a11y_tree":
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
wget https://github.com/UX-Decoder/Semantic-SAM/releases/download/checkpoint/swinl_only_sam_many2many.pth
|
||||
wget https://huggingface.co/xdecoder/SEEM/resolve/main/seem_focall_v1.pt
|
||||
wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
|
||||
@@ -801,7 +801,7 @@ You CAN predict multiple actions at one step, but you should only return one act
|
||||
SYS_PROMPT_IN_SOM_OUT_TAG = """
|
||||
You are an agent which follow my instruction and perform desktop computer tasks as instructed.
|
||||
You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
|
||||
For each step, you will get an observation of the desktop by 1) a screenshot with interact-able elements marked with numerical tags; and 2) accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the image and test information.
|
||||
For each step, you will get an observation of the desktop by 1) a screenshot with interact-able elements marked with numerical tags; and 2) accessibility tree, which is based on AT-SPI library. And you will predict the action of the computer based on the image and text information.
|
||||
|
||||
You are required to use `pyautogui` to perform the action grounded to the observation, but DONOT use the `pyautogui.locateCenterOnScreen` function to locate the element you want to operate with since we have no image of the element you want to operate with. DONOT USE `pyautogui.screenshot()` to make screenshot.
|
||||
You can replace x, y in the code with the tag of the element you want to operate with. such as:
|
||||
|
||||
11
run.py
11
run.py
@@ -95,6 +95,10 @@ def config() -> argparse.Namespace:
|
||||
parser.add_argument("--max_tokens", type=int, default=1500)
|
||||
parser.add_argument("--stop_token", type=str, default=None)
|
||||
|
||||
# example config
|
||||
parser.add_argument("--domain", type=str, default="all")
|
||||
parser.add_argument("--test_all_meta_path", type=str, default="evaluation_examples/test_all.json")
|
||||
|
||||
# logging related
|
||||
parser.add_argument("--result_dir", type=str, default="./results")
|
||||
args = parser.parse_args()
|
||||
@@ -141,10 +145,10 @@ def test(
|
||||
|
||||
env = DesktopEnv(
|
||||
path_to_vm=args.path_to_vm,
|
||||
snapshot_name="Snapshot 35",
|
||||
action_space=agent.action_space,
|
||||
screen_size=(args.screen_width, args.screen_height),
|
||||
headless=args.headless,
|
||||
require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
|
||||
)
|
||||
|
||||
for domain in tqdm(test_all_meta, desc="Domain"):
|
||||
@@ -265,9 +269,12 @@ if __name__ == '__main__':
|
||||
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
||||
args = config()
|
||||
|
||||
with open("evaluation_examples/test_all.json", "r", encoding="utf-8") as f:
|
||||
with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
|
||||
test_all_meta = json.load(f)
|
||||
|
||||
if args.domain != "all":
|
||||
test_all_meta = {args.domain: test_all_meta[args.domain]}
|
||||
|
||||
test_file_list = get_unfinished(
|
||||
args.action_space,
|
||||
args.model,
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
{
|
||||
"time_limit": "600"
|
||||
"time_limit": "1800"
|
||||
}
|
||||
Reference in New Issue
Block a user