diff --git a/.gitignore b/.gitignore
index f8343ce..00abad5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,9 @@
*.pt
# Credential files
+evaluation_examples/settings/google/settings.json
evaluation_examples/settings/googledrive/credentials.json
+evaluation_examples/settings/googledrive/client_secrets.json
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/ACCOUNT_GUIDELINE.md b/ACCOUNT_GUIDELINE.md
new file mode 100644
index 0000000..bfb3f03
--- /dev/null
+++ b/ACCOUNT_GUIDELINE.md
@@ -0,0 +1,138 @@
+# Real Accounts
+
+For tasks involving Google or Google Drive, we need a real Google account as well as configured OAuth2.0 secrets.
+
+> Attention: to prevent environment-reset and result-evaluation conflicts caused by multiple people using the same Google account simultaneously, we do not provide public test accounts. Please register a private Google account.
+
+## Table of Contents
+1. [Register A Blank Google Account](#register-a-blank-google-account)
+2. [Create A Google Cloud Project](#create-a-google-cloud-project)
+3. [Configure OAuth Consent Screen](#configure-oauth-consent-screen)
+4. [Create OAuth2.0 Credentials](#create-oauth20-credentials)
+5. [Potential Issues](#potential-issues)
+
+## Register A Blank Google Account
+
+1. Go to the Google website and register a new, blank account
+ - In this testbed, you do not need to provide any recovery email or phone number, since the account is only used for testing
+ - Just **IGNORE** any security recommendations
+ - Shut **OFF** the [2-Step Verification](https://support.google.com/accounts/answer/1064203?hl=en&co=GENIE.Platform%3DDesktop#:~:text=Open%20your%20Google%20Account.,Select%20Turn%20off.) to avoid failure in environment setup (requesting phone verification code)
+
+
+
+
+
+> Attention: we strongly recommend that you register a new blank account instead of using an existing one, in order to avoid messing up your personal workspace.
+
+2. Next, copy the template file `settings.json.template` to `settings.json` in the folder `evaluation_examples/settings/google/`. Remember to replace the two fields `email` and `password`:
+ - these two fields are used to simulate a real user logging in to the Chrome browser during environment setup for relevant examples in the virtual machine
+
+```
+{
+ "email": "your_google_account@gmail.com",
+ "password": "your_google_account_password"
+}
+```
+
+## Create A Google Cloud Project
+
+1. Navigate to the [Google Cloud Project Creation](https://console.cloud.google.com/projectcreate) page and create a new GCP project (see [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project) for detailed steps). You can use any project name.
+
+2. Go to the [Google Drive API console](https://console.cloud.google.com/apis/library/drive.googleapis.com?) and enable the Google Drive API for the created project (see [Enable and disable APIs](https://support.google.com/googleapi/answer/6158841?hl=en) for detailed steps).
+
+
+
+
+
+
+## Configure OAuth Consent Screen
+To configure the OAuth2.0 consent screen for the created GCP project, go to the [OAuth consent screen](https://console.cloud.google.com/apis/credentials/consent?) page:
+
+1. For User Type, choose "External" and click "Create"
+
+
+
+
+
+2. For App information, type in any App name you like (e.g., DataAccess), and select the current Gmail address in the `User support email` field.
+
+
+
+
+
+3. For Developer information, also fill in the current Gmail address. Leave the other fields blank and click the "SAVE AND CONTINUE" button.
+
+
+
+
+
+4. Leave the fields blank for `Scopes` and continue to `Test Users`. Add the current Gmail account by clicking the "+ ADD USERS" button.
+
+
+
+
+
+5. Finish the configuration and you will arrive at the configured OAuth consent screen. One more thing: click PUBLISH APP to extend the lifetime of the credentials. Otherwise, the refresh token is only valid for 7 days (refer to the [Google official doc](https://developers.google.com/identity/protocols/oauth2#expiration) and this [Stack Overflow post](https://stackoverflow.com/questions/74659774/google-oauth-2-0-refresh-token-expiry-how-many-days) for details).
+
+
+
+
+
+
+## Create OAuth2.0 Credentials
+
+1. Go to the [credentials page](https://console.cloud.google.com/apis/credentials?) and click "CREATE CREDENTIALS -> OAuth client ID"
+
+
+
+
+
+2. For Application type, please choose "Desktop app". You can use any name. Then click "CREATE".
+
+
+
+
+
+3. Now, in the pop-up window, you can download the JSON file `client_secret_xxxxx.json`. Move and rename this .json file to file path `evaluation_examples/settings/googledrive/client_secrets.json` in the OSWorld project. The folder should look like:
+
+```
+- evaluation_examples/
+ - settings/
+ - google/
+ - settings.json
+ - settings.json.template
+ - googledrive/
+ - settings.yml
+ - client_secrets.json
+```
+
+4. Note that when you first run a task involving Google Drive, a URL will be shown requesting your permission. Open the link in unsafe mode using the Gmail account you filled in `evaluation_examples/settings/google/settings.json`, then authorize and confirm your choice once and for all. Eventually, you will see the message "The authentication flow has completed." in a blank web page.
+
+
+
+
+
+
+## Potential Issues
+
+Due to strict checks by Google's safety teams, even if we turn off 2-step verification, Google may still detect potential risks for your account, especially __when you frequently change the login device__. You may encounter the following issues:
+
+### Phone Verification Code Required
+
+When the VM tries to log into the Google Drive page, Google requests that you provide a phone number and a verification code. This may occur when you change your IP or device for the first time.
+
+
+
+
+
+To solve it, entering any valid phone number is sufficient (since we turned off 2-step verification and did not provide any recovery phone number). Then fill in the verification code you receive. After that, hopefully, Google will remember this new login IP or device. Now you can restart the task, and this time it should work.
+
+### Identity Verification
+
+
+
+
+
+In this case, Google does not give you the chance to use a phone verification code. Since we did not provide any recovery email/phone and turned off 2-step verification, we are unable to log in from the new device. We hypothesize that this problem may occur when you frequently change the login IP or device, such that Google detects unusual usage. The only solution is to reset the password from the device on which you registered this Google account.
+
+> Sadly, we do not have a permanent solution. The only suggestion is not to frequently change your login IP or device. If you encounter any problem above, Google may urge you to change the password. Also remember to update the password in `evaluation_examples/settings/google/settings.json`.
\ No newline at end of file
diff --git a/README.md b/README.md
index 0a92158..ff3bbc7 100644
--- a/README.md
+++ b/README.md
@@ -140,6 +140,10 @@ Afterward, you can execute a command similar to the one in the previous section
The username and password for the virtual machines are as follows:
- **Ubuntu:** `user` / `password`
+### How to set up the account and credentials for Google and Google Drive?
+
+See [Account Guideline](ACCOUNT_GUIDELINE.md)
+
### How can I configure a proxy for the VM if I'm behind a GFW?
See [Proxy Guideline](PROXY_GUIDELINE.md).
diff --git a/assets/authorization.png b/assets/authorization.png
new file mode 100644
index 0000000..4e3b4e4
Binary files /dev/null and b/assets/authorization.png differ
diff --git a/assets/creategcp.png b/assets/creategcp.png
new file mode 100644
index 0000000..1186982
Binary files /dev/null and b/assets/creategcp.png differ
diff --git a/assets/desktopapp.png b/assets/desktopapp.png
new file mode 100644
index 0000000..e31d9b0
Binary files /dev/null and b/assets/desktopapp.png differ
diff --git a/assets/developer.png b/assets/developer.png
new file mode 100644
index 0000000..992bebb
Binary files /dev/null and b/assets/developer.png differ
diff --git a/assets/enableapi.png b/assets/enableapi.png
new file mode 100644
index 0000000..53460ab
Binary files /dev/null and b/assets/enableapi.png differ
diff --git a/assets/googleidentity.png b/assets/googleidentity.png
new file mode 100644
index 0000000..7904788
Binary files /dev/null and b/assets/googleidentity.png differ
diff --git a/assets/googlephonecode.png b/assets/googlephonecode.png
new file mode 100644
index 0000000..5d44ade
Binary files /dev/null and b/assets/googlephonecode.png differ
diff --git a/assets/googleshutoff.png b/assets/googleshutoff.png
new file mode 100644
index 0000000..9bc360a
Binary files /dev/null and b/assets/googleshutoff.png differ
diff --git a/assets/oauth2.0.png b/assets/oauth2.0.png
new file mode 100644
index 0000000..cf11bd6
Binary files /dev/null and b/assets/oauth2.0.png differ
diff --git a/assets/oauthapp.png b/assets/oauthapp.png
new file mode 100644
index 0000000..e862a6f
Binary files /dev/null and b/assets/oauthapp.png differ
diff --git a/assets/publishapp.png b/assets/publishapp.png
new file mode 100644
index 0000000..77ed112
Binary files /dev/null and b/assets/publishapp.png differ
diff --git a/assets/testusers.png b/assets/testusers.png
new file mode 100644
index 0000000..5e13a8a
Binary files /dev/null and b/assets/testusers.png differ
diff --git a/assets/unsafemode.png b/assets/unsafemode.png
new file mode 100644
index 0000000..ff1ba03
Binary files /dev/null and b/assets/unsafemode.png differ
diff --git a/assets/usertype.png b/assets/usertype.png
new file mode 100644
index 0000000..a3923fd
Binary files /dev/null and b/assets/usertype.png differ
diff --git a/desktop_env/envs/__init__.py b/desktop_env/envs/__init__.py
index 133812e..7f5baf6 100644
--- a/desktop_env/envs/__init__.py
+++ b/desktop_env/envs/__init__.py
@@ -7,28 +7,25 @@ import threading
import uuid
import zipfile
from time import sleep
-import logging
-
-logger = logging.getLogger("desktopenv.envinit")
-
+import shutil
import psutil
import requests
from tqdm import tqdm
-__version__ = "0.1.6"
+__version__ = "0.1.12"
MAX_RETRY_TIMES = 10
UBUNTU_ARM_URL = "https://huggingface.co/datasets/xlangai/ubuntu_arm/resolve/main/Ubuntu.zip"
UBUNTU_X86_URL = "https://huggingface.co/datasets/xlangai/ubuntu_x86/resolve/main/Ubuntu.zip"
+DOWNLOADED_FILE_NAME = "Ubuntu.zip"
REGISTRY_PATH = '.vms'
-REGISTRY_IDX_PATH = ".vms_idx"
+VMS_DIR = "./vm_data"
update_lock = threading.Lock()
class VirtualMachineManager:
- def __init__(self, registry_path=REGISTRY_PATH, registry_idx_path=REGISTRY_IDX_PATH):
+ def __init__(self, registry_path=REGISTRY_PATH):
self.registry_path = registry_path
- self.registry_idx_path = registry_idx_path
self.lock = threading.Lock()
self.initialize_registry()
@@ -37,9 +34,6 @@ class VirtualMachineManager:
if not os.path.exists(self.registry_path):
with open(self.registry_path, 'w') as file:
file.write('')
- if not os.path.exists(self.registry_idx_path):
- with open(self.registry_idx_path, 'w') as file:
- file.write('0')
def add_vm(self, vm_path):
with self.lock:
@@ -77,14 +71,18 @@ class VirtualMachineManager:
with open(self.registry_path, 'w') as file:
file.writelines(new_lines)
- def check_and_clean(self):
- with self.lock: # Lock when cleaning up the registry
+ def check_and_clean(self, vms_dir):
+ with self.lock: # Lock when cleaning up the registry and vms_dir
+
+ # Check and clean on the running vms, detect the released ones and mark them as 'free'
active_pids = {p.pid for p in psutil.process_iter()}
new_lines = []
+ vm_paths = []
with open(self.registry_path, 'r') as file:
lines = file.readlines()
for line in lines:
vm_path, pid_str = line.strip().split('|')
+ vm_paths.append(vm_path)
if pid_str == "free":
new_lines.append(line)
continue
@@ -96,6 +94,24 @@ class VirtualMachineManager:
with open(self.registry_path, 'w') as file:
file.writelines(new_lines)
+ # Check and clean on the files inside vms_dir, delete the unregistered ones
+ os.makedirs(vms_dir, exist_ok=True)
+ vm_names = os.listdir(vms_dir)
+ for vm_name in vm_names:
+ # skip the downloaded .zip file
+ if vm_name == DOWNLOADED_FILE_NAME:
+ continue
+ # Skip the .DS_Store file on macOS
+ if vm_name == ".DS_Store":
+ continue
+
+ flag = True
+ for vm_path in vm_paths:
+ if vm_name + ".vmx" in vm_path:
+ flag = False
+ if flag:
+ shutil.rmtree(os.path.join(vms_dir, vm_name))
+
def list_vms(self):
with self.lock: # Lock when reading the registry
all_vms = []
@@ -117,17 +133,15 @@ class VirtualMachineManager:
free_vms.append((vm_path, pid_str))
return free_vms
- def generate_new_vm_name(self):
- with self.lock: # Lock when generating a new path
- with open(self.registry_idx_path, 'r') as file:
- idx = int(file.read())
-
- new_name = f"Ubuntu{idx}"
-
- with open(self.registry_idx_path, 'w') as file:
- file.write(str(idx + 1))
-
- return new_name
+ def generate_new_vm_name(self, vms_dir):
+ registry_idx = 0
+ while True:
+ attempted_new_name = f"Ubuntu{registry_idx}"
+ if os.path.exists(
+ os.path.join(vms_dir, attempted_new_name, attempted_new_name, attempted_new_name + ".vmx")):
+ registry_idx += 1
+ else:
+ return attempted_new_name
def _update_vm(vmx_path, target_vm_name):
@@ -188,17 +202,16 @@ def _update_vm(vmx_path, target_vm_name):
print("VM files renamed successfully.")
-def _install_virtual_machine(vm_name, working_dir="./vm_data", downloaded_file_name="Ubuntu.zip", original_vm_name="Ubuntu"):
- os.makedirs(working_dir, exist_ok=True)
+def _install_virtual_machine(vm_name, vms_dir, downloaded_file_name, original_vm_name="Ubuntu"):
+ os.makedirs(vms_dir, exist_ok=True)
+
def __download_and_unzip_vm():
# Determine the platform and CPU architecture to decide the correct VM image to download
if platform.system() == 'Darwin': # macOS
- #if os.uname().machine == 'arm64': # Apple Silicon
- #url = UBUNTU_ARM_URL
- #else:
- #url = UBUNTU_X86_URL
- url = UBUNTU_ARM_URL # a workout as most new macs are using apple silicon and they are frequently misrecognized as x86_64
- logger.warning("Your platform is temporarily considered to be ARM. If this is a mistake, please manually replace the downloaded VM under ./vm_data with the VM of x86 version from %s", UBUNTU_X86_URL)
+ # if os.uname().machine == 'arm64': # Apple Silicon
+ url = UBUNTU_ARM_URL
+ # else:
+ # url = UBUNTU_X86_URL
elif platform.machine().lower() in ['amd64', 'x86_64']:
url = UBUNTU_X86_URL
else:
@@ -209,7 +222,7 @@ def _install_virtual_machine(vm_name, working_dir="./vm_data", downloaded_file_n
downloaded_size = 0
while True:
- downloaded_file_path = os.path.join(working_dir, downloaded_file_name)
+ downloaded_file_path = os.path.join(vms_dir, downloaded_file_name)
headers = {}
if os.path.exists(downloaded_file_path):
downloaded_size = os.path.getsize(downloaded_file_path)
@@ -248,15 +261,15 @@ def _install_virtual_machine(vm_name, working_dir="./vm_data", downloaded_file_n
# Unzip the downloaded file
print("Unzipping the downloaded file...☕️")
with zipfile.ZipFile(downloaded_file_path, 'r') as zip_ref:
- zip_ref.extractall(os.path.join(working_dir, vm_name))
- print("Files have been successfully extracted to the directory:", os.path.join(working_dir, vm_name))
+ zip_ref.extractall(os.path.join(vms_dir, vm_name))
+ print("Files have been successfully extracted to the directory:", os.path.join(vms_dir, vm_name))
- vm_path = os.path.join(working_dir, vm_name, vm_name, vm_name + ".vmx")
+ vm_path = os.path.join(vms_dir, vm_name, vm_name, vm_name + ".vmx")
# Execute the function to download and unzip the VM, and update the vm metadata
if not os.path.exists(vm_path):
__download_and_unzip_vm()
- _update_vm(os.path.join(working_dir, vm_name, original_vm_name, original_vm_name + ".vmx"), vm_name)
+ _update_vm(os.path.join(vms_dir, vm_name, original_vm_name, original_vm_name + ".vmx"), vm_name)
else:
print(f"Virtual machine exists: {vm_path}")
@@ -270,31 +283,45 @@ def _install_virtual_machine(vm_name, working_dir="./vm_data", downloaded_file_n
raise Exception("Unsupported operating system")
# Start the virtual machine
- subprocess.run(f'vmrun {get_vmrun_type()} start "{vm_path}" nogui', shell=True)
- print("Starting virtual machine...")
+ def start_vm(vm_path, max_retries=20):
+ command = f'vmrun {get_vmrun_type()} start "{vm_path}" nogui'
+ for attempt in range(max_retries):
+ result = subprocess.run(command, shell=True, text=True, capture_output=True)
+ if result.returncode == 0:
+ print("Virtual machine started.")
+ return True
+ else:
+ if "Error" in result.stderr:
+ print(f"Attempt {attempt + 1} failed with specific error: {result.stderr}")
+ else:
+ print(f"Attempt {attempt + 1} failed: {result.stderr}")
- # Get the IP address of the virtual machine
- for i in range(MAX_RETRY_TIMES):
- get_vm_ip = subprocess.run(f'vmrun {get_vmrun_type()} getGuestIPAddress "{vm_path}" -wait', shell=True,
- capture_output=True,
- text=True)
- if "Error" in get_vm_ip.stdout:
- print("Retry on getting IP")
- continue
- print("Virtual machine IP address:", get_vm_ip.stdout.strip())
- break
+ if attempt == max_retries - 1:
+ print("Maximum retry attempts reached, failed to start the virtual machine.")
+ return False
- vm_ip = get_vm_ip.stdout.strip()
+ if not start_vm(vm_path):
+ raise ValueError("Error encountered during installation, please rerun the code for retrying.")
- def is_url_accessible(url, timeout=1):
- try:
- response = requests.head(url, timeout=timeout)
- return response.status_code == 200
- except requests.exceptions.RequestException:
- return False
+ def get_vm_ip(vm_path, max_retries=20):
+ command = f'vmrun {get_vmrun_type()} getGuestIPAddress "{vm_path}" -wait'
+ for attempt in range(max_retries):
+ result = subprocess.run(command, shell=True, text=True, capture_output=True)
+ if result.returncode == 0:
+ return result.stdout.strip()
+ else:
+ if "Error" in result.stderr:
+ print(f"Attempt {attempt + 1} failed with specific error: {result.stderr}")
+ else:
+ print(f"Attempt {attempt + 1} failed: {result.stderr}")
- url = f"http://{vm_ip}:5000/screenshot"
- check_url = is_url_accessible(url)
+ if attempt == max_retries - 1:
+ print("Maximum retry attempts reached, failed to get the IP of virtual machine.")
+ return None
+
+ vm_ip = get_vm_ip(vm_path)
+ if not vm_ip:
+ raise ValueError("Error encountered during installation, please rerun the code for retrying.")
# Function used to check whether the virtual machine is ready
def download_screenshot(ip):
@@ -317,22 +344,39 @@ def _install_virtual_machine(vm_name, working_dir="./vm_data", downloaded_file_n
print("Virtual machine is ready. Start to make a snapshot on the virtual machine. It would take a while...")
- # Create a snapshot of the virtual machine
- subprocess.run(f'vmrun {get_vmrun_type()} snapshot "{vm_path}" "init_state"', shell=True)
- print("Snapshot created.")
+ def create_vm_snapshot(vm_path, max_retries=20):
+ command = f'vmrun {get_vmrun_type()} snapshot "{vm_path}" "init_state"'
+ for attempt in range(max_retries):
+ result = subprocess.run(command, shell=True, text=True, capture_output=True)
+ if result.returncode == 0:
+ print("Snapshot created.")
+ return True
+ else:
+ if "Error" in result.stderr:
+ print(f"Attempt {attempt + 1} failed with specific error: {result.stderr}")
+ else:
+ print(f"Attempt {attempt + 1} failed: {result.stderr}")
- return vm_path
+ if attempt == max_retries - 1:
+ print("Maximum retry attempts reached, failed to create snapshot.")
+ return False
+
+ # Create a snapshot of the virtual machine
+ if create_vm_snapshot(vm_path, max_retries=MAX_RETRY_TIMES):
+ return vm_path
+ else:
+ raise ValueError("Error encountered during installation, please rerun the code for retrying.")
def _get_vm_path():
vm_manager = VirtualMachineManager(REGISTRY_PATH)
- vm_manager.check_and_clean()
+ vm_manager.check_and_clean(vms_dir=VMS_DIR)
free_vms_paths = vm_manager.list_free_vms()
if len(free_vms_paths) == 0:
# No free virtual machine available, generate a new one
print("No free virtual machine available. Generating a new one, which would take a while...☕")
- new_vm_name = vm_manager.generate_new_vm_name()
- new_vm_path = _install_virtual_machine(new_vm_name)
+ new_vm_name = vm_manager.generate_new_vm_name(vms_dir=VMS_DIR)
+ new_vm_path = _install_virtual_machine(new_vm_name, vms_dir=VMS_DIR, downloaded_file_name=DOWNLOADED_FILE_NAME)
vm_manager.add_vm(new_vm_path)
vm_manager.occupy_vm(new_vm_path, os.getpid())
return new_vm_path
diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py
index 125679a..2f84efe 100644
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -217,9 +217,6 @@ class DesktopEnv(gym.Env):
logger.info("Resetting environment...")
logger.info("Switching task...")
- if task_config is not None:
- self._set_task_info(task_config)
- self.setup_controller.reset_cache_dir(self.cache_dir)
logger.info("Setting counters...")
self._traj_no += 1
@@ -234,11 +231,13 @@ class DesktopEnv(gym.Env):
self._start_emulator()
logger.info("Emulator started.")
- logger.info("Setting up environment...")
- self.setup_controller.setup(self.config)
-
- time.sleep(5)
- logger.info("Environment setup complete.")
+ if task_config is not None:
+ self._set_task_info(task_config)
+ self.setup_controller.reset_cache_dir(self.cache_dir)
+ logger.info("Setting up environment...")
+ self.setup_controller.setup(self.config)
+ time.sleep(5)
+ logger.info("Environment setup complete.")
observation = self._get_obs()
return observation
diff --git a/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json b/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json
index e0cbe4f..88ff97c 100644
--- a/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json
+++ b/evaluation_examples/examples/multi_apps/2b9493d7-49b8-493a-a71b-56cd1f4d6908.json
@@ -32,13 +32,10 @@
}
},
{
- "type": "launch",
+ "type": "execute",
"parameters": {
- "command": [
- "gnome-terminal",
- "--maximize",
- "--working-directory=/home/user/"
- ]
+ "command": "export DBUS_SESSION_BUS_ADDRESS='unix:path=/run/user/1000/bus'\ngnome-terminal --maximize --working-directory=/home/user/",
+ "shell": true
}
}
],
diff --git a/evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json b/evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json
index e3eac08..dd47d85 100644
--- a/evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json
+++ b/evaluation_examples/examples/multi_apps/2c9fc0de-3ee7-45e1-a5df-c86206ad78b5.json
@@ -36,13 +36,10 @@
}
},
{
- "type": "launch",
+ "type": "execute",
"parameters": {
- "command": [
- "gnome-terminal",
- "--maximize",
- "--working-directory=/home/user/projects/binder"
- ]
+ "command": "export DBUS_SESSION_BUS_ADDRESS='unix:path=/run/user/1000/bus'\ngnome-terminal --maximize --working-directory=/home/user/projects/binder",
+ "shell": true
}
}
],
diff --git a/evaluation_examples/examples/multi_apps/3680a5ee-6870-426a-a997-eba929a0d25c.json b/evaluation_examples/examples/multi_apps/3680a5ee-6870-426a-a997-eba929a0d25c.json
index 2c3df48..c24a1f0 100644
--- a/evaluation_examples/examples/multi_apps/3680a5ee-6870-426a-a997-eba929a0d25c.json
+++ b/evaluation_examples/examples/multi_apps/3680a5ee-6870-426a-a997-eba929a0d25c.json
@@ -20,13 +20,10 @@
}
},
{
- "type": "launch",
+ "type": "execute",
"parameters": {
- "command": [
- "gnome-terminal",
- "--maximize",
- "--working-directory=/home/user/Desktop"
- ]
+ "command": "export DBUS_SESSION_BUS_ADDRESS='unix:path=/run/user/1000/bus'\ngnome-terminal --maximize --working-directory=/home/user/Desktop",
+ "shell": true
}
}
],
diff --git a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
index 2cc3cdb..83539b7 100644
--- a/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
+++ b/evaluation_examples/examples/multi_apps/47f7c0ce-a5fb-4100-a5e6-65cd0e7429e5.json
@@ -13,7 +13,7 @@
"path": "/home/user/Desktop/landscape.mp4"
},
{
- "url": "https://drive.usercontent.google.com/download?id=1v7J3uaxjM5wSz8xidcV0p7tMfkJCHw3_&export=download&authuser=0&confirm=t",
+ "url": "https://drive.usercontent.google.com/download?id=1prBlaNd3mTEj-pmWcX1sEHBHDz4lX3eh&export=download&authuser=0&confirm=t",
"path": "/home/user/Desktop/Robotic_Workshop_Infographics.pptx"
}
]
diff --git a/evaluation_examples/examples/multi_apps/510f64c8-9bcc-4be1-8d30-638705850618.json b/evaluation_examples/examples/multi_apps/510f64c8-9bcc-4be1-8d30-638705850618.json
index cb4a198..3fd5585 100644
--- a/evaluation_examples/examples/multi_apps/510f64c8-9bcc-4be1-8d30-638705850618.json
+++ b/evaluation_examples/examples/multi_apps/510f64c8-9bcc-4be1-8d30-638705850618.json
@@ -45,13 +45,10 @@
}
},
{
- "type": "launch",
+ "type": "execute",
"parameters": {
- "command": [
- "gnome-terminal",
- "--maximize",
- "--working-directory=/home/user/"
- ]
+ "command": "export DBUS_SESSION_BUS_ADDRESS='unix:path=/run/user/1000/bus'\ngnome-terminal --maximize --working-directory=/home/user",
+ "shell": true
}
}
],
diff --git a/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json
index a18657a..7090421 100644
--- a/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json
+++ b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json
@@ -9,7 +9,7 @@
"parameters": {
"files": [
{
- "url": "https://drive.usercontent.google.com/download?id=1I-ArULOnZzlGkS9UyIuE8Dyuyus27iZt&export=download&authuser=0&confirm=t",
+ "url": "https://drive.usercontent.google.com/download?id=1FrBMxnyzzc7xMsgLc1IDOsvEYwZ_ignM&export=download&authuser=0&confirm=t",
"path": "/home/user/Desktop/Minimalist_Business_Slides.pptx"
},
{
diff --git a/evaluation_examples/examples/multi_apps/ee9a3c83-f437-4879-8918-be5efbb9fac7.json b/evaluation_examples/examples/multi_apps/ee9a3c83-f437-4879-8918-be5efbb9fac7.json
index 55c7a6c..b1961ed 100644
--- a/evaluation_examples/examples/multi_apps/ee9a3c83-f437-4879-8918-be5efbb9fac7.json
+++ b/evaluation_examples/examples/multi_apps/ee9a3c83-f437-4879-8918-be5efbb9fac7.json
@@ -32,13 +32,10 @@
}
},
{
- "type": "launch",
+ "type": "execute",
"parameters": {
- "command": [
- "gnome-terminal",
- "--maximize",
- "--working-directory=/home/user/Desktop"
- ]
+ "command": "export DBUS_SESSION_BUS_ADDRESS='unix:path=/run/user/1000/bus'\ngnome-terminal --maximize --working-directory=/home/user/Desktop",
+ "shell": true
}
}
],
diff --git a/evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json b/evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json
index 77114b9..84a1c7b 100644
--- a/evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json
+++ b/evaluation_examples/examples/multi_apps/f7dfbef3-7697-431c-883a-db8583a4e4f9.json
@@ -36,13 +36,10 @@
}
},
{
- "type": "launch",
+ "type": "execute",
"parameters": {
- "command": [
- "gnome-terminal",
- "--maximize",
- "--working-directory=/home/user/Desktop"
- ]
+ "command": "export DBUS_SESSION_BUS_ADDRESS='unix:path=/run/user/1000/bus'\ngnome-terminal --maximize --working-directory=/home/user/Desktop",
+ "shell": true
}
}
],
diff --git a/evaluation_examples/settings/google/settings.json b/evaluation_examples/settings/google/settings.json
deleted file mode 100644
index cc20c51..0000000
--- a/evaluation_examples/settings/google/settings.json
+++ /dev/null
@@ -1,4 +0,0 @@
-{
- "email": "xlang2024anonym@gmail.com",
- "password": "Evt5LLj!VJ6Y!C$B"
-}
diff --git a/evaluation_examples/settings/google/settings.json.template b/evaluation_examples/settings/google/settings.json.template
new file mode 100644
index 0000000..1892088
--- /dev/null
+++ b/evaluation_examples/settings/google/settings.json.template
@@ -0,0 +1,4 @@
+{
+ "email": "your_google_account@gmail.com",
+ "password": "your_google_account_password"
+}
diff --git a/evaluation_examples/settings/googledrive/client_secrets.json b/evaluation_examples/settings/googledrive/client_secrets.json
deleted file mode 100644
index fc4185a..0000000
--- a/evaluation_examples/settings/googledrive/client_secrets.json
+++ /dev/null
@@ -1 +0,0 @@
-{"installed":{"client_id":"786888752612-rgng5v9hcq4as7pn0b40gt9r5lekmht9.apps.googleusercontent.com","project_id":"xlang-2024-benchmarking","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}
\ No newline at end of file
diff --git a/mm_agents/README.md b/mm_agents/README.md
index be4c476..e096bf2 100644
--- a/mm_agents/README.md
+++ b/mm_agents/README.md
@@ -15,6 +15,7 @@ And those from the open-source community:
- `Mixtral 8x7B`
- `QWEN`, `QWEN-VL`
- `CogAgent`
+- `Llama3`
- ...
In the future, we will integrate and support more foundational models to enhance digital agents, so stay tuned.
diff --git a/mm_agents/agent.py b/mm_agents/agent.py
index da28ea8..893d45f 100644
--- a/mm_agents/agent.py
+++ b/mm_agents/agent.py
@@ -14,6 +14,8 @@ import backoff
import dashscope
import google.generativeai as genai
import openai
+from groq import Groq
+
import requests
import tiktoken
from PIL import Image
@@ -27,6 +29,8 @@ from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_S
logger = logging.getLogger("desktopenv.agent")
+pure_text_settings = ['a11y_tree']
+
# Function to encode the image
def encode_image(image_content):
@@ -131,7 +135,7 @@ def parse_actions_from_string(input_string):
def parse_code_from_string(input_string):
- input_string = input_string.replace(";", "\n")
+ input_string = "\n".join([line.strip() for line in input_string.split(';') if line.strip()])
if input_string.strip() in ['WAIT', 'DONE', 'FAIL']:
return [input_string.strip()]
@@ -510,7 +514,7 @@ class PromptAgent:
return response, actions
@backoff.on_exception(
- backoff.expo,
+ backoff.constant,
# here you should add more model exceptions as you want,
# but you are forbidden to add "Exception", that is, a common type of exception
# because we want to catch this kind of Exception in the outside to ensure each example won't exceed the time limit
@@ -525,8 +529,12 @@ class PromptAgent:
ResourceExhausted,
InternalServerError,
BadRequest,
+
+ # Groq exceptions
+ # todo: check
),
- max_tries=5
+ interval=30,
+ max_tries=10
)
def call_llm(self, payload):
@@ -632,6 +640,8 @@ class PromptAgent:
top_p = payload["top_p"]
temperature = payload["temperature"]
+ assert self.observation_type in pure_text_settings, f"The model {self.model} can only support text-based input, please consider change based model or settings"
+
mistral_messages = []
for i, message in enumerate(messages):
@@ -650,12 +660,13 @@ class PromptAgent:
client = OpenAI(api_key=os.environ["TOGETHER_API_KEY"],
base_url='https://api.together.xyz',
)
- logger.info("Generating content with Mistral model: %s", self.model)
flag = 0
while True:
try:
- if flag > 20: break
+ if flag > 20:
+ break
+ logger.info("Generating content with model: %s", self.model)
response = client.chat.completions.create(
messages=mistral_messages,
model=self.model,
@@ -733,6 +744,9 @@ class PromptAgent:
top_p = payload["top_p"]
temperature = payload["temperature"]
+ if self.model == "gemini-pro":
+ assert self.observation_type in pure_text_settings, f"The model {self.model} can only support text-based input, please consider change based model or settings"
+
gemini_messages = []
for i, message in enumerate(messages):
role_mapping = {
@@ -782,7 +796,7 @@ class PromptAgent:
gemini_messages,
generation_config={
"candidate_count": 1,
- "max_output_tokens": max_tokens,
+ # "max_output_tokens": max_tokens,
"top_p": top_p,
"temperature": temperature
},
@@ -796,7 +810,6 @@ class PromptAgent:
)
return response.text
-
elif self.model == "gemini-1.5-pro-latest":
messages = payload["messages"]
max_tokens = payload["max_tokens"]
@@ -858,7 +871,7 @@ class PromptAgent:
gemini_messages,
generation_config={
"candidate_count": 1,
- "max_output_tokens": max_tokens,
+ # "max_output_tokens": max_tokens,
"top_p": top_p,
"temperature": temperature
},
@@ -873,6 +886,59 @@ class PromptAgent:
return response.text
+ elif self.model == "llama3-70b":
+ messages = payload["messages"]
+ max_tokens = payload["max_tokens"]
+ top_p = payload["top_p"]
+ temperature = payload["temperature"]
+
+ assert self.observation_type in pure_text_settings, f"The model {self.model} can only support text-based input, please consider change based model or settings"
+
+ groq_messages = []
+
+ for i, message in enumerate(messages):
+ groq_message = {
+ "role": message["role"],
+ "content": ""
+ }
+
+ for part in message["content"]:
+ groq_message['content'] = part['text'] if part['type'] == "text" else ""
+
+ groq_messages.append(groq_message)
+
+ # The implementation based on Groq API
+ client = Groq(
+ api_key=os.environ.get("GROQ_API_KEY"),
+ )
+
+ flag = 0
+ while True:
+ try:
+ if flag > 20:
+ break
+ logger.info("Generating content with model: %s", self.model)
+ response = client.chat.completions.create(
+ messages=groq_messages,
+ model="llama3-70b-8192",
+ max_tokens=max_tokens,
+ top_p=top_p,
+ temperature=temperature
+ )
+ break
+ except:
+ if flag == 0:
+ groq_messages = [groq_messages[0]] + groq_messages[-1:]
+ else:
+ groq_messages[-1]["content"] = ' '.join(groq_messages[-1]["content"].split()[:-500])
+ flag = flag + 1
+
+ try:
+ return response.choices[0].message.content
+ except Exception as e:
+ print("Failed to call LLM: " + str(e))
+ return ""
+
elif self.model.startswith("qwen"):
messages = payload["messages"]
max_tokens = payload["max_tokens"]
diff --git a/requirements.txt b/requirements.txt
index 0229b7d..2cc96ab 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -52,3 +52,4 @@ wandb
wrapt_timeout_decorator
gdown
tiktoken
+groq