NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch; the whitespace of context lines is
assumed, not verified against the target files. evaluate() relies on `os` and
`uuid` being imported above line 8 of desktop_env.py, which this patch does
not show -- confirm before applying. The blob ids on the `index` lines predate
the review fixes made to the added lines.

diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py
index f900c9c..d422003 100644
--- a/desktop_env/controllers/python.py
+++ b/desktop_env/controllers/python.py
@@ -20,6 +20,21 @@ class PythonController:
             print("Failed to get screenshot. Status code:", response.status_code)
             return None
 
+    def get_file(self, file_path: str):
+        """
+        Gets a file from the server.
+
+        Returns the raw bytes of the response body, or None when the server
+        does not answer with status code 200.
+        """
+        response = requests.post(self.http_server + "/file", data={"file_path": file_path})
+        if response.status_code == 200:
+            print("File downloaded successfully")
+            return response.content
+        else:
+            print("Failed to get file. Status code:", response.status_code)
+            return None
+
     def execute_python_command(self, command: str) -> None:
         """
         Executes a python command on the server.
diff --git a/desktop_env/envs/desktop_env.py b/desktop_env/envs/desktop_env.py
index 942c7c7..183411d 100644
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -8,10 +8,11 @@ import platform
 from typing import List
 
 import gymnasium as gym
-
+import requests
 
 from desktop_env.controllers.python import PythonController
 from desktop_env.controllers.setup import SetupController
+from desktop_env.evaluators import eval_funcs
 
 
 def _execute_command(command: List[str]) -> None:
@@ -32,7 +33,9 @@ class DesktopEnv(gym.Env):
             self,
             path_to_vm: str,
             snapshot_path: str = "base",
+            instruction: str = None,
             config: dict = None,
+            evaluator: dict = None,
             action_space: str = "computer_13",
     ):
         # Initialize environment variables
@@ -45,7 +48,9 @@ class DesktopEnv(gym.Env):
         self.host = f"http://{self._get_vm_ip()}:5000"
         self.controller = PythonController(http_server=self.host)
         self.setup_controller = SetupController(http_server=self.host)
+        self.instruction = instruction
         self.config = config
+        self.evaluator = evaluator
 
         # mode: human or machine
         assert action_space in ["computer_13", "pyautogui"]
@@ -113,6 +118,9 @@ class DesktopEnv(gym.Env):
         print("Setting up environment...")
         self.setup_controller.setup(self.config)
 
+        time.sleep(5)
+        print("Environment setup complete.")
+
         observation = self._get_obs()
         return observation
 
@@ -127,12 +135,64 @@ class DesktopEnv(gym.Env):
         # todo: maybe for the better here we need to add a logic to wait until the rendering is done
         time.sleep(pause)
 
-        observation = self._get_obs()
+        observation = {
+            "screenshot": self._get_obs(),
+            "instruction": self.instruction
+        }
         reward = 0  # todo: Define reward calculation for each example
         done = False  # todo: Define episode termination condition for each example
         info = {}
         return observation, reward, done, info
 
+    def evaluate(self):
+        """
+        Evaluate whether the task is successfully completed.
+
+        Copies every file listed in self.evaluator["paths"] into a fresh local
+        directory under "tmp" and calls the function stored in eval_funcs under
+        self.evaluator["func"] with those local paths as keyword arguments.
+        Returns whatever that function returns.
+        """
+        if self.evaluator is None:
+            raise ValueError("No evaluator was configured for this environment.")
+
+        def copy_file_to_local(_file_info):
+            random_uuid = str(uuid.uuid4())
+            os.makedirs(os.path.join("tmp", random_uuid), exist_ok=True)
+            _path = os.path.join("tmp", random_uuid, "tmp.xlsx")
+            if _file_info["type"] == "cloud_file":
+                url = _file_info["path"]
+                response = requests.get(url, stream=True)
+                response.raise_for_status()
+
+                with open(_path, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+            elif _file_info["type"] == "vm_file":
+                # fixme: stream this part maybe as well
+                file = self.controller.get_file(_file_info["path"])
+                if file is None:
+                    # get_file() returns None on a non-200 response; fail with a
+                    # clear message instead of a TypeError from f.write(None)
+                    raise RuntimeError(f"Failed to get file from VM: {_file_info['path']}")
+                with open(_path, "wb") as f:
+                    f.write(file)
+            else:
+                raise NotImplementedError(f"Unsupported file type: {_file_info['type']}")
+
+            return _path
+
+        # todo: make this more flexible by refactoring
+        eval_func = eval_funcs[self.evaluator["func"]]
+        eval_func_vars = {}
+
+        for var_name, file_info in self.evaluator["paths"].items():
+            path = copy_file_to_local(file_info)
+            eval_func_vars[var_name] = path
+
+        return eval_func(**eval_func_vars)
+
     def render(self, mode='rgb_array'):
         if mode == 'rgb_array':
             return self._get_obs()
diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index 15a5493..8bc1323 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -74,6 +74,22 @@ def capture_screen_with_cursor():
     return send_file(file_path, mimetype='image/png')
 
 
+@app.route('/file', methods=['POST'])
+def get_file():
+    # Retrieve filename from the POST request
+    if 'file_path' in request.form:
+        file_path = request.form['file_path']
+    else:
+        return jsonify({"error": "file_path is required"}), 400
+
+    try:
+        # Check if the file exists and send it to the user
+        return send_file(file_path, as_attachment=True)
+    except FileNotFoundError:
+        # If the file is not found, return a 404 error
+        return jsonify({"error": "File not found"}), 404
+
+
 @app.route('/platform', methods=['GET'])
 def get_platform():
     return platform.system()
diff --git a/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json b/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json
index b1f9455..e25e698 100644
--- a/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json
+++ b/evaluation_examples/examples/37608790-6147-45d0-9f20-1137bb35703d.json
@@ -18,5 +18,17 @@
   "related_apps": [
     "libreoffice calc"
   ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "func": "compare_table(expected, actual)",
+    "paths": {
+      "expected": {
+        "type": "cloud_file",
+        "path": "https://drive.usercontent.google.com/download?id=1dxpiUqP_CVvQp5tddxlwO3Cp1BqJ-ZDE&export=download&authuser=0&confirm=t&uuid=ccd204c7-07ce-4fdf-a5d4-a7e4f37b9ce6&at=APZUnTVBs7TgrVrDXpkiU8S7WbQo:1702360836747"
+      },
+      "actual": {
+        "type": "vm_file",
+        "path": "C:\\Users\\tianbaox\\Desktop\\Employee_Roles_and_Ranks.xlsx"
+      }
+    }
+  }
 }
diff --git a/evaluation_examples/examples/d681960f-7bc3-4286-9913-a8812ba3261a.json b/evaluation_examples/examples/d681960f-7bc3-4286-9913-a8812ba3261a.json
index 52316e6..509fd77 100644
--- a/evaluation_examples/examples/d681960f-7bc3-4286-9913-a8812ba3261a.json
+++ b/evaluation_examples/examples/d681960f-7bc3-4286-9913-a8812ba3261a.json
@@ -18,5 +18,17 @@
   "related_apps": [
     "libreoffice calc"
   ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "func": "compare_table(expected, actual)",
+    "paths": {
+      "expected": {
+        "type": "cloud_file",
+        "path": "https://drive.usercontent.google.com/download?id=1kfEHJH1n0yCsQp443IIFvdD9uWv0DWMr&export=download&authuser=0&confirm=t&uuid=d9907f65-8d39-4ecc-8747-b4ed7e6011f5&at=APZUnTXpPAnlh5sD6q-R8oQtqL6g:1702362952170"
+      },
+      "actual": {
+        "type": "vm_file",
+        "path": "C:\\Users\\tianbaox\\Desktop\\Student_Grades_and_Remarks.xlsx"
+      }
+    }
+  }
 }
diff --git a/evaluation_examples/examples/f9584479-3d0d-4c79-affa-9ad7afdd8850.json b/evaluation_examples/examples/f9584479-3d0d-4c79-affa-9ad7afdd8850.json
index 1bb0ff6..5a01d56 100644
--- a/evaluation_examples/examples/f9584479-3d0d-4c79-affa-9ad7afdd8850.json
+++ b/evaluation_examples/examples/f9584479-3d0d-4c79-affa-9ad7afdd8850.json
@@ -18,5 +18,17 @@
   "related_apps": [
     "libreoffice calc"
   ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "func": "compare_table(expected, actual)",
+    "paths": {
+      "expected": {
+        "type": "cloud_file",
+        "path": "https://drive.usercontent.google.com/download?id=17f1wZuJPvUEc5at_Fy3c18VFdOk0x7xz&export=download&authuser=0&confirm=t&uuid=6d2edffd-0ce0-426e-9820-8af25b4667f3&at=APZUnTVh7JS85dwZBaV2hytWQgDK:1702361510956"
+      },
+      "actual": {
+        "type": "vm_file",
+        "path": "C:\\Users\\tianbaox\\Desktop\\Quarterly_Product_Sales_by_Zone.xlsx"
+      }
+    }
+  }
 }
diff --git a/main.py b/main.py
index 3c8acce..d7c3073 100644
--- a/main.py
+++ b/main.py
@@ -15,8 +15,10 @@ def human_agent():
         path_to_vm=r"""C:\Users\tianbaox\Documents\Virtual Machines\Win10\Win10.vmx""",
         # path_to_vm="/home/yuri/vmware/Ubuntu 64-bit/Ubuntu 64-bit.vmx",
         action_space="computer_13",
-        snapshot_path="base_setup2",
-        # config=example["config"],
+        snapshot_path="base_setup3",
+        instruction=example["instruction"],
+        config=example["config"],
+        evaluator=example["evaluator"]
     )
 
     # reset the environment to certain snapshot
@@ -31,43 +33,7 @@ def human_agent():
                 "y": 1057
             }
         },
-        {"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}},
-        {
-            "action_type": "MOVE_TO",
-            "parameters": {
-                "x": 754,
-                "y": 1057
-            }
-        },
-        {
-            "action_type": "MOVE_TO",
-            "parameters": {
-                "x": 754,
-                "y": 1057
-            }
-        },
-        {
-            "action_type": "MOVE_TO",
-            "parameters": {
-                "x": 754,
-                "y": 1057
-            }
-        },
-        {
-            "action_type": "MOVE_TO",
-            "parameters": {
-                "x": 754,
-                "y": 1057
-            }
-        },
-        {
-            "action_type": "MOVE_TO",
-            "parameters": {
-                "x": 754,
-                "y": 1057
-            }
-        }
-
+        {"action_type": "CLICK", "parameters": {"button": "right", "num_clicks": 1}}
     ]
 
     for i in range(len(trajectory)):
@@ -90,6 +56,9 @@ def human_agent():
             print("The episode is done.")
             break
 
+    result = env.evaluate()
+    print("Result:", result)
+
     env.close()
     print("Environment closed.")
 
diff --git a/requirements.txt b/requirements.txt
index cb56195..0a27eff 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,16 @@
-numpy
-Pillow
+numpy~=1.24.3
+Pillow~=10.1.0
 fabric
-gymnasium
-requests
-transformers
-torch
+gymnasium~=0.28.1
+requests~=2.31.0
+transformers~=4.35.2
+torch~=2.1.1
 accelerate
+opencv-python~=4.8.1.78
+matplotlib~=3.7.4
+pynput~=1.7.6
+pyautogui~=0.9.54
+psutil~=5.9.6
+tqdm~=4.65.0
+pandas~=2.0.3
+flask~=3.0.0
\ No newline at end of file