diff --git a/desktop_env/controllers/python.py b/desktop_env/controllers/python.py index 3f077fa..169bec7 100644 --- a/desktop_env/controllers/python.py +++ b/desktop_env/controllers/python.py @@ -1,11 +1,15 @@ import json +import logging +import random from typing import Any, Dict + import requests + from desktop_env.envs.actions import KEYBOARD_KEYS -import logging logger = logging.getLogger("desktopenv.pycontroller") + class PythonController: def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""): self.http_server = http_server @@ -62,6 +66,10 @@ class PythonController: action_type = action["action_type"] parameters = action["parameters"] if "parameters" in action else {} + move_mode = random.choice( + ["pyautogui.easeInQuad", "pyautogui.easeOutQuad", "pyautogui.easeInOutQuad", "pyautogui.easeInBounce", + "pyautogui.easeInElastic"]) + duration = random.uniform(0.5, 1) if action_type == "MOVE_TO": if parameters == {} or None: @@ -69,7 +77,7 @@ class PythonController: elif "x" in parameters and "y" in parameters: x = parameters["x"] y = parameters["y"] - self.execute_python_command(f"pyautogui.moveTo({x}, {y})") + self.execute_python_command(f"pyautogui.moveTo({x}, {y}, {duration}, {move_mode})") else: raise Exception(f"Unknown parameters: {parameters}") @@ -211,6 +219,27 @@ class PythonController: else: raise Exception(f"Unknown action type: {action_type}") + def get_vm_screen_size(self): + """ + Gets the size of the vm screen. + """ + response = requests.post(self.http_server + "/screen_size") + if response.status_code == 200: + return response.json() + else: + logger.error("Failed to get screen size. Status code: %d", response.status_code) + return None + + def get_vm_window_size(self, app_class_name: str): + """ + Gets the size of the vm app window. + """ + response = requests.post(self.http_server + "/window_size", data={"app_class_name": app_class_name}) + if response.status_code == 200: + return response.json() + else: + logger.error("Failed to get window size. Status code: %d", response.status_code) + return None def get_vlc_status(self, host='localhost', port=8080, password='password'): url = f'http://{host}:{port}/requests/status.xml' @@ -218,8 +247,7 @@ class PythonController: response = requests.get(url, auth=('', password)) if response.status_code == 200: - print("File downloaded successfully") return response.content else: - print("Failed to get vlc status. Status code:", response.status_code) + logger.error("Failed to get vlc status. Status code: %d", response.status_code) return None diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py index a0e9adc..cf97120 100644 --- a/desktop_env/controllers/setup.py +++ b/desktop_env/controllers/setup.py @@ -199,10 +199,14 @@ class SetupController: except requests.exceptions.RequestException as e: logger.error("An error occurred while trying to send the request: %s", e) - def _launch_setup(self, command: List[str]): + def _launch_setup(self, command: Union[str, List[str]]): if not command: raise Exception("Empty command to launch.") + if isinstance(command, str) and len(command.split()) > 1: + logger.warning("Command should be a list of strings. Now it is a string. Will split it by space.") + command = command.split() + payload = json.dumps({"command": command}) headers = {"Content-Type": "application/json"} diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index 2c59035..d969333 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -1,6 +1,6 @@ +import os from typing import Dict -import os import requests @@ -37,11 +37,16 @@ def get_vm_file(env, config: Dict[str, str]) -> str: _path = os.path.join(env.cache_dir, config["dest"]) file = env.controller.get_file(config["path"]) + + if file is None: + raise FileNotFoundError("File not found on VM: {:}".format(config["path"])) + with open(_path, "wb") as f: f.write(file) return _path + def get_cache_file(env, config: Dict[str, str]) -> str: """ Config: diff --git a/desktop_env/evaluators/getters/misc.py b/desktop_env/evaluators/getters/misc.py index f66322b..a27cfa2 100644 --- a/desktop_env/evaluators/getters/misc.py +++ b/desktop_env/evaluators/getters/misc.py @@ -1,66 +1,13 @@ +import logging from typing import TypeVar -import platform -import subprocess -import ctypes -import os - -import logging logger = logging.getLogger("desktopenv.getters.misc") R = TypeVar("Rule") + + def get_rule(env, config: R) -> R: """ Returns the rule as-is. """ return config["rules"] - - -def get_desktop_path(): - username = os.getlogin() # Get the current username - if platform.system() == "Windows": - return os.path.join("C:", "Users", username, "Desktop") - elif platform.system() == "Darwin": # macOS is identified as 'Darwin' - return os.path.join("/Users", username, "Desktop") - elif platform.system() == "Linux": - return os.path.join("/home", username, "Desktop") - else: - raise Exception("Unsupported operating system") - - -def get_wallpaper(): - def get_wallpaper_windows(): - SPI_GETDESKWALLPAPER = 0x73 - MAX_PATH = 260 - buffer = ctypes.create_unicode_buffer(MAX_PATH) - ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0) - return buffer.value - - def get_wallpaper_macos(): - script = """ - tell application "System Events" to tell every desktop to get picture - """ - process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE) - output, error = process.communicate() - if error: - logger.error("Error: %s", error) - else: - return output.strip().decode('utf-8') - - def get_wallpaper_linux(): - try: - output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"]) - return output.decode('utf-8').strip().replace('file://', '').replace("'", "") - except Exception as e: - logger.error("Error: %s", e) - return None - - os_name = platform.system() - if os_name == 'Windows': - return get_wallpaper_windows() - elif os_name == 'Darwin': - return get_wallpaper_macos() - elif os_name == 'Linux': - return get_wallpaper_linux() - else: - return "Unsupported OS" diff --git a/desktop_env/evaluators/getters/vlc.py b/desktop_env/evaluators/getters/vlc.py index abdb085..fb4a7d5 100644 --- a/desktop_env/evaluators/getters/vlc.py +++ b/desktop_env/evaluators/getters/vlc.py @@ -33,7 +33,7 @@ def get_vlc_config(env, config: Dict[str, str]): # fixme: depends on how we config and install the vlc in virtual machine, need to be aligned and double-checked if os_type == "Linux": config_path = \ - env.controller.execute_python_command("import os; print(os.path.expanduser('~/snap/vlc/common/vlcrc'))")[ + env.controller.execute_python_command("import os; print(os.path.expanduser('~/.config/vlc/vlcrc'))")[ 'output'].strip() elif os_type == "Darwin": config_path = env.controller.execute_python_command( diff --git a/desktop_env/evaluators/metrics/vlc.py b/desktop_env/evaluators/metrics/vlc.py index 24eddb9..21d7b08 100644 --- a/desktop_env/evaluators/metrics/vlc.py +++ b/desktop_env/evaluators/metrics/vlc.py @@ -7,8 +7,6 @@ from xml.etree import ElementTree import acoustid import cv2 import imagehash -import pyautogui -import pygetwindow as gw # todo: change to the library that supports Linux from PIL import Image logger = logging.getLogger("desktopenv.metrics.vlc") @@ -72,30 +70,11 @@ def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> f return False -def are_audio_files_similar(mp3_file_path, mp4_file_path): - # Extract audio fingerprint from MP3 file - mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path) - - # Extract the audio stream from the MP4 file - mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3' - try: - subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3", - mp4_audio_path], check=True) - except subprocess.CalledProcessError as e: - print(f"An error occurred during audio extraction from MP4: {e}") - return False - - # Extract audio fingerprint from the extracted audio - mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path) - - # Clean up temporary extracted audio file - os.remove(mp4_audio_path) - - # Compare fingerprints (rudimentary comparison) - if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint: +def is_vlc_fullscreen(actual_window_size, screen_size): + if actual_window_size['width'] == screen_size['width'] and actual_window_size['height'] == screen_size['height']: return True - - return False + else: + return False def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5): @@ -137,28 +116,27 @@ def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold= return True -def is_vlc_fullscreen(): - """ - Checks if the VLC window is in full-screen mode. +def are_audio_files_similar(mp3_file_path, mp4_file_path): + # Extract audio fingerprint from MP3 file + mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path) - When VLC is in full-screen mode, its window size matches the screen size with no borders. - """ + # Extract the audio stream from the MP4 file + mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3' try: - # Get the VLC window; adjust the title as per your VLC window's title - vlc_window = gw.getWindowsWithTitle('VLC media player')[0] # Adjust title if needed - if not vlc_window: - return False - - # Get screen size - screen_width, screen_height = pyautogui.size() - - # Check if VLC window size matches the screen size - return (vlc_window.width == screen_width and vlc_window.height == screen_height) - - except IndexError: - # VLC window not found - logger.error("VLC window not found.") - return False - except Exception as e: - logger.error(f"An error occurred: {e}") + subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3", + mp4_audio_path], check=True) + except subprocess.CalledProcessError as e: + print(f"An error occurred during audio extraction from MP4: {e}") return False + + # Extract audio fingerprint from the extracted audio + mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path) + + # Clean up temporary extracted audio file + os.remove(mp4_audio_path) + + # Compare fingerprints (rudimentary comparison) + if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint: + return True + + return False diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index a9bcbc7..829e9d0 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -1,13 +1,17 @@ +import ctypes import os import platform import subprocess from pathlib import Path from typing import List +import Xlib import pyautogui import requests from PIL import Image -from flask import Flask, request, jsonify, send_file +from Xlib import display, X +from flask import Flask, request, jsonify, send_file, abort +from werkzeug.utils import secure_filename from pyxcursor import Xcursor @@ -99,6 +103,164 @@ def capture_screen_with_cursor(): return send_file(file_path, mimetype='image/png') +@app.route('/screen_size', methods=['POST']) +def get_screen_size(): + d = display.Display() + screen_width = d.screen().width_in_pixels + screen_height = d.screen().height_in_pixels + return jsonify( + { + "width": screen_width, + "height": screen_height + } + ) + + +@app.route('/window_size', methods=['POST']) +def get_window_size(): + if 'app_class_name' in request.form: + app_class_name = request.form['app_class_name'] + else: + return jsonify({"error": "app_class_name is required"}), 400 + + d = display.Display() + root = d.screen().root + window_ids = root.get_full_property(d.intern_atom('_NET_CLIENT_LIST'), X.AnyPropertyType).value + + for window_id in window_ids: + try: + window = d.create_resource_object('window', window_id) + wm_class = window.get_wm_class() + + if wm_class is None: + continue + + if app_class_name.lower() in [name.lower() for name in wm_class]: + geom = window.get_geometry() + return jsonify( + { + "width": geom.width, + "height": geom.height + } + ) + except Xlib.error.XError: # Ignore windows that give an error + continue + return None + + +@app.route('/desktop_path', methods=['POST']) +def get_desktop_path(): + # Get the home directory in a platform-independent manner using pathlib + home_directory = str(Path.home()) + + # Determine the desktop path based on the operating system + desktop_path = { + "Windows": os.path.join(home_directory, "Desktop"), + "Darwin": os.path.join(home_directory, "Desktop"), # macOS + "Linux": os.path.join(home_directory, "Desktop") + }.get(platform.system(), None) + + # Check if the operating system is supported and the desktop path exists + if desktop_path and os.path.exists(desktop_path): + return jsonify(desktop_path=desktop_path) + else: + return jsonify(error="Unsupported operating system or desktop path not found"), 404 + + +@app.route('/wallpaper', methods=['POST']) +def get_wallpaper(): + def get_wallpaper_windows(): + SPI_GETDESKWALLPAPER = 0x73 + MAX_PATH = 260 + buffer = ctypes.create_unicode_buffer(MAX_PATH) + ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0) + return buffer.value + + def get_wallpaper_macos(): + script = """ + tell application "System Events" to tell every desktop to get picture + """ + process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, error = process.communicate() + if error: + app.logger.error("Error: %s", error.decode('utf-8')) + return None + return output.strip().decode('utf-8') + + def get_wallpaper_linux(): + try: + output = subprocess.check_output( + ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"], + stderr=subprocess.PIPE + ) + return output.decode('utf-8').strip().replace('file://', '').replace("'", "") + except subprocess.CalledProcessError as e: + app.logger.error("Error: %s", e) + return None + + os_name = platform.system() + wallpaper_path = None + if os_name == 'Windows': + wallpaper_path = get_wallpaper_windows() + elif os_name == 'Darwin': + wallpaper_path = get_wallpaper_macos() + elif os_name == 'Linux': + wallpaper_path = get_wallpaper_linux() + else: + app.logger.error(f"Unsupported OS: {os_name}") + abort(400, description="Unsupported OS") + + if wallpaper_path: + try: + # Ensure the filename is secure + filename = secure_filename(os.path.basename(wallpaper_path)) + return send_file(wallpaper_path, attachment_filename=filename) + except Exception as e: + app.logger.error(f"An error occurred while serving the wallpaper file: {e}") + abort(500, description="Unable to serve the wallpaper file") + else: + abort(404, description="Wallpaper file not found") + + +@app.route('/list_directory', methods=['POST']) +def get_directory_tree(): + def _list_dir_contents(directory): + """ + List the contents of a directory recursively, building a tree structure. + + :param directory: The path of the directory to inspect. + :return: A nested dictionary with the contents of the directory. + """ + tree = {'type': 'directory', 'name': os.path.basename(directory), 'children': []} + try: + # List all files and directories in the current directory + for entry in os.listdir(directory): + full_path = os.path.join(directory, entry) + # If entry is a directory, recurse into it + if os.path.isdir(full_path): + tree['children'].append(_list_dir_contents(full_path)) + else: + tree['children'].append({'type': 'file', 'name': entry}) + except OSError as e: + # If the directory cannot be accessed, return the exception message + tree = {'error': str(e)} + return tree + + # Extract the 'path' parameter from the JSON request + data = request.get_json() + if 'path' not in data: + return jsonify(error="Missing 'path' parameter"), 400 + + start_path = data['path'] + # Ensure the provided path is a directory + if not os.path.isdir(start_path): + return jsonify(error="The provided path is not a directory"), 400 + + # Generate the directory tree starting from the provided path + directory_tree = _list_dir_contents(start_path) + return jsonify(directory_tree=directory_tree) + + @app.route('/file', methods=['POST']) def get_file(): # Retrieve filename from the POST request diff --git a/evaluation_examples/examples/vlc/8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f.json b/evaluation_examples/examples/vlc/8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f.json index 8f2a609..8c2fdb2 100644 --- a/evaluation_examples/examples/vlc/8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f.json +++ b/evaluation_examples/examples/vlc/8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f.json @@ -1,12 +1,28 @@ { "id": "8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f", "snapshot": "base_setup", - "instruction": "Make the video window whole-screen", + "instruction": "Can you make the video fill up the whole screen? It's a bit too small to see right now, and I'd like to see it better.", "source": "https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s", - "config": [], + "config": [ + { + "type": "launch", + "parameters": { + "command": "vlc" + } + } + ], "trajectory": "trajectories/", "related_apps": [ "vlc" ], - "evaluator": "evaluation_dir" + "evaluator": { + "func": "is_vlc_fullscreen", + "expected": { + "type": "vm_window_size", + "app_class_name": "vlc" + }, + "result": { + "type": "vm_screen_size" + } + } } diff --git a/evaluation_examples/examples/vlc/aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6.json b/evaluation_examples/examples/vlc/aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6.json index d5c48ec..f525f96 100644 --- a/evaluation_examples/examples/vlc/aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6.json +++ b/evaluation_examples/examples/vlc/aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6.json @@ -1,7 +1,7 @@ { "id": "aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6", "snapshot": "base_setup", - "instruction": "This video is upside down, help me rotate it", + "instruction": "Hey, could you turn this video the right way up for me? And once it's flipped around, could you save it for me with the name '1984_Apple_Macintosh_Commercial.mp4' on the main screen where all my files are?", "source": "https://www.dedoimedo.com/computers/vlc-rotate-videos.html", "config": [ { @@ -10,21 +10,33 @@ "files": [ { "url": "https://drive.usercontent.google.com/download?id=1CLBjjsjGmHlbDg1lDcxfdE0F0C7-A5gZ&export=download&authuser=0&confirm=t&uuid=dde635fc-e223-4cd3-8065-899396e68d0a&at=APZUnTWQHdWYLLxlofuOIuhE2qiS:1704722380621", - "path": "flipped_1984_Apple_Macintosh_Commercial.mp4" + "path": "Desktop/flipped_1984_Apple_Macintosh_Commercial.mp4" } ] } }, { "type": "launch", - "parameters": { - "command": "vlc" - } + "parameters": { + "command": ["vlc", "Desktop/flipped_1984_Apple_Macintosh_Commercial.mp4"] + } } ], "trajectory": "trajectories/", "related_apps": [ "vlc" ], - "evaluator": "evaluation_dir" + "evaluator": { + "func": "compare_videos", + "expected": { + "type": "cloud_file", + "path": "https://drive.usercontent.google.com/download?id=10P9nKW7VTaHGS15tj4C0GVxUIYA0Nh42&export=download&authuser=0&confirm=t&uuid=1b812f06-b624-4aed-8d91-734c54421da3&at=APZUnTUCB-58SwcccQ_WN1T4KdJy:1704808326066", + "dest": "1984_Apple_Macintosh_Commercial_gold.mp4" + }, + "result": { + "type": "vm_file", + "path": "1984_Apple_Macintosh_Commercial.mp4", + "dest": "1984_Apple_Macintosh_Commercial.mp4" + } + } } diff --git a/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json b/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json index 71f2019..3238837 100644 --- a/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json +++ b/evaluation_examples/examples/vlc/efcf0d81-0835-4880-b2fd-d866e8bc2294.json @@ -3,7 +3,25 @@ "snapshot": "base_setup", "instruction": "Set this frame of the current video as my wallpaper", "source": "https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s", - "config": [], + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "", + "path": "" + } + ] + } + }, + { + "type": "launch", + "parameters": { + "command": ["vlc", "/path/to/your/video.mp4", "--start-time=0", "--run-time=10", "vlc://quit", "&&", "vlc", "/path/to/your/video.mp4", "--start-time=10"] + } + } + ], "trajectory": "trajectories/", "related_apps": [ "vlc"