class DirectVMManager(VMManager):
    """No-op manager for direct-IP connections (physical machines / bare-metal).

    A directly connected machine has no VM registry to maintain, so every
    registry operation does nothing. The only value this manager produces is
    the "VM path", which for this provider is the machine's IP address.
    """

    # Address used when the DIRECT_VM_IP environment variable is not set.
    DEFAULT_VM_IP = "192.168.1.11"

    def __init__(self, registry_path=""):
        # registry_path is accepted for signature compatibility and ignored.
        pass

    def initialize_registry(self, **kwargs):
        """Do nothing: there is no registry."""
        pass

    def add_vm(self, vm_path, **kwargs):
        """Do nothing: machines are not registered."""
        pass

    def delete_vm(self, vm_path, **kwargs):
        """Do nothing: machines are not registered."""
        pass

    def occupy_vm(self, vm_path, pid, **kwargs):
        """Do nothing: occupancy is not tracked."""
        pass

    def list_free_vms(self, **kwargs):
        """Return an empty list: no VMs are tracked."""
        return []

    def check_and_clean(self, **kwargs):
        """Do nothing: there is nothing to clean up."""
        pass

    def get_vm_path(self, os_type="Windows", region=None, screen_size=(1920, 1080), **kwargs):
        """Return the machine address from DIRECT_VM_IP (or the default).

        ``os_type``, ``region`` and ``screen_size`` are accepted for signature
        compatibility and are not used.
        """
        # Imported locally because this module's header does not import `os`;
        # without it this method raised NameError on every call.
        import os

        return os.environ.get("DIRECT_VM_IP", self.DEFAULT_VM_IP)
@echo off
rem Unpack win7_server_packages.zip located next to this script, then run
rem the installer script that the archive is expected to contain
rem (win7_offline_packages\install_win7.bat).
echo ========================================
echo Unzip + Install (Win7 32bit Python3.8)
echo ========================================
echo.

rem %~dp0 expands to the drive and directory of this script.
set "ZIPFILE=%~dp0win7_server_packages.zip"
set "DESTDIR=%~dp0"

rem Abort early when the archive is not beside the script.
if not exist "%ZIPFILE%" (
    echo [ERROR] win7_server_packages.zip not found!
    echo Please put this bat and the zip in the same folder.
    pause
    exit /b 1
)

echo [1/3] Unzipping via Shell.Application (Win7 compatible) ...
rem Copy the archive's items into DESTDIR through the Shell.Application COM
rem object (20 is the options value passed to CopyHere), then sleep 5 seconds
rem before PowerShell exits.
rem NOTE(review): presumably the sleep gives the copy time to finish; a large
rem archive may need longer - confirm.
powershell -NoProfile -ExecutionPolicy Bypass -Command "$s=New-Object -ComObject Shell.Application; $d=$s.NameSpace('%DESTDIR%'); $z=$s.NameSpace('%ZIPFILE%'); $d.CopyHere($z.Items(), 20); Start-Sleep -s 5"

echo.
rem The installer script must exist after extraction; otherwise report failure.
if not exist "%~dp0win7_offline_packages\install_win7.bat" (
    echo [ERROR] Unzip failed. Folder win7_offline_packages not found.
    pause
    exit /b 1
)

echo [2/3] Unzip OK. Starting install ...
echo.
rem Hand over to the extracted installer.
call "%~dp0win7_offline_packages\install_win7.bat"
def _exec_python_inline(code_str):
    """Run ``code_str`` with exec() inside this process and capture its output.

    Returns a dict with the same keys as the subprocess path of
    ``execute_command``: ``status``, ``output``, ``error``, ``returncode``.
    """
    import io
    import traceback
    from contextlib import redirect_stdout, redirect_stderr

    stdout_buf = io.StringIO()
    stderr_buf = io.StringIO()
    status = 'success'
    returncode = 0
    # Mirror `python -c`: the code runs as the __main__ module and tracebacks
    # name the pseudo-file "<string>".
    exec_globals = {"__builtins__": __builtins__, "__name__": "__main__"}
    try:
        # NOTE: redirect_stdout/redirect_stderr swap sys.stdout/sys.stderr for
        # the whole process while the code runs.
        with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):
            exec(compile(code_str, "<string>", "exec"), exec_globals)
    except SystemExit as exc:
        # sys.exit()/exit() raise SystemExit, which `except Exception` does
        # not catch. Translate it into a return code the way the interpreter
        # would instead of letting it escape the request handler.
        if exc.code is None:
            returncode = 0
        elif isinstance(exc.code, int):
            returncode = exc.code
        else:
            stderr_buf.write("{}\n".format(exc.code))
            returncode = 1
    except Exception:
        status = 'error'
        returncode = 1
        stderr_buf.write(traceback.format_exc())

    return {
        'status': status,
        'output': stdout_buf.getvalue(),
        'error': stderr_buf.getvalue(),
        'returncode': returncode
    }


@app.route('/setup/execute', methods=['POST'])
@app.route('/execute', methods=['POST'])
def execute_command():
    """Execute a command received as JSON and return its result as JSON.

    Request JSON:
        command: list of arguments, or a string. A string is split with
            ``shlex`` unless ``shell`` is true.
        shell: when true, ``command`` is handed to the system shell as-is.

    The response carries ``status``, ``output`` (stdout), ``error`` (stderr)
    and ``returncode``. If the subprocess cannot be run, the response is
    ``{'status': 'error', 'message': ...}`` with HTTP 500.

    SECURITY: the command (and, on Windows, inline Python code) comes from the
    request and is executed without any safety checks.
    """
    # A JSON body of `null` would otherwise fail on `.get` below.
    data = request.json or {}
    # The 'command' key in the JSON request should contain the command to be executed.
    shell = data.get('shell', False)
    command = data.get('command', "" if shell else [])

    if isinstance(command, str) and not shell:
        command = shlex.split(command)

    if isinstance(command, list):
        # Expand user directory. Only argument lists are processed; a shell
        # string is left untouched.
        command = [
            os.path.expanduser(arg) if isinstance(arg, str) and arg.startswith("~/") else arg
            for arg in command
        ]

        # Replace 'python' with sys.executable to use the same Python interpreter as the server
        if command and command[0] in ('python', 'python3', 'python.exe', 'python3.exe'):
            command[0] = sys.executable

    # On Windows, `python -c CODE` is executed with exec() in this process
    # instead of a subprocess. Subprocesses launched from a windowless
    # pythonw.exe parent cannot access the interactive desktop, so pyautogui
    # calls fail there, whereas the Flask process itself has desktop access.
    if (platform_name == "Windows"
            and not shell
            and len(command) >= 3
            and command[0] == sys.executable
            and command[1] == "-c"):
        return jsonify(_exec_python_inline(command[2]))

    # CREATE_NO_WINDOW only exists on Windows; it is read only when the
    # condition is true.
    flags = subprocess.CREATE_NO_WINDOW if platform_name == "Windows" else 0

    # Execute the command without any safety checks.
    try:
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=shell,
            text=True,
            timeout=120,
            creationflags=flags,
        )
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500

    return jsonify({
        'status': 'success',
        'output': result.stdout,
        'error': result.stderr,
        'returncode': result.returncode
    })
arg in enumerate(command): + if arg.startswith("~/"): + command[i] = os.path.expanduser(arg) + + # Execute the main command + try: + if platform_name == "Windows": + flags = subprocess.CREATE_NO_WINDOW + else: + flags = 0 + result = subprocess.run( + command, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + shell=shell, + text=True, + timeout=120, + creationflags=flags, + ) + + # If no verification is needed, return immediately + if not verification: + return jsonify({ + 'status': 'success', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode + }) + + # Wait and verify the result + import time + start_time = time.time() + while time.time() - start_time < max_wait_time: + verification_passed = True + + # Check window existence if specified + if 'window_exists' in verification: + window_name = verification['window_exists'] + try: + if platform_name == 'Linux': + wmctrl_result = subprocess.run(['wmctrl', '-l'], + capture_output=True, text=True, check=True) + if window_name.lower() not in wmctrl_result.stdout.lower(): + verification_passed = False + elif platform_name in ['Windows', 'Darwin']: + import pygetwindow as gw + windows = gw.getWindowsWithTitle(window_name) + if not windows: + verification_passed = False + except Exception: + verification_passed = False + + # Check command execution if specified + if 'command_success' in verification: + verify_cmd = verification['command_success'] + try: + verify_result = subprocess.run(verify_cmd, shell=True, + capture_output=True, text=True, timeout=5) + if verify_result.returncode != 0: + verification_passed = False + except Exception: + verification_passed = False + + if verification_passed: + return jsonify({ + 'status': 'success', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode, + 'verification': 'passed', + 'wait_time': time.time() - start_time + }) + + time.sleep(check_interval) + + # Verification failed + return jsonify({ + 'status': 
'verification_failed', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode, + 'verification': 'failed', + 'wait_time': max_wait_time + }), 500 + + except Exception as e: + return jsonify({ + 'status': 'error', + 'message': str(e) + }), 500 + + +def _get_machine_architecture() -> str: + """ Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc. + """ + architecture = platform.machine().lower() + if architecture in ['amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686']: + return 'amd' + elif architecture in ['arm64', 'aarch64', 'aarch32']: + return 'arm' + else: + return 'unknown' + + +@app.route('/setup/launch', methods=["POST"]) +def launch_app(): + data = request.json + shell = data.get("shell", False) + command: List[str] = data.get("command", "" if shell else []) + + if isinstance(command, str) and not shell: + command = shlex.split(command) + + # Expand user directory + for i, arg in enumerate(command): + if arg.startswith("~/"): + command[i] = os.path.expanduser(arg) + + try: + if 'google-chrome' in command and _get_machine_architecture() == 'arm': + index = command.index('google-chrome') + command[index] = 'chromium' # arm64 chrome is not available yet, can only use chromium + + # On Windows, use os.startfile() for direct exe/file launches (equivalent to double-click). + # Popen lacks the Shell context (ShellExecute) that some apps require to initialize correctly. 
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
    """Capture the screen, draw the mouse cursor onto it, and return it as PNG.

    The image is written to ``screenshots/screenshot.png`` next to this file
    and sent back with ``send_file``. On Windows the screenshot is resized to
    the logical resolution when DPI scaling is detected, so that pixel
    coordinates match the logical cursor coordinates. On an unsupported
    platform a JSON error with HTTP 500 is returned.
    """
    file_path = os.path.join(os.path.dirname(__file__), "screenshots", "screenshot.png")
    user_platform = platform.system()

    # Ensure the screenshots directory exists
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    if user_platform == "Windows":
        def get_cursor():
            """Return (RGBA cursor image, (hotspot_x, hotspot_y))."""
            hcursor = win32gui.GetCursorInfo()[1]
            # Read the hotspot first, while the handle is untouched. The
            # handle comes from GetCursorInfo and is not created here, so it
            # is deliberately not passed to DestroyIcon.
            hotspot = win32gui.GetIconInfo(hcursor)[1:3]

            screen_dc_handle = win32gui.GetDC(0)
            try:
                screen_dc = win32ui.CreateDCFromHandle(screen_dc_handle)
                hbmp = win32ui.CreateBitmap()
                hbmp.CreateCompatibleBitmap(screen_dc, 36, 36)
                mem_dc = screen_dc.CreateCompatibleDC()
                mem_dc.SelectObject(hbmp)
                mem_dc.DrawIcon((0, 0), hcursor)

                bmpinfo = hbmp.GetInfo()
                bmpstr = hbmp.GetBitmapBits(True)
                cursor = Image.frombuffer(
                    'RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1
                ).convert("RGBA")

                win32gui.DeleteObject(hbmp.GetHandle())
                mem_dc.DeleteDC()
            finally:
                # The screen DC obtained with GetDC must be released, or one
                # DC is leaked per screenshot.
                win32gui.ReleaseDC(0, screen_dc_handle)

            # Turn the black background of the 36x36 bitmap transparent.
            pixdata = cursor.load()
            width, height = cursor.size
            for y in range(height):
                for x in range(width):
                    if pixdata[x, y] == (0, 0, 0, 255):
                        pixdata[x, y] = (0, 0, 0, 0)

            return (cursor, hotspot)

        # Win8.1+ uses shcore; Win7 fallback uses GetDeviceCaps via GDI
        try:
            ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100
        except (OSError, AttributeError):
            hdc = ctypes.windll.user32.GetDC(0)
            logical_dpi = ctypes.windll.gdi32.GetDeviceCaps(hdc, 88)  # LOGPIXELSX
            ctypes.windll.user32.ReleaseDC(0, hdc)
            ratio = logical_dpi / 96.0  # 96 DPI == 100% scaling

        # get logical screen size
        user32 = ctypes.windll.user32
        logical_width = user32.GetSystemMetrics(0)
        logical_height = user32.GetSystemMetrics(1)

        # Read the cursor position BEFORE grabbing the screen so the two are
        # as close in time as possible. GetCursorPos returns logical
        # coordinates (consistent with pyautogui).
        pos_win = win32gui.GetCursorPos()
        logger.info(f"Cursor position (logical coordinates): {pos_win}")

        # Take screenshot immediately to reduce time difference
        img = ImageGrab.grab(bbox=None, include_layered_windows=True)

        # DPI scaling: bring the physical-size capture down to logical size.
        if ratio != 1.0:
            physical_width, physical_height = img.size
            logger.info(f"Detected DPI scaling: {ratio}x ({ratio*100}%)")
            logger.info(f"Physical screenshot size: {physical_width}x{physical_height}")
            logger.info(f"Logical resolution: {logical_width}x{logical_height}")
            logger.info("Resizing screenshot to match logical resolution...")
            # Pillow releases without Image.Resampling expose LANCZOS on Image.
            lanczos = getattr(Image, "Resampling", Image).LANCZOS
            img = img.resize((logical_width, logical_height), lanczos)
            logger.info(f"Screenshot resized to: {img.size}")

        try:
            cursor, (hotspotx, hotspoty) = get_cursor()

            # The screenshot is at logical resolution, so the logical cursor
            # coordinates can be used directly.
            logical_cursor_x = pos_win[0]
            logical_cursor_y = pos_win[1]

            pos = (logical_cursor_x - hotspotx, logical_cursor_y - hotspoty)

            logger.info(f"Cursor position (logical coordinates): ({logical_cursor_x}, {logical_cursor_y})")
            logger.info(f"Hotspot offset: ({hotspotx}, {hotspoty})")
            logger.info(f"Final paste position: {pos}")

            img.paste(cursor, pos, cursor)
        except Exception as e:
            # Best effort: a screenshot without cursor is still useful.
            logger.warning(f"Failed to capture cursor on Windows, screenshot will not include cursor. Error: {e}")

        img.save(file_path)

    elif user_platform == "Linux":
        cursor_obj = Xcursor()
        imgarray = cursor_obj.getCursorImageArrayFast()
        cursor_img = Image.fromarray(imgarray)
        screenshot = pyautogui.screenshot()
        cursor_x, cursor_y = pyautogui.position()
        screenshot.paste(cursor_img, (cursor_x, cursor_y), cursor_img)
        screenshot.save(file_path)

    elif user_platform == "Darwin":  # (Mac OS)
        subprocess.run(["screencapture", "-C", file_path])

    else:
        logger.warning(f"The platform you're using ({user_platform}) is not currently supported")
        # Nothing was captured; do not fall through and serve a stale or
        # missing file.
        return jsonify({
            "status": "error",
            "message": f"The platform you're using ({user_platform}) is not currently supported"
        }), 500

    return send_file(file_path, mimetype='image/png')
+ """ + try: + from pywinauto import Desktop + from pywinauto.findwindows import ElementNotFoundError + + desktop = Desktop(backend="uia") + + # Common terminal applications on Windows + terminal_apps = [ + "WindowsTerminal.exe", # Windows Terminal + "powershell.exe", # PowerShell + "pwsh.exe", # PowerShell Core + "cmd.exe", # Command Prompt + "conhost.exe" # Console Host + ] + + # Try to find active terminal windows + for window in desktop.windows(): + try: + # Check if window is visible and not minimized + if not window.is_visible() or window.is_minimized(): + continue + + # Get window process name + process_name = window.element_info.name.lower() + + # Check if this is a terminal window + is_terminal = False + for term_app in terminal_apps: + if term_app.lower() in process_name or \ + any(term_name in process_name for term_name in ['terminal', 'powershell', 'command prompt', 'cmd']): + is_terminal = True + break + + if not is_terminal: + continue + + # Try to get text content from the terminal + # First, try to find console/edit controls that contain the output + try: + # For Windows Terminal and modern consoles + # Look for Edit or Document controls that contain the text + text_controls = window.descendants(control_type="Edit") + if not text_controls: + text_controls = window.descendants(control_type="Document") + if not text_controls: + text_controls = window.descendants(control_type="Text") + + for control in text_controls: + try: + text = control.window_text() + if text and len(text.strip()) > 0: + return text.rstrip() + except: + pass + + # If no text controls found, try to get the window text directly + window_text = window.window_text() + if window_text and len(window_text.strip()) > 0: + # Filter out just the window title + if window_text not in ['Windows PowerShell', 'Command Prompt', 'PowerShell', 'Administrator: Windows PowerShell']: + return window_text.rstrip() + + except Exception as e: + logger.debug(f"Error getting text from window 
@app.route('/terminal', methods=['GET'])
def get_terminal_output():
    """Return the text of the active terminal as ``{"output": ..., "status": "success"}``.

    On Linux the accessibility tree is searched for the focused terminal of an
    active gnome-terminal-server frame; ``output`` is ``None`` unless exactly
    one such terminal is found. On Windows the result of
    ``_get_windows_terminal_output`` is returned. Other platforms get a plain
    text message with HTTP 500. Any exception is reported as a JSON error with
    HTTP 500.
    """
    user_platform = platform.system()
    output: Optional[str] = None
    try:
        if user_platform == "Linux":
            desktop: Accessible = pyatspi.Registry.getDesktop(0)
            if _has_active_terminal(desktop):
                desktop_xml: _Element = _create_atspi_node(desktop)
                # 1. the terminal window (frame of application is st:active) is open and active
                # 2. the terminal tab (terminal status is st:focused) is focused
                xpath = '//application[@name="gnome-terminal-server"]/frame[@st:active="true"]//terminal[@st:focused="true"]'
                terminals: List[_Element] = desktop_xml.xpath(xpath, namespaces=_accessibility_ns_map_ubuntu)
                if len(terminals) == 1:
                    # An element's text is None when the terminal had no text
                    # (the tree builder only sets non-empty text); treat that
                    # as an empty string instead of failing on None.rstrip().
                    output = (terminals[0].text or "").rstrip()
        elif user_platform == "Windows":
            output = _get_windows_terminal_output()
            logger.debug(f"Terminal output retrieved: {output}")
        else:  # macOS platform is not implemented currently
            return "Currently not implemented for platform {:}.".format(platform.platform()), 500
        return jsonify({"output": output, "status": "success"})
    except Exception as e:
        logger.error("Failed to get terminal output. Error: %s", e)
        return jsonify({"status": "error", "message": str(e)}), 500
def _get_libreoffice_version() -> Tuple[int, ...]:
    """Run ``libreoffice --version`` and return the version as a tuple of ints.

    The version is assumed to be the second whitespace-separated word of the
    command's standard output; its dot-separated parts are converted to int.
    """
    completed = subprocess.run(
        "libreoffice --version",
        shell=True,
        text=True,
        stdout=subprocess.PIPE,
    )
    words = completed.stdout.split()
    version_text = words[1]
    return tuple(int(part) for part in version_text.split("."))
current screen is available + # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount) + text: str = text_obj.getText(0, text_obj.characterCount) + # if flag=="thunderbird": + # appeared in thunderbird (uFFFC) (not only in thunderbird), "Object + # Replacement Character" in Unicode, "used as placeholder in text for + # an otherwise unspecified object; uFFFD is another "Replacement + # Character", just in case + text = text.replace("\ufffc", "").replace("\ufffd", "") + except NotImplementedError: + pass + + # Image, Selection, Value, Action + try: + node.queryImage() + attribute_dict["image"] = "true" + except NotImplementedError: + pass + + try: + node.querySelection() + attribute_dict["selection"] = "true" + except NotImplementedError: + pass + + try: + value: ATValue = node.queryValue() + value_key = f"{{{_accessibility_ns_map_ubuntu['val']}}}" + + for attr_name, attr_func in [ + ("value", lambda: value.currentValue), + ("min", lambda: value.minimumValue), + ("max", lambda: value.maximumValue), + ("step", lambda: value.minimumIncrement) + ]: + try: + attribute_dict[f"{value_key}{attr_name}"] = str(attr_func()) + except: + pass + except NotImplementedError: + pass + + try: + action: ATAction = node.queryAction() + for i in range(action.nActions): + action_name: str = action.getName(i).replace(" ", "-") + attribute_dict[ + "{{{:}}}{:}_desc".format(_accessibility_ns_map_ubuntu["act"], action_name)] = action.getDescription( + i) + attribute_dict[ + "{{{:}}}{:}_kb".format(_accessibility_ns_map_ubuntu["act"], action_name)] = action.getKeyBinding(i) + except NotImplementedError: + pass + + # Add from here if we need more attributes in the future... 
+ + raw_role_name: str = node.getRoleName().strip() + node_role_name = (raw_role_name or "unknown").replace(" ", "-") + + if not flag: + if raw_role_name == "document spreadsheet": + flag = "calc" + if raw_role_name == "application" and node.name == "Thunderbird": + flag = "thunderbird" + + xml_node = lxml.etree.Element( + node_role_name, + attrib=attribute_dict, + nsmap=_accessibility_ns_map_ubuntu + ) + + if len(text) > 0: + xml_node.text = text + + if depth == MAX_DEPTH: + logger.warning("Max depth reached") + return xml_node + + if flag == "calc" and node_role_name == "table": + # Maximum column: 1024 if ver<=7.3 else 16384 + # Maximum row: 104 8576 + # Maximun sheet: 1 0000 + + global libreoffice_version_tuple + MAXIMUN_COLUMN = 1024 if libreoffice_version_tuple < (7, 4) else 16384 + MAX_ROW = 104_8576 + + index_base = 0 + first_showing = False + column_base = None + for r in range(MAX_ROW): + for clm in range(column_base or 0, MAXIMUN_COLUMN): + child_node: Accessible = node[index_base + clm] + showing: bool = child_node.getState().contains(STATE_SHOWING) + if showing: + child_node: _Element = _create_atspi_node(child_node, depth + 1, flag) + if not first_showing: + column_base = clm + first_showing = True + xml_node.append(child_node) + elif first_showing and column_base is not None or clm >= 500: + break + if first_showing and clm == column_base or not first_showing and r >= 500: + break + index_base += MAXIMUN_COLUMN + return xml_node + else: + try: + for i, ch in enumerate(node): + if i == MAX_WIDTH: + logger.warning("Max width reached") + break + xml_node.append(_create_atspi_node(ch, depth + 1, flag)) + except: + logger.warning("Error occurred during children traversing. Has Ignored. 
# A11y tree getter for Windows
def _create_pywinauto_node(node, nodes, depth: int = 0, flag: Optional[str] = None) -> Optional[_Element]:
    """Serialise a pywinauto element and its descendants into an lxml element.

    :param node: pywinauto wrapper of the UI element to convert.
    :param nodes: collection of wrappers that were already visited; any falsy
        value starts a fresh set.
    :param depth: depth of ``node`` in the tree; children are not expanded once
        it equals ``MAX_DEPTH``.
    :param flag: forwarded unchanged to the recursive calls.
    :return: the XML element, or ``None`` when ``node`` was already visited.
    """
    # ``or`` rather than ``is None`` on purpose: callers may hand in an empty
    # placeholder (e.g. ``{}``) that does not offer ``add``.
    nodes = nodes or set()
    if node in nodes:
        return None
    nodes.add(node)

    attribute_dict: Dict[str, Any] = {"name": node.element_info.name}

    base_properties = {}
    try:
        # Full property set (writable or not); slower and known to fail on
        # some Chrome windows, hence the writable-only fallback below.
        base_properties.update(node.get_properties())
    except Exception:
        logger.debug("Failed to call get_properties(), trying to get writable properites")
        original_class = node.__class__
        try:
            class TempElement(original_class):
                writable_props = pywinauto.base_wrapper.BaseWrapper.writable_props

            # Temporarily swap the class so get_properties() only walks the
            # base wrapper's writable properties.
            node.__class__ = TempElement
            try:
                properties = node.get_properties()
            finally:
                # Restore the real class even when the call raised.
                node.__class__ = original_class

            base_properties.update(properties)  # writable properties only
            logger.debug("get writable properties")
        except Exception as e:
            logger.error(e)

    def _copy_properties(ns_key, attr_names):
        # Copy string/integer properties into the attribute dict. Integers
        # (counts, ids) are stringified; other value types are skipped.
        for attr_name in attr_names:
            try:
                prop_value = base_properties[attr_name]
                if isinstance(prop_value, bool) or not isinstance(prop_value, (str, int)):
                    continue
                attribute_dict[f"{{{_accessibility_ns_map_windows[ns_key]}}}{attr_name}"] = \
                    str(prop_value).lower()
            except Exception:
                pass

    # Count-cnt
    _copy_properties("cnt", ["control_count", "button_count", "item_count", "column_count"])
    # Columns-cols
    _copy_properties("cols", ["columns"])
    # Id-id
    _copy_properties("id", ["control_id", "automation_id", "window_id"])

    # States (by far the most expensive part of the conversion).
    # "active"/is_active is intentionally left out: it dominated the runtime
    # (e.g. 20s out of 21s for Slack); it might be useful for pruning branches.
    for attr_name, method_name in [
        ("enabled", "is_enabled"),
        ("visible", "is_visible"),
        ("minimized", "is_minimized"),
        ("maximized", "is_maximized"),
        ("normal", "is_normal"),
        ("unicode", "is_unicode"),
        ("collapsed", "is_collapsed"),
        ("checkable", "is_checkable"),
        ("checked", "is_checked"),
        ("focused", "is_focused"),
        ("keyboard_focused", "is_keyboard_focused"),
        ("selected", "is_selected"),
        ("selection_required", "is_selection_required"),
        ("pressable", "is_pressable"),
        ("pressed", "is_pressed"),
        ("expanded", "is_expanded"),
        ("editable", "is_editable"),
        ("has_keyboard_focus", "has_keyboard_focus"),
        ("is_keyboard_focusable", "is_keyboard_focusable"),
    ]:
        try:
            attribute_dict[f"{{{_accessibility_ns_map_windows['st']}}}{attr_name}"] = \
                str(getattr(node, method_name)()).lower()
        except Exception:
            # Not every wrapper type implements every state query.
            pass

    # Component
    try:
        rectangle = node.rectangle()
        attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_windows["cp"])] = \
            "({:d}, {:d})".format(rectangle.left, rectangle.top)
        attribute_dict["{{{:}}}size".format(_accessibility_ns_map_windows["cp"])] = \
            "({:d}, {:d})".format(rectangle.width(), rectangle.height())
    except Exception as e:
        logger.error("Error accessing rectangle: %s", e)

    # Text (dropped when it merely repeats the name)
    text: str = node.window_text()
    if text == attribute_dict["name"]:
        text = ""

    # Selection
    if hasattr(node, "select"):
        attribute_dict["selection"] = "true"

    # Value: try the candidate accessors in order and keep the first that works.
    for attr_name, method_names in [
        ("step", ["get_step"]),
        ("value", ["value", "get_value", "get_position"]),
        ("min", ["min_value", "get_range_min"]),
        ("max", ["max_value", "get_range_max"]),
    ]:
        for method_name in method_names:
            if not hasattr(node, method_name):
                continue
            try:
                attribute_dict[f"{{{_accessibility_ns_map_windows['val']}}}{attr_name}"] = \
                    str(getattr(node, method_name)())
                break  # exit once the attribute is set successfully
            except Exception:
                pass

    attribute_dict["{{{:}}}class".format(_accessibility_ns_map_windows["class"])] = str(type(node))

    # class_name
    _copy_properties("class", ["class_name", "friendly_class_name"])

    # Derive a valid XML tag from the window class name.
    node_role_name: str = node.class_name().lower().replace(" ", "-")
    node_role_name = "".join(
        _ch if _ch.isidentifier() or _ch in {"-"} or _ch.isalnum() else "-"
        for _ch in node_role_name
    )
    if node_role_name.strip() == "":
        node_role_name = "unknown"
    if not node_role_name[0].isalpha():
        node_role_name = "tag" + node_role_name

    xml_node = lxml.etree.Element(
        node_role_name,
        attrib=attribute_dict,
        nsmap=_accessibility_ns_map_windows
    )

    if text:
        xml_node.text = text

    if depth == MAX_DEPTH:
        logger.warning("Max depth reached")
        return xml_node

    # Fetch children concurrently, but attach them in their original order.
    children = node.children()
    if children:
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(_create_pywinauto_node, ch, nodes, depth + 1, flag)
                       for ch in children[:MAX_WIDTH]]
            for future in futures:
                try:
                    child_xml = future.result()
                except Exception as e:
                    # One broken child must not discard its siblings.
                    logger.error(f"Exception occurred: {e}")
                    continue
                if child_xml is not None:
                    xml_node.append(child_xml)
    return xml_node
# A11y tree getter for macOS

def _create_axui_node(node, nodes: set = None, depth: int = 0, bbox: tuple = None):
    """Serialise a macOS accessibility element and its descendants to XML.

    :param node: at ``depth == 0`` a CGWindow info mapping (read via the
        ``kCGWindow*`` keys); at deeper levels an AX UI element reference.
    :param nodes: set of elements already visited; a falsy value starts a new set.
    :param depth: depth in the tree; children are not expanded at ``MAX_DEPTH``.
    :param bbox: ``(x_min, y_min, x_max, y_max)`` of the owning window, computed
        at depth 0 and used to drop elements that lie entirely outside it.
    :return: the lxml element, or ``None`` when the element was already
        visited, is invalid, has no window, or does not intersect ``bbox``.
    """
    nodes = nodes or set()
    if node in nodes:
        return None
    nodes.add(node)

    # Attributes that are read, mapped to the namespace key they are stored under.
    reserved_keys = {
        "AXEnabled": "st",
        "AXFocused": "st",
        "AXFullScreen": "st",
        "AXTitle": "attr",
        "AXChildrenInNavigationOrder": "attr",
        "AXChildren": "attr",
        "AXFrame": "attr",
        "AXRole": "role",
        "AXHelp": "attr",
        "AXRoleDescription": "role",
        "AXSubrole": "role",
        "AXURL": "attr",
        "AXValue": "val",
        "AXDescription": "attr",
        "AXDOMIdentifier": "attr",
        "AXSelected": "st",
        "AXInvalid": "st",
        "AXRows": "attr",
        "AXColumns": "attr",
    }
    attribute_dict = {}

    if depth == 0:
        bounds = node["kCGWindowBounds"]
        bbox = (
            bounds["X"],
            bounds["Y"],
            bounds["X"] + bounds["Width"],
            bounds["Y"] + bounds["Height"]
        )
        app_ref = ApplicationServices.AXUIElementCreateApplication(node["kCGWindowOwnerPID"])

        attribute_dict["name"] = node["kCGWindowOwnerName"]
        if attribute_dict["name"] != "Dock":
            error_code, app_wins_ref = ApplicationServices.AXUIElementCopyAttributeValue(
                app_ref, "AXWindows", None)
            if error_code:
                logger.error("MacOS parsing %s encountered Error code: %d", app_ref, error_code)
                # No usable window list: skip this application instead of
                # failing on the indexing below.
                return None
        else:
            app_wins_ref = [app_ref]
        if app_wins_ref is None or len(app_wins_ref) == 0:
            return None
        node = app_wins_ref[0]

    error_code, attr_names = ApplicationServices.AXUIElementCopyAttributeNames(node, None)

    if error_code:
        # -25202: AXError.invalidUIElement
        #         The accessibility object received in this event is invalid.
        return None

    value = None

    if "AXFrame" in attr_names:
        error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, "AXFrame", None)
        # The frame is parsed out of the value's repr().
        rep = repr(attr_val)
        x_value = re.search(r"x:(-?[\d.]+)", rep)
        y_value = re.search(r"y:(-?[\d.]+)", rep)
        w_value = re.search(r"w:(-?[\d.]+)", rep)
        h_value = re.search(r"h:(-?[\d.]+)", rep)
        type_value = re.search(r"type\s?=\s?(\w+)", rep)
        value = {
            "x": float(x_value.group(1)) if x_value else None,
            "y": float(y_value.group(1)) if y_value else None,
            "w": float(w_value.group(1)) if w_value else None,
            "h": float(h_value.group(1)) if h_value else None,
            "type": type_value.group(1) if type_value else None,
        }

        if not any(v is None for v in value.values()):
            x_min = max(bbox[0], value["x"])
            x_max = min(bbox[2], value["x"] + value["w"])
            y_min = max(bbox[1], value["y"])
            y_max = min(bbox[3], value["y"] + value["h"])

            if x_min > x_max or y_min > y_max:
                # No intersection
                return None

    role = None
    text = None
    # Element-valued attributes found while reading; expanded as children
    # below so every attribute is fetched only once.
    child_candidates = []

    for attr_name, ns_key in reserved_keys.items():
        if attr_name not in attr_names:
            continue

        if value and attr_name == "AXFrame":
            if not any(v is None for v in value.values()):
                attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_macos["cp"])] = \
                    "({:d}, {:d})".format(int(value["x"]), int(value["y"]))
                attribute_dict["{{{:}}}size".format(_accessibility_ns_map_macos["cp"])] = \
                    "({:d}, {:d})".format(int(value["w"]), int(value["h"]))
            continue

        error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, attr_name, None)

        is_element = isinstance(attr_val, ApplicationServices.AXUIElementRef)
        is_sequence = isinstance(attr_val, (AppKit.NSArray, list))
        if is_element:
            child_candidates.append(attr_val)
        elif is_sequence:
            child_candidates.extend(attr_val)

        full_attr_name = f"{{{_accessibility_ns_map_macos[ns_key]}}}{attr_name}"

        if attr_name == "AXValue" and not text:
            text = str(attr_val)
            continue

        if attr_name == "AXRoleDescription":
            role = attr_val
            continue

        # Only scalar values become XML attributes.
        if not (is_element or is_sequence) and attr_val is not None:
            attribute_dict[full_attr_name] = str(attr_val)

    node_role_name = role.lower().replace(" ", "_") if role else "unknown_role"

    xml_node = lxml.etree.Element(
        node_role_name,
        attrib=attribute_dict,
        nsmap=_accessibility_ns_map_macos
    )

    if text is not None and len(text) > 0:
        xml_node.text = text

    if depth == MAX_DEPTH:
        logger.warning("Max depth reached")
        return xml_node

    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(_create_axui_node, child, nodes, depth + 1, bbox)
                   for child in child_candidates]
        # Consume in submission order so the output order is deterministic.
        for future in futures:
            try:
                result = future.result()
            except Exception as e:
                logger.error(f"Exception occurred: {e}")
                continue
            if result is not None:
                xml_node.append(result)

    return xml_node
@app.route("/accessibility", methods=["GET"])
def get_accessibility_tree():
    """Return the accessibility tree of the current desktop as XML.

    Responds with JSON ``{"AT": <xml string>}`` on Linux, Windows and macOS,
    and with a plain-text message and status 500 on any other platform.
    """
    global libreoffice_version_tuple

    os_name: str = platform.system()

    def _attach_results(parent, futures):
        # Append finished sub-trees in submission order, skipping builders
        # that returned None (e.g. already-visited or invalid elements).
        for future in futures:
            xml_tree = future.result()
            if xml_tree is not None:
                parent.append(xml_tree)

    # AT-SPI works for KDE as well
    if os_name == "Linux":
        libreoffice_version_tuple = _get_libreoffice_version()

        desktop = pyatspi.Registry.getDesktop(0)
        xml_node = lxml.etree.Element("desktop-frame", nsmap=_accessibility_ns_map_ubuntu)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(_create_atspi_node, app_node, 1) for app_node in desktop]
            _attach_results(xml_node, futures)
        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})

    elif os_name == "Windows":
        # Attention: Windows a11y tree is implemented to be read through `pywinauto` module, however,
        # two different backends `win32` and `uia` are supported and different results may be returned
        desktop = Desktop(backend="uia")
        xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_windows)
        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Each top-level window gets its own visited-set.
            futures = [executor.submit(_create_pywinauto_node, wnd, set(), 1) for wnd in desktop.windows()]
            _attach_results(xml_node, futures)
        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})

    elif os_name == "Darwin":
        # TODO: Add Dock and MenuBar
        xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_macos)

        with concurrent.futures.ThreadPoolExecutor() as executor:
            foreground_windows = [
                win for win in Quartz.CGWindowListCopyWindowInfo(
                    (Quartz.kCGWindowListExcludeDesktopElements |
                     Quartz.kCGWindowListOptionOnScreenOnly),
                    Quartz.kCGNullWindowID
                ) if win["kCGWindowLayer"] == 0 and win["kCGWindowOwnerName"] != "Window Server"
            ]
            dock_info = [
                win for win in Quartz.CGWindowListCopyWindowInfo(
                    Quartz.kCGWindowListOptionAll,
                    Quartz.kCGNullWindowID
                ) if win.get("kCGWindowName", None) == "Dock"
            ]

            futures = [
                executor.submit(_create_axui_node, wnd, None, 0)
                for wnd in foreground_windows + dock_info
            ]
            _attach_results(xml_node, futures)

        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})

    else:
        return "Currently not implemented for platform {:}.".format(platform.platform()), 500
@app.route('/screen_size', methods=['POST'])
def get_screen_size():
    """Return the primary screen size as JSON ``{"width": ..., "height": ...}``.

    Supported on Linux (via Xlib) and Windows (via ``GetSystemMetrics``); any
    other platform gets a JSON error with status 501 instead of an unhandled
    exception from unset locals.
    """
    if platform_name == "Linux":
        d = display.Display()
        screen_width = d.screen().width_in_pixels
        screen_height = d.screen().height_in_pixels
    elif platform_name == "Windows":
        user32 = ctypes.windll.user32
        # SM_CXSCREEN = 0, SM_CYSCREEN = 1
        screen_width = user32.GetSystemMetrics(0)
        screen_height = user32.GetSystemMetrics(1)
    else:
        return jsonify({"error": f"screen_size is not supported on {platform_name}"}), 501

    return jsonify(
        {
            "width": screen_width,
            "height": screen_height
        }
    )
@app.route('/window_size', methods=['POST'])
def get_window_size():
    """Return the size of the first X11 window whose WM_CLASS matches.

    Form field ``app_class_name`` is required (400 otherwise). Linux only
    (501 elsewhere). Responds with JSON ``{"width", "height"}``, or with a
    JSON error and status 404 when no window matches.
    """
    if 'app_class_name' not in request.form:
        return jsonify({"error": "app_class_name is required"}), 400
    app_class_name = request.form['app_class_name']

    if platform_name != "Linux":
        return jsonify({"error": "window_size is only supported on Linux"}), 501

    d = display.Display()
    root = d.screen().root
    client_list = root.get_full_property(d.intern_atom('_NET_CLIENT_LIST'), X.AnyPropertyType)
    # The property is absent when the window manager does not publish it.
    window_ids = client_list.value if client_list is not None else []

    wanted = app_class_name.lower()
    for window_id in window_ids:
        try:
            window = d.create_resource_object('window', window_id)
            wm_class = window.get_wm_class()

            if wm_class is None:
                continue

            if wanted in [name.lower() for name in wm_class]:
                geom = window.get_geometry()
                return jsonify(
                    {
                        "width": geom.width,
                        "height": geom.height
                    }
                )
        except Xlib.error.XError:  # Ignore windows that give an error
            continue

    # A view must not return None; report the miss explicitly.
    return jsonify({"error": f"No window found for class name: {app_class_name}"}), 404


@app.route('/desktop_path', methods=['POST'])
def get_desktop_path():
    """Return ``<home>/Desktop`` as JSON, or a 404 JSON error.

    The 404 is returned when the platform is not Windows, macOS or Linux, or
    when the directory does not exist.
    """
    desktop_path = None
    if platform.system() in ("Windows", "Darwin", "Linux"):
        desktop_path = os.path.join(str(Path.home()), "Desktop")

    if desktop_path and os.path.exists(desktop_path):
        return jsonify(desktop_path=desktop_path)
    return jsonify(error="Unsupported operating system or desktop path not found"), 404


@app.route('/wallpaper', methods=['POST'])
def get_wallpaper():
    """Send the current desktop wallpaper file.

    Aborts with 400 on an unsupported OS, 404 when no wallpaper path could be
    determined and 500 when the file cannot be served.
    """

    def get_wallpaper_windows():
        SPI_GETDESKWALLPAPER = 0x73
        MAX_PATH = 260
        buffer = ctypes.create_unicode_buffer(MAX_PATH)
        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
        return buffer.value

    def get_wallpaper_macos():
        script = """
        tell application "System Events" to tell every desktop to get picture
        """
        process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, error = process.communicate()
        if error:
            app.logger.error("Error: %s", error.decode('utf-8'))
            return None
        return output.strip().decode('utf-8')

    def get_wallpaper_linux():
        from urllib.parse import unquote
        try:
            output = subprocess.check_output(
                ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"],
                stderr=subprocess.PIPE
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            # FileNotFoundError: gsettings itself is not installed.
            app.logger.error("Error: %s", e)
            return None
        uri = output.decode('utf-8').strip().replace('file://', '').replace("'", "")
        # The setting is a URI, so percent-escapes (e.g. %20) must be decoded
        # to obtain a usable file-system path.
        return unquote(uri)

    os_name = platform.system()
    wallpaper_path = None
    if os_name == 'Windows':
        wallpaper_path = get_wallpaper_windows()
    elif os_name == 'Darwin':
        wallpaper_path = get_wallpaper_macos()
    elif os_name == 'Linux':
        wallpaper_path = get_wallpaper_linux()
    else:
        app.logger.error(f"Unsupported OS: {os_name}")
        abort(400, description="Unsupported OS")

    if not wallpaper_path:
        abort(404, description="Wallpaper file not found")

    try:
        return send_file(wallpaper_path, mimetype='image/png')
    except Exception as e:
        app.logger.error(f"An error occurred while serving the wallpaper file: {e}")
        abort(500, description="Unable to serve the wallpaper file")
@app.route('/list_directory', methods=['POST'])
def get_directory_tree():
    """Return the recursive listing of a directory as nested JSON.

    Expects a JSON body with a ``path`` key. Responds 400 when the key is
    missing or the path is not a directory.
    """

    def _list_dir_contents(directory):
        """
        List the contents of a directory recursively, building a tree structure.

        :param directory: The path of the directory to inspect.
        :return: A nested dictionary with the contents of the directory, or
            ``{'error': message}`` when the directory cannot be read.
        """
        # normpath drops a trailing separator, which would otherwise make
        # basename() return an empty name.
        tree = {
            'type': 'directory',
            'name': os.path.basename(os.path.normpath(directory)),
            'children': []
        }
        try:
            for entry in os.listdir(directory):
                full_path = os.path.join(directory, entry)
                if os.path.isdir(full_path):
                    tree['children'].append(_list_dir_contents(full_path))
                else:
                    tree['children'].append({'type': 'file', 'name': entry})
        except OSError as e:
            # If the directory cannot be accessed, return the exception message
            tree = {'error': str(e)}
        return tree

    # A missing or non-object body is treated like a missing 'path'.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'path' not in data:
        return jsonify(error="Missing 'path' parameter"), 400

    start_path = data['path']
    if not os.path.isdir(start_path):
        return jsonify(error="The provided path is not a directory"), 400

    directory_tree = _list_dir_contents(start_path)
    return jsonify(directory_tree=directory_tree)
@app.route('/file', methods=['POST'])
def get_file():
    """Send the file named by form field ``file_path`` as an attachment.

    ``~`` and environment variables in the path are expanded. Responds 400
    when the field is missing, 404 when the path is not an existing regular
    file, and 500 on any other failure.
    """
    if 'file_path' not in request.form:
        return jsonify({"error": "file_path is required"}), 400
    file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))

    try:
        # isfile (not exists): a directory cannot be sent and would otherwise
        # surface as an opaque 500 from send_file.
        if not os.path.isfile(file_path):
            return jsonify({"error": "File not found"}), 404

        file_size = os.path.getsize(file_path)
        logger.info(f"Serving file: {file_path} ({file_size} bytes)")

        return send_file(file_path, as_attachment=True)
    except FileNotFoundError:
        # The file vanished between the check and the read.
        return jsonify({"error": "File not found"}), 404
    except Exception as e:
        logger.error(f"Error serving file {file_path}: {e}")
        return jsonify({"error": f"Failed to serve file: {str(e)}"}), 500
@app.route("/setup/upload", methods=["POST"])
def upload_file():
    """Store the uploaded ``file_data`` at form field ``file_path``.

    Parent directories are created as needed. Responds 400 when either part
    is missing and 500 (after removing any partial file) when saving fails.
    """
    if 'file_path' not in request.form or 'file_data' not in request.files:
        return jsonify({"error": "file_path and file_data are required"}), 400

    file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))
    file = request.files["file_data"]

    try:
        target_dir = os.path.dirname(file_path)
        if target_dir:  # a bare file name has no directory to create
            os.makedirs(target_dir, exist_ok=True)

        file.save(file_path)
        uploaded_size = os.path.getsize(file_path)

        logger.info(f"File uploaded successfully: {file_path} ({uploaded_size} bytes)")
        return f"File Uploaded: {uploaded_size} bytes"

    except Exception as e:
        logger.error(f"Error uploading file to {file_path}: {e}")
        # Best-effort removal of a partially written file.
        if os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError:
                pass
        return jsonify({"error": f"Failed to upload file: {str(e)}"}), 500


@app.route('/platform', methods=['GET'])
def get_platform():
    """Return ``platform.system()`` as plain text."""
    return platform.system()


@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Return the mouse cursor position as a JSON array ``[x, y]``."""
    pos = pyautogui.position()
    return jsonify(pos.x, pos.y)


@app.route("/setup/change_wallpaper", methods=['POST'])
def change_wallpaper():
    """Set the desktop wallpaper to the image at JSON field ``path``.

    Responds 400 when the path is missing, 404 when the file does not exist
    and 500 when the platform is unsupported or the platform command fails.
    """
    data = request.json
    path = data.get('path', None)

    if not path:
        return "Path not supplied!", 400

    path = Path(os.path.expandvars(os.path.expanduser(path)))

    if not path.exists():
        return f"File not found: {path}", 404

    try:
        user_platform = platform.system()
        if user_platform == "Windows":
            import ctypes
            # 20 = SPI_SETDESKWALLPAPER; 3 = update profile + broadcast change.
            # The call returns 0 on failure.
            if not ctypes.windll.user32.SystemParametersInfoW(20, 0, str(path), 3):
                return "Failed to change wallpaper. Error: SystemParametersInfoW failed", 500
        elif user_platform == "Linux":
            import subprocess
            # check=True so a failing command is reported, not ignored.
            subprocess.run(["gsettings", "set", "org.gnome.desktop.background", "picture-uri", f"file://{path}"],
                           check=True)
        elif user_platform == "Darwin":  # (Mac OS)
            import subprocess
            subprocess.run(
                ["osascript", "-e", f'tell application "Finder" to set desktop picture to POSIX file "{path}"'],
                check=True)
        else:
            return f"Failed to change wallpaper. Error: unsupported platform {user_platform}", 500
        return "Wallpaper changed successfully"
    except Exception as e:
        return f"Failed to change wallpaper. Error: {e}", 500


@app.route("/setup/download_file", methods=['POST'])
def download_file():
    """Download JSON field ``url`` to JSON field ``path`` with up to 3 attempts.

    Parent directories are created. A partial file is removed after every
    failed attempt. Responds 400 when ``url`` or ``path`` is missing and 500
    when all attempts fail.
    """
    data = request.json
    url = data.get('url', None)
    path = data.get('path', None)

    if not url or not path:
        return "Path or URL not supplied!", 400

    path = Path(os.path.expandvars(os.path.expanduser(path)))
    path.parent.mkdir(parents=True, exist_ok=True)

    max_retries = 3
    log_step = 1024 * 1024  # report progress roughly every MB
    error: Optional[Exception] = None

    for i in range(max_retries):
        try:
            logger.info(f"Download attempt {i+1}/{max_retries} for {url}")
            # The context manager releases the connection on every exit path.
            with requests.get(url, stream=True, timeout=300) as response:
                response.raise_for_status()

                # Get expected file size if available
                total_size = int(response.headers.get('content-length', 0))
                # content-length counts the bytes on the wire; when the body is
                # compressed, iter_content yields decoded data of another size,
                # so the two must not be compared.
                size_is_comparable = not response.headers.get('content-encoding')
                if total_size > 0:
                    logger.info(f"Expected file size: {total_size / (1024*1024):.2f} MB")

                downloaded_size = 0
                next_log_at = log_step
                with open(path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if not chunk:
                            continue
                        f.write(chunk)
                        downloaded_size += len(chunk)
                        # Threshold-based: chunks are not guaranteed to be
                        # exactly 8192 bytes, so a modulo test rarely fires.
                        if total_size > 0 and downloaded_size >= next_log_at:
                            progress = (downloaded_size / total_size) * 100
                            logger.info(f"Download progress: {progress:.1f}%")
                            next_log_at += log_step

            # Verify download completeness
            actual_size = os.path.getsize(path)
            if size_is_comparable and total_size > 0 and actual_size != total_size:
                raise Exception(f"Download incomplete. Expected {total_size} bytes, got {actual_size} bytes")

            logger.info(f"File downloaded successfully: {path} ({actual_size} bytes)")
            return f"File downloaded successfully: {actual_size} bytes"

        except Exception as e:
            error = e
            logger.error(f"Failed to download {url}: {e}. Retrying... ({max_retries - i - 1} attempts left)")
            # Clean up partial download
            if path.exists():
                try:
                    path.unlink()
                except OSError:
                    pass

    return f"Failed to download {url}. No retries left. Error: {error}", 500
@app.route("/setup/open_file", methods=['POST'])
def open_file():
    """Open a file or launch an application, then wait for and activate its window.

    JSON field ``path`` is either an existing path (opened with the platform
    opener) or a command found on ``PATH`` (started directly). The window list
    is polled once per second for up to ``TIMEOUT`` seconds for a title that
    contains the file name (with or without extension).

    Responds 400 without a path, 404 when neither a file nor a command is
    found, and 500 when opening fails or no window shows up in time.
    """
    data = request.json
    path = data.get('path', None)

    if not path:
        return "Path not supplied!", 400

    path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
    is_file_path = path_obj.exists()

    # Anything that is not an existing path must be a command on PATH.
    if not is_file_path:
        import shutil
        if not shutil.which(path):
            return f"Application/file not found: {path}", 404

    try:
        os_name = platform.system()

        if is_file_path:
            if os_name == "Windows":
                os.startfile(path_obj)
            else:
                open_cmd: str = "open" if os_name == "Darwin" else "xdg-open"
                subprocess.Popen([open_cmd, str(path_obj)])
            file_name = path_obj.name
            file_name_without_ext, _ = os.path.splitext(file_name)
        else:
            subprocess.Popen([path])
            file_name = path
            file_name_without_ext = path

        gw = None
        if os_name in ['Windows', 'Darwin']:
            import pygetwindow as gw

        # Wait for the file/application to open
        start_time = time.time()
        window_found = False
        wmctrl_warning_logged = False

        while time.time() - start_time < TIMEOUT:
            if gw is not None:
                windows = gw.getWindowsWithTitle(file_name)
                if not windows:
                    windows = gw.getWindowsWithTitle(file_name_without_ext)

                if windows:
                    windows[0].activate()
                    window_found = True
                    break
            elif os_name == 'Linux':
                try:
                    # List windows with wmctrl and look for the file name in each line.
                    result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
                    listing = result.stdout.strip()
                    for window in (listing.split('\n') if listing else []):
                        if file_name in window or file_name_without_ext in window:
                            window_id = window.split()[0]
                            subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
                            window_found = True
                            break
                    if window_found:
                        break
                except (subprocess.CalledProcessError, FileNotFoundError):
                    # wmctrl might not be installed or the window manager isn't ready.
                    # Log once and let the loop retry.
                    if not wmctrl_warning_logged:
                        logger.warning("wmctrl command is not ready, will keep retrying...")
                        wmctrl_warning_logged = True

            time.sleep(1)

        if window_found:
            return "File opened and window activated successfully"
        return f"Failed to find window for {file_name} within {TIMEOUT} seconds.", 500

    except Exception as e:
        return f"Failed to open {path}. Error: {e}", 500
@app.route("/setup/activate_window", methods=['POST'])
def activate_window():
    """Bring the window named by JSON field ``window_name`` to the front.

    Optional JSON fields:
      * ``strict``   - require the title to equal ``window_name`` exactly.
      * ``by_class`` - match on the window class instead of the title
        (Linux only; responds 500 on Windows and macOS).

    Responds 400 when the name is missing or the OS is unsupported and 404
    when no matching window is found on Windows/macOS.
    """
    data = request.json
    window_name = data.get('window_name', None)
    if not window_name:
        return "window_name required", 400
    strict: bool = data.get("strict", False)  # compare case-sensitively and match the whole string
    by_class_name: bool = data.get("by_class", False)

    os_name = platform.system()

    if os_name in ('Windows', 'Darwin'):
        import pygetwindow as gw
        if by_class_name:
            os_label = "Windows" if os_name == 'Windows' else "macOS"
            return "Get window by class name is not supported on {:} currently.".format(os_label), 500

        windows = gw.getWindowsWithTitle(window_name)
        if len(windows) == 0:
            return "Window {:} not found (empty results)".format(window_name), 404

        if strict:
            # Compare the title with the requested name (comparing it with the
            # window object itself can never match).
            window = next((wnd for wnd in windows if wnd.title == window_name), None)
            if window is None:
                return "Window {:} not found (strict mode).".format(window_name), 404
        else:
            window = windows[0]

        if os_name == 'Darwin':
            # Un-minimize the window before bringing it to the front
            window.unminimize()
        window.activate()

    elif os_name == 'Linux':
        # -x: match WM_CLASS, -F: exact match, -a: activate
        subprocess.run(["wmctrl"
                           , "-{:}{:}a".format("x" if by_class_name else ""
                                               , "F" if strict else ""
                                               )
                           , window_name
                        ]
                       )

    else:
        return f"Operating system {os_name} not supported.", 400

    return "Window activated successfully", 200
@app.route("/setup/close_window", methods=["POST"])
def close_window():
    """Close the window named by JSON field ``window_name``.

    Optional JSON fields ``strict`` (exact title match) and ``by_class``
    (match the window class; Linux only). Responds 400 when the name is
    missing, 404 when no window matches on Windows, and 500 on macOS or any
    other unsupported platform.
    """
    data = request.json
    if "window_name" not in data:
        return "window_name required", 400
    window_name: str = data["window_name"]
    strict: bool = data.get("strict", False)  # compare case-sensitively and match the whole string
    by_class_name: bool = data.get("by_class", False)

    os_name: str = platform.system()
    if os_name == "Windows":
        import pygetwindow as gw

        if by_class_name:
            return "Get window by class name is not supported on Windows currently.", 500
        windows = gw.getWindowsWithTitle(window_name)

        if len(windows) == 0:
            return "Window {:} not found (empty results)".format(window_name), 404
        if strict:
            # Compare the title with the requested name (comparing it with the
            # window object itself can never match).
            window = next((wnd for wnd in windows if wnd.title == window_name), None)
            if window is None:
                return "Window {:} not found (strict mode).".format(window_name), 404
        else:
            window = windows[0]
        window.close()
    elif os_name == "Linux":
        # -x: match WM_CLASS, -F: exact match, -c: close
        subprocess.run(["wmctrl"
                           , "-{:}{:}c".format("x" if by_class_name else ""
                                               , "F" if strict else ""
                                               )
                           , window_name
                        ]
                       )
    elif os_name == "Darwin":
        return "Currently not supported on macOS.", 500
    else:
        return "Not supported platform {:}".format(os_name), 500

    return "Window closed successfully.", 200


@app.route('/start_recording', methods=['POST'])
def start_recording():
    """Start an ffmpeg screen recording that writes to ``recording_path``.

    Responds 400 when a recording is already running, 501 on platforms other
    than Linux and Windows, and 500 when an old recording cannot be removed or
    ffmpeg exits within the first two seconds.
    """
    global recording_process
    if recording_process and recording_process.poll() is None:
        return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400

    # Clean up previous recording if it exists
    if os.path.exists(recording_path):
        try:
            os.remove(recording_path)
        except OSError as e:
            logger.error(f"Error removing old recording file: {e}")
            return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500

    # -nostats/-loglevel error keep stderr nearly silent: it is a pipe nobody
    # reads while recording, so continuous progress output could fill it.
    common_args = ["ffmpeg", "-y", "-nostats", "-loglevel", "error"]

    if platform_name == "Linux":
        d = display.Display()
        screen_width = d.screen().width_in_pixels
        screen_height = d.screen().height_in_pixels
        capture_args = ["-f", "x11grab", "-draw_mouse", "1",
                        "-s", f"{screen_width}x{screen_height}", "-i", ":0.0"]
    elif platform_name == "Windows":
        user32 = ctypes.windll.user32
        screen_width = user32.GetSystemMetrics(0)
        screen_height = user32.GetSystemMetrics(1)
        # Use gdigrab for Windows screen capture
        capture_args = ["-f", "gdigrab", "-draw_mouse", "1", "-framerate", "30",
                        "-video_size", f"{screen_width}x{screen_height}", "-i", "desktop"]
    else:
        return jsonify({'status': 'error', 'message': f'Recording not supported on {platform_name}'}), 501

    # Pass an argument list instead of splitting a command string, so an
    # output path with backslashes or spaces reaches ffmpeg unchanged.
    start_command = common_args + capture_args + ["-c:v", "libx264", "-r", "30", str(recording_path)]

    recording_process = subprocess.Popen(start_command,
                                         stdin=subprocess.PIPE,  # lets a 'q' be sent for a graceful stop
                                         stdout=subprocess.DEVNULL,
                                         stderr=subprocess.PIPE,
                                         text=True  # To get stderr as string
                                         )

    try:
        # If ffmpeg exits within 2 seconds it failed to start.
        recording_process.wait(timeout=2)
        error_output = recording_process.stderr.read()
        return jsonify({
            'status': 'error',
            'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
        }), 500
    except subprocess.TimeoutExpired:
        # This is the expected outcome: the process is still running after 2 seconds.
        return jsonify({'status': 'success', 'message': 'Started recording successfully.'})
@app.route('/end_recording', methods=['POST'])
def end_recording():
    """Stop the running ffmpeg recording and send the recorded file.

    Responds 400 when no recording is running and 500 when ffmpeg had to be
    killed or the output file is missing or empty.
    """
    global recording_process

    if not recording_process or recording_process.poll() is not None:
        recording_process = None  # Clean up stale process object
        return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400

    error_output = ""
    try:
        if platform_name == "Windows":
            # ffmpeg quits gracefully when it reads 'q' on standard input.
            # That is only possible when stdin was opened as a pipe;
            # otherwise fall back to terminate().
            stdin = recording_process.stdin
            if stdin is None:
                recording_process.terminate()
            else:
                try:
                    # Text-mode pipes expose an encoding; binary ones do not.
                    is_text = isinstance(getattr(stdin, 'encoding', None), str)
                    stdin.write('q' if is_text else b'q')
                    stdin.flush()
                except (OSError, ValueError):
                    recording_process.terminate()
        else:
            # SIGINT lets ffmpeg finalize the file.
            recording_process.send_signal(signal.SIGINT)
        # Wait for ffmpeg to terminate. communicate() gets output and waits.
        _, error_output = recording_process.communicate(timeout=15)
    except subprocess.TimeoutExpired:
        logger.error("ffmpeg did not respond to stop signal, killing the process.")
        recording_process.kill()
        # After killing, communicate to get any remaining output.
        _, error_output = recording_process.communicate()
        recording_process = None
        return jsonify({
            'status': 'error',
            'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
        }), 500

    recording_process = None  # Clear the process from global state

    # Check if the recording file was created and is not empty.
    if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
        return send_file(recording_path, as_attachment=True)

    logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
    return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
@app.route("/run_python", methods=['POST'])
def run_python():
    """Run the Python source in JSON field ``code`` in a separate interpreter.

    The code is written to a temporary file and executed with
    ``sys.executable`` under a 30 second timeout. The JSON response carries
    ``status``, ``message`` (stdout and stderr combined), ``output`` (stdout),
    ``error`` (stderr), ``return_code`` and ``need_more``. Responds 400 when
    no code is supplied and 500 on timeout or any other failure.

    NOTE(review): this executes arbitrary code from the request body.
    """
    data = request.json
    code = data.get('code', None)

    if not code:
        return jsonify({'status': 'error', 'message': 'Code not supplied!'}), 400

    import tempfile
    import uuid

    # Use the platform's temp directory; a literal "/tmp" does not exist on Windows.
    temp_filename = os.path.join(tempfile.gettempdir(), f"python_exec_{uuid.uuid4().hex}.py")

    try:
        # Python reads source files as UTF-8, so write them as UTF-8 too.
        with open(temp_filename, 'w', encoding='utf-8') as f:
            f.write(code)

        # Use sys.executable to use the same Python interpreter as the Flask server
        result = subprocess.run(
            [sys.executable, temp_filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=30  # 30 second timeout
        )

        output = result.stdout
        error_output = result.stderr

        # Combine output and errors if both exist
        combined_message = output
        if error_output:
            combined_message += ('\n' + error_output) if output else error_output

        if result.returncode != 0:
            status = 'error'
            if not error_output:
                # If no stderr but non-zero return code, add a generic error message
                error_output = f"Process exited with code {result.returncode}"
                combined_message = combined_message + '\n' + error_output if combined_message else error_output
        else:
            status = 'success'

        return jsonify({
            'status': status,
            'message': combined_message,
            'need_more': False,  # Not applicable for file execution
            'output': output,  # stdout only
            'error': error_output,  # stderr only
            'return_code': result.returncode
        })

    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'message': 'Execution timeout: Code took too long to execute',
            'error': 'TimeoutExpired',
            'need_more': False,
            'output': None,
        }), 500

    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': f'Execution error: {str(e)}',
            'error': traceback.format_exc(),
            'need_more': False,
            'output': None,
        }), 500

    finally:
        # Single cleanup point for every exit path; failures are ignored.
        try:
            os.remove(temp_filename)
        except OSError:
            pass
'Execution timeout: Code took too long to execute', + 'error': 'TimeoutExpired', + 'need_more': False, + 'output': None, + }), 500 + + except Exception as e: + # Clean up the temporary file on error + try: + os.remove(temp_filename) + except: + pass + + # Capture the exception details + return jsonify({ + 'status': 'error', + 'message': f'Execution error: {str(e)}', + 'error': traceback.format_exc(), + 'need_more': False, + 'output': None, + }), 500 + + +@app.route("/run_bash_script", methods=['POST']) +def run_bash_script(): + data = request.json + script = data.get('script', None) + timeout = data.get('timeout', 100) # Default timeout of 30 seconds + working_dir = data.get('working_dir', None) + + if not script: + return jsonify({ + 'status': 'error', + 'output': 'Script not supplied!', + 'error': "", # Always empty as requested + 'returncode': -1 + }), 400 + + # Expand user directory if provided + if working_dir: + working_dir = os.path.expanduser(working_dir) + if not os.path.exists(working_dir): + return jsonify({ + 'status': 'error', + 'output': f'Working directory does not exist: {working_dir}', + 'error': "", # Always empty as requested + 'returncode': -1 + }), 400 + + # Create a temporary script file + import tempfile + with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as tmp_file: + if "#!/bin/bash" not in script: + script = "#!/bin/bash\n\n" + script + tmp_file.write(script) + tmp_file_path = tmp_file.name + + try: + # Make the script executable + os.chmod(tmp_file_path, 0o755) + + # Execute the script + if platform_name == "Windows": + # On Windows, use Git Bash or WSL if available, otherwise cmd + flags = subprocess.CREATE_NO_WINDOW + # Try to use bash if available (Git Bash, WSL, etc.) 
+ result = subprocess.run( + ['bash', tmp_file_path], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # Merge stderr into stdout + text=True, + timeout=timeout, + cwd=working_dir, + creationflags=flags, + shell=False + ) + else: + # On Unix-like systems, use bash directly + flags = 0 + result = subprocess.run( + ['/bin/bash', tmp_file_path], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # Merge stderr into stdout + text=True, + timeout=timeout, + cwd=working_dir, + creationflags=flags, + shell=False + ) + + # Log the command execution for trajectory recording + _append_event("BashScript", + {"script": script, "output": result.stdout, "error": "", "returncode": result.returncode}, + ts=time.time()) + + return jsonify({ + 'status': 'success' if result.returncode == 0 else 'error', + 'output': result.stdout, # Contains both stdout and stderr merged + 'error': "", # Always empty as requested + 'returncode': result.returncode + }) + + except subprocess.TimeoutExpired: + return jsonify({ + 'status': 'error', + 'output': f'Script execution timed out after {timeout} seconds', + 'error': "", # Always empty as requested + 'returncode': -1 + }), 500 + except FileNotFoundError: + # Bash not found, try with sh + try: + result = subprocess.run( + ['sh', tmp_file_path], + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, # Merge stderr into stdout + text=True, + timeout=timeout, + cwd=working_dir, + shell=False + ) + + _append_event("BashScript", + {"script": script, "output": result.stdout, "error": "", "returncode": result.returncode}, + ts=time.time()) + + return jsonify({ + 'status': 'success' if result.returncode == 0 else 'error', + 'output': result.stdout, # Contains both stdout and stderr merged + 'error': "", # Always empty as requested + 'returncode': result.returncode, + }) + except Exception as e: + return jsonify({ + 'status': 'error', + 'output': f'Failed to execute script: {str(e)}', + 'error': "", # Always empty as requested + 'returncode': -1 + 
}), 500 + except Exception as e: + return jsonify({ + 'status': 'error', + 'output': f'Failed to execute script: {str(e)}', + 'error': "", # Always empty as requested + 'returncode': -1 + }), 500 + finally: + # Clean up the temporary file + try: + os.unlink(tmp_file_path) + except: + pass + +if __name__ == '__main__': + app.run(debug=True, host="0.0.0.0") diff --git a/desktop_env/server_win7/start_flask.bat b/desktop_env/server_win7/start_flask.bat new file mode 100644 index 0000000..14f0fff --- /dev/null +++ b/desktop_env/server_win7/start_flask.bat @@ -0,0 +1,3 @@ +@echo off +cd /d D:\python_server +start /B pythonw D:\python_server\main.py diff --git a/desktop_env/server_win7/transfer_to_win7.sh b/desktop_env/server_win7/transfer_to_win7.sh new file mode 100755 index 0000000..8612422 --- /dev/null +++ b/desktop_env/server_win7/transfer_to_win7.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Mac 端传输脚本 —— 将 Win7 离线包通过 HTTP 共享,Win7 用浏览器或 PowerShell 下载 +# 用法: bash transfer_to_win7.sh + +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PKG_DIR="$SCRIPT_DIR/win7_offline_packages" +WIN7_IP="192.168.1.11" +MAC_IP="192.168.1.10" +PORT=8888 + +echo "========================================" +echo " Mac → Win7 离线包传输工具" +echo " Mac: $MAC_IP Win7: $WIN7_IP" +echo "========================================" +echo "" + +# 检查连通性 +echo "[1/3] 检测 Win7 网络连通性..." +if ping -c 2 -W 1000 "$WIN7_IP" > /dev/null 2>&1; then + echo " [OK] Win7 ($WIN7_IP) 可达" +else + echo " [警告] ping 超时,请确认网线已连接且 IP 配置正确" +fi + +# 打包 +echo "" +echo "[2/3] 打包离线文件..." +cd "$SCRIPT_DIR" +zip -r win7_server_packages.zip win7_offline_packages/ 2>/dev/null +echo " [OK] 已打包 → $(pwd)/win7_server_packages.zip" +echo " [OK] 解压脚本 → $(pwd)/extract_and_install.bat" + +# 启动 HTTP 服务器 +echo "" +echo "[3/3] 启动文件共享服务器..." 
+echo "" +echo " ┌──────────────────────────────────────────────────────────┐" +echo " │ 在 Win7 浏览器(IE)中打开: │" +echo " │ http://$MAC_IP:$PORT/ │" +echo " │ │" +echo " │ ★ 第一步:下载 extract_and_install.bat(小文件,先下) │" +echo " │ ★ 第二步:下载 win7_server_packages.zip(大文件) │" +echo " │ ★ 第三步:把两个文件放同一目录,双击 bat 文件 │" +echo " │ (会自动解压 + 自动安装所有依赖) │" +echo " │ │" +echo " │ 如果无法双击 bat → 在命令提示符(cmd)中运行: │" +echo " │ cd 下载目录路径 │" +echo " │ extract_and_install.bat │" +echo " └──────────────────────────────────────────────────────────┘" +echo "" +echo " 按 Ctrl+C 停止服务器" +echo "" + +# 在 zip 所在目录启动 HTTP 服务 +cd "$SCRIPT_DIR" +python3 -m http.server $PORT diff --git a/desktop_env/server_win7/win7_offline_packages/MarkupSafe-2.1.5-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/MarkupSafe-2.1.5-cp38-cp38-win32.whl new file mode 100644 index 0000000..3b25326 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/MarkupSafe-2.1.5-cp38-cp38-win32.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/MouseInfo-0.1.3.tar.gz b/desktop_env/server_win7/win7_offline_packages/MouseInfo-0.1.3.tar.gz new file mode 100644 index 0000000..b71965f Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/MouseInfo-0.1.3.tar.gz differ diff --git a/desktop_env/server_win7/win7_offline_packages/PyAutoGUI-0.9.54.tar.gz b/desktop_env/server_win7/win7_offline_packages/PyAutoGUI-0.9.54.tar.gz new file mode 100644 index 0000000..a79e0d1 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/PyAutoGUI-0.9.54.tar.gz differ diff --git a/desktop_env/server_win7/win7_offline_packages/PyGetWindow-0.0.9.tar.gz b/desktop_env/server_win7/win7_offline_packages/PyGetWindow-0.0.9.tar.gz new file mode 100644 index 0000000..085b37e Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/PyGetWindow-0.0.9.tar.gz differ diff --git a/desktop_env/server_win7/win7_offline_packages/blinker-1.8.2-py3-none-any.whl 
b/desktop_env/server_win7/win7_offline_packages/blinker-1.8.2-py3-none-any.whl new file mode 100644 index 0000000..f8cb040 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/blinker-1.8.2-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/certifi-2026.2.25-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/certifi-2026.2.25-py3-none-any.whl new file mode 100644 index 0000000..9d7fbf0 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/certifi-2026.2.25-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/charset_normalizer-3.4.5-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/charset_normalizer-3.4.5-cp38-cp38-win32.whl new file mode 100644 index 0000000..6b1c062 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/charset_normalizer-3.4.5-cp38-cp38-win32.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/click-8.1.8-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/click-8.1.8-py3-none-any.whl new file mode 100644 index 0000000..db2c6b3 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/click-8.1.8-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/flask-3.0.3-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/flask-3.0.3-py3-none-any.whl new file mode 100644 index 0000000..cc25189 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/flask-3.0.3-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/idna-3.11-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/idna-3.11-py3-none-any.whl new file mode 100644 index 0000000..28f2c10 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/idna-3.11-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/importlib_metadata-8.5.0-py3-none-any.whl 
b/desktop_env/server_win7/win7_offline_packages/importlib_metadata-8.5.0-py3-none-any.whl new file mode 100644 index 0000000..7f0acc0 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/importlib_metadata-8.5.0-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/install_win7.bat b/desktop_env/server_win7/win7_offline_packages/install_win7.bat new file mode 100644 index 0000000..3cca161 --- /dev/null +++ b/desktop_env/server_win7/win7_offline_packages/install_win7.bat @@ -0,0 +1,34 @@ +@echo off +echo ======================================== +echo Offline Install for Win7 32bit Py3.8 +echo ======================================== +echo. + +python --version 2>&1 | findstr /C:"3.8" +if errorlevel 1 ( + echo [ERROR] Python 3.8 not found. + pause + exit /b 1 +) +echo [OK] Python 3.8 detected. + +echo. +echo [Step 1] Installing build tools first... +cd /d "%~dp0" +python -m pip install --no-index --find-links . setuptools wheel +echo [Step 2] Installing all packages... +python -m pip install --no-index --find-links . six zipp importlib_metadata certifi idna urllib3 charset_normalizer requests MarkupSafe itsdangerous click blinker werkzeug jinja2 flask pywin32 numpy Pillow lxml pygame pymsgbox pytweening pyscreeze PyGetWindow MouseInfo PyAutoGUI pynput pywinauto + +echo. +echo [Step 2] Verifying... +python -c "import flask; print('[OK] flask ' + flask.__version__)" +python -c "import pyautogui; print('[OK] pyautogui')" +python -c "import pywinauto; print('[OK] pywinauto')" +python -c "import numpy; print('[OK] numpy ' + numpy.__version__)" +python -c "import PIL; print('[OK] Pillow ' + PIL.__version__)" + +echo. +echo ======================================== +echo Done! 
Run: python ..\main.py +echo ======================================== +pause diff --git a/desktop_env/server_win7/win7_offline_packages/itsdangerous-2.2.0-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/itsdangerous-2.2.0-py3-none-any.whl new file mode 100644 index 0000000..359ea0c Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/itsdangerous-2.2.0-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/jinja2-3.1.6-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/jinja2-3.1.6-py3-none-any.whl new file mode 100644 index 0000000..5046d77 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/jinja2-3.1.6-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/lxml-6.0.2-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/lxml-6.0.2-cp38-cp38-win32.whl new file mode 100644 index 0000000..ab63b77 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/lxml-6.0.2-cp38-cp38-win32.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/main_win7.py b/desktop_env/server_win7/win7_offline_packages/main_win7.py new file mode 100644 index 0000000..bc9e9be --- /dev/null +++ b/desktop_env/server_win7/win7_offline_packages/main_win7.py @@ -0,0 +1,2008 @@ +import ctypes +import os +import platform +import shlex +import json +import subprocess, signal +import sys +import time +from pathlib import Path +from typing import Any, Optional, Sequence +from typing import List, Dict, Tuple, Literal +import concurrent.futures + +import lxml.etree +import pyautogui +import requests +import re +from PIL import Image, ImageGrab +from flask import Flask, request, jsonify, send_file, abort # , send_from_directory +from lxml.etree import _Element + +platform_name: str = platform.system() + +if platform_name == "Linux": + import Xlib + from Xlib import display, X + from pyxcursor import Xcursor + import pyatspi + 
def _run_inline_python(code_str):
    """Execute *code_str* inside the server process, capturing its output.

    Returns:
        A ``(stdout, stderr, returncode)`` tuple. ``returncode`` is 0 on
        normal completion, the exit status for ``sys.exit(...)``, and 1 when
        the snippet raises an exception (the traceback is appended to stderr).
    """
    import io
    import traceback
    from contextlib import redirect_stdout, redirect_stderr

    stdout_buf = io.StringIO()
    stderr_buf = io.StringIO()
    returncode = 0
    # `python -c` runs the snippet as the __main__ module; mirror that so
    # `if __name__ == "__main__":` guards behave the same way.
    exec_globals = {"__builtins__": __builtins__, "__name__": "__main__"}
    try:
        with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):
            exec(compile(code_str, "", "exec"), exec_globals)
    except SystemExit as exit_request:
        # SystemExit is not an Exception subclass; without this clause a
        # snippet calling sys.exit() would escape the request handler.
        exit_code = exit_request.code
        if exit_code is None:
            returncode = 0
        elif isinstance(exit_code, int):
            returncode = exit_code
        else:
            # A non-integer exit argument is printed to stderr, exit status 1.
            stderr_buf.write(f"{exit_code}\n")
            returncode = 1
    except Exception:
        stderr_buf.write(traceback.format_exc())
        returncode = 1
    return stdout_buf.getvalue(), stderr_buf.getvalue(), returncode


@app.route('/setup/execute', methods=['POST'])
@app.route('/execute', methods=['POST'])
def execute_command():
    """Execute the command given in the JSON request body.

    Request keys:
        command: list of arguments, or a string (split with ``shlex`` unless
            ``shell`` is true).
        shell: whether to run the command through the shell (default False).

    Returns:
        JSON with ``status``, ``output``, ``error`` and ``returncode``.
        HTTP 400 when the body is not a JSON object; HTTP 500 with ``message``
        when the subprocess cannot be run (including the 120 s timeout).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            'status': 'error',
            'message': 'Request body must be a JSON object'
        }), 400

    # The 'command' key in the JSON request should contain the command to be executed.
    shell = data.get('shell', False)
    command = data.get('command', "" if shell else [])

    if isinstance(command, str) and not shell:
        command = shlex.split(command)

    if isinstance(command, list):
        # Expand user directory. Only meaningful for argument lists: a shell
        # string is left for the shell to expand.
        command = [
            os.path.expanduser(arg) if isinstance(arg, str) and arg.startswith("~/") else arg
            for arg in command
        ]

        # Replace 'python' with sys.executable to use the same Python interpreter as the server
        if command and command[0] in ('python', 'python3', 'python.exe', 'python3.exe'):
            command[0] = sys.executable

    # On Windows, a `python -c CODE` command is exec()'d directly in this
    # process instead of spawning a subprocess. Subprocesses launched from a
    # windowless pythonw.exe parent cannot access the interactive desktop, so
    # pyautogui calls fail there, while the server process has desktop access.
    # NOTE(review): the snippet comes from the request and runs with the
    # server's privileges; this endpoint performs no safety checks by design.
    if (platform_name == "Windows"
            and not shell
            and isinstance(command, list)
            and len(command) >= 3
            and command[0] == sys.executable
            and command[1] == "-c"):
        output, error, returncode = _run_inline_python(command[2])
        return jsonify({
            'status': 'success' if returncode == 0 else 'error',
            'output': output,
            'error': error,
            'returncode': returncode
        })

    # Execute the command without any safety checks.
    try:
        if platform_name == "Windows":
            flags = subprocess.CREATE_NO_WINDOW
        else:
            flags = 0
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=shell,
            text=True,
            timeout=120,
            creationflags=flags,
        )
        return jsonify({
            'status': 'success',
            'output': result.stdout,
            'error': result.stderr,
            'returncode': result.returncode
        })
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
verification: + window_name = verification['window_exists'] + try: + if platform_name == 'Linux': + wmctrl_result = subprocess.run(['wmctrl', '-l'], + capture_output=True, text=True, check=True) + if window_name.lower() not in wmctrl_result.stdout.lower(): + verification_passed = False + elif platform_name in ['Windows', 'Darwin']: + import pygetwindow as gw + windows = gw.getWindowsWithTitle(window_name) + if not windows: + verification_passed = False + except Exception: + verification_passed = False + + # Check command execution if specified + if 'command_success' in verification: + verify_cmd = verification['command_success'] + try: + verify_result = subprocess.run(verify_cmd, shell=True, + capture_output=True, text=True, timeout=5) + if verify_result.returncode != 0: + verification_passed = False + except Exception: + verification_passed = False + + if verification_passed: + return jsonify({ + 'status': 'success', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode, + 'verification': 'passed', + 'wait_time': time.time() - start_time + }) + + time.sleep(check_interval) + + # Verification failed + return jsonify({ + 'status': 'verification_failed', + 'output': result.stdout, + 'error': result.stderr, + 'returncode': result.returncode, + 'verification': 'failed', + 'wait_time': max_wait_time + }), 500 + + except Exception as e: + return jsonify({ + 'status': 'error', + 'message': str(e) + }), 500 + + +def _get_machine_architecture() -> str: + """ Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc. 
+ """ + architecture = platform.machine().lower() + if architecture in ['amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686']: + return 'amd' + elif architecture in ['arm64', 'aarch64', 'aarch32']: + return 'arm' + else: + return 'unknown' + + +@app.route('/setup/launch', methods=["POST"]) +def launch_app(): + data = request.json + shell = data.get("shell", False) + command: List[str] = data.get("command", "" if shell else []) + + if isinstance(command, str) and not shell: + command = shlex.split(command) + + # Expand user directory + for i, arg in enumerate(command): + if arg.startswith("~/"): + command[i] = os.path.expanduser(arg) + + try: + if 'google-chrome' in command and _get_machine_architecture() == 'arm': + index = command.index('google-chrome') + command[index] = 'chromium' # arm64 chrome is not available yet, can only use chromium + + # On Windows, use os.startfile() for direct exe/file launches (equivalent to double-click). + # Popen lacks the Shell context (ShellExecute) that some apps require to initialize correctly. 
+ if platform_name == "Windows" and not shell: + exe = command[0] if isinstance(command, list) else command + exe_dir = os.path.dirname(exe) + # os.startfile uses ShellExecuteEx — same as Explorer double-click + os.startfile(exe) + return "{:} launched successfully via startfile".format(exe) + + # Set cwd to the exe's directory so apps that rely on relative paths work correctly + cwd = None + if not shell and isinstance(command, list) and len(command) > 0: + exe_path = command[0] + exe_dir = os.path.dirname(exe_path) + if exe_dir and os.path.isdir(exe_dir): + cwd = exe_dir + subprocess.Popen(command, shell=shell, cwd=cwd) + return "{:} launched successfully".format(command if shell else " ".join(command)) + except Exception as e: + return jsonify({"status": "error", "message": str(e)}), 500 + + +@app.route('/screenshot', methods=['GET']) +def capture_screen_with_cursor(): + file_path = os.path.join(os.path.dirname(__file__), "screenshots", "screenshot.png") + user_platform = platform.system() + + # Ensure the screenshots directory exists + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + if user_platform == "Windows": + def get_cursor(): + hcursor = win32gui.GetCursorInfo()[1] + hdc = win32ui.CreateDCFromHandle(win32gui.GetDC(0)) + hbmp = win32ui.CreateBitmap() + hbmp.CreateCompatibleBitmap(hdc, 36, 36) + hdc = hdc.CreateCompatibleDC() + hdc.SelectObject(hbmp) + hdc.DrawIcon((0,0), hcursor) + + bmpinfo = hbmp.GetInfo() + bmpstr = hbmp.GetBitmapBits(True) + cursor = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1).convert("RGBA") + + win32gui.DestroyIcon(hcursor) + win32gui.DeleteObject(hbmp.GetHandle()) + hdc.DeleteDC() + + pixdata = cursor.load() + + width, height = cursor.size + for y in range(height): + for x in range(width): + if pixdata[x, y] == (0, 0, 0, 255): + pixdata[x, y] = (0, 0, 0, 0) + + hotspot = win32gui.GetIconInfo(hcursor)[1:3] + + return (cursor, hotspot) + + # Win8.1+ uses shcore; Win7 
fallback uses GetDeviceCaps via GDI + try: + ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100 + except (OSError, AttributeError): + hdc = ctypes.windll.user32.GetDC(0) + logical_dpi = ctypes.windll.gdi32.GetDeviceCaps(hdc, 88) # LOGPIXELSX + ctypes.windll.user32.ReleaseDC(0, hdc) + ratio = logical_dpi / 96.0 # 96 DPI == 100% scaling + + # get logical screen size + user32 = ctypes.windll.user32 + logical_width = user32.GetSystemMetrics(0) + logical_height = user32.GetSystemMetrics(1) + + # ===== Key fix: get cursor position before taking screenshot ===== + # win32gui.GetCursorPos() returns logical coordinates (consistent with pyautogui) + pos_win = win32gui.GetCursorPos() + logger.info(f"Cursor position (logical coordinates): {pos_win}") + + # Take screenshot immediately to reduce time difference + img = ImageGrab.grab(bbox=None, include_layered_windows=True) + # ============================================= + + # ===== DPI scaling fix ===== + if ratio != 1.0: + physical_width, physical_height = img.size + logger.info(f"Detected DPI scaling: {ratio}x ({ratio*100}%)") + logger.info(f"Physical screenshot size: {physical_width}x{physical_height}") + logger.info(f"Logical resolution: {logical_width}x{logical_height}") + logger.info(f"Resizing screenshot to match logical resolution...") + img = img.resize((logical_width, logical_height), Image.Resampling.LANCZOS) + logger.info(f"Screenshot resized to: {img.size}") + # ========================== + + try: + cursor, (hotspotx, hotspoty) = get_cursor() + + # ===== Cursor position handling ===== + # win32gui.GetCursorPos() and pyautogui both use logical coordinates + # The screenshot has been resized to logical resolution, so use directly + logical_cursor_x = pos_win[0] + logical_cursor_y = pos_win[1] + + pos = (logical_cursor_x - hotspotx, logical_cursor_y - hotspoty) + + logger.info(f"Cursor position (logical coordinates): ({logical_cursor_x}, {logical_cursor_y})") + logger.info(f"Hotspot offset: ({hotspotx}, 
def _has_active_terminal(desktop: Accessible) -> bool:
    """Report whether a gnome-terminal window is currently active (Linux only).

    Scans the children of *desktop* for an application named
    ``gnome-terminal-server`` and returns True as soon as one of its frames
    carries the active state; otherwise returns False.
    """
    return any(
        window.getRoleName() == "frame" and window.getState().contains(pyatspi.STATE_ACTIVE)
        for candidate in desktop
        if candidate.getRoleName() == "application" and candidate.name == "gnome-terminal-server"
        for window in candidate
    )
+ """ + try: + from pywinauto import Desktop + from pywinauto.findwindows import ElementNotFoundError + + desktop = Desktop(backend="uia") + + # Common terminal applications on Windows + terminal_apps = [ + "WindowsTerminal.exe", # Windows Terminal + "powershell.exe", # PowerShell + "pwsh.exe", # PowerShell Core + "cmd.exe", # Command Prompt + "conhost.exe" # Console Host + ] + + # Try to find active terminal windows + for window in desktop.windows(): + try: + # Check if window is visible and not minimized + if not window.is_visible() or window.is_minimized(): + continue + + # Get window process name + process_name = window.element_info.name.lower() + + # Check if this is a terminal window + is_terminal = False + for term_app in terminal_apps: + if term_app.lower() in process_name or \ + any(term_name in process_name for term_name in ['terminal', 'powershell', 'command prompt', 'cmd']): + is_terminal = True + break + + if not is_terminal: + continue + + # Try to get text content from the terminal + # First, try to find console/edit controls that contain the output + try: + # For Windows Terminal and modern consoles + # Look for Edit or Document controls that contain the text + text_controls = window.descendants(control_type="Edit") + if not text_controls: + text_controls = window.descendants(control_type="Document") + if not text_controls: + text_controls = window.descendants(control_type="Text") + + for control in text_controls: + try: + text = control.window_text() + if text and len(text.strip()) > 0: + return text.rstrip() + except: + pass + + # If no text controls found, try to get the window text directly + window_text = window.window_text() + if window_text and len(window_text.strip()) > 0: + # Filter out just the window title + if window_text not in ['Windows PowerShell', 'Command Prompt', 'PowerShell', 'Administrator: Windows PowerShell']: + return window_text.rstrip() + + except Exception as e: + logger.debug(f"Error getting text from window 
@app.route('/terminal', methods=['GET'])
def get_terminal_output():
    """Return the text of the active terminal as JSON.

    On Linux the accessibility tree is searched for the focused terminal
    inside the active gnome-terminal frame. On Windows the lookup is
    delegated to ``_get_windows_terminal_output``. Other platforms get a
    plain-text message with HTTP 500.

    Returns:
        ``{"output": <str or None>, "status": "success"}`` on success, where
        ``output`` is None when no terminal was found, or
        ``{"status": "error", "message": ...}`` with HTTP 500 on failure.
    """
    user_platform = platform.system()
    output: Optional[str] = None
    try:
        if user_platform == "Linux":
            desktop: Accessible = pyatspi.Registry.getDesktop(0)
            if _has_active_terminal(desktop):
                desktop_xml: _Element = _create_atspi_node(desktop)
                # 1. the terminal window (frame of application is st:active) is open and active
                # 2. the terminal tab (terminal status is st:focused) is focused
                xpath = '//application[@name="gnome-terminal-server"]/frame[@st:active="true"]//terminal[@st:focused="true"]'
                terminals: List[_Element] = desktop_xml.xpath(xpath, namespaces=_accessibility_ns_map_ubuntu)
                if len(terminals) == 1:
                    # An element with no text content has `.text is None`;
                    # treat that as empty output instead of failing on .rstrip().
                    output = (terminals[0].text or "").rstrip()
        elif user_platform == "Windows":
            output = _get_windows_terminal_output()
            logger.debug(f"Terminal output retrieved: {output}")
        else:  # macOS platform is not implemented currently
            return "Currently not implemented for platform {:}.".format(platform.platform()), 500
        return jsonify({"output": output, "status": "success"})
    except Exception as e:
        logger.error("Failed to get terminal output. Error: %s", e)
        return jsonify({"status": "error", "message": str(e)}), 500
getter for Ubuntu +libreoffice_version_tuple: Optional[Tuple[int, ...]] = None +MAX_DEPTH = 50 +MAX_WIDTH = 1024 +MAX_CALLS = 5000 + + +def _get_libreoffice_version() -> Tuple[int, ...]: + """Function to get the LibreOffice version as a tuple of integers.""" + result = subprocess.run("libreoffice --version", shell=True, text=True, stdout=subprocess.PIPE) + version_str = result.stdout.split()[1] # Assuming version is the second word in the command output + return tuple(map(int, version_str.split("."))) + + +def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = None) -> _Element: + node_name = node.name + attribute_dict: Dict[str, Any] = {"name": node_name} + + # States + states: List[StateType] = node.getState().get_states() + for st in states: + state_name: str = StateType._enum_lookup[st] + state_name: str = state_name.split("_", maxsplit=1)[1].lower() + if len(state_name) == 0: + continue + attribute_dict["{{{:}}}{:}".format(_accessibility_ns_map_ubuntu["st"], state_name)] = "true" + + # Attributes + attributes: Dict[str, str] = node.get_attributes() + for attribute_name, attribute_value in attributes.items(): + if len(attribute_name) == 0: + continue + attribute_dict["{{{:}}}{:}".format(_accessibility_ns_map_ubuntu["attr"], attribute_name)] = attribute_value + + # Component + if attribute_dict.get("{{{:}}}visible".format(_accessibility_ns_map_ubuntu["st"]), "false") == "true" \ + and attribute_dict.get("{{{:}}}showing".format(_accessibility_ns_map_ubuntu["st"]), "false") == "true": + try: + component: Component = node.queryComponent() + except NotImplementedError: + pass + else: + bbox: Sequence[int] = component.getExtents(pyatspi.XY_SCREEN) + attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_ubuntu["cp"])] = \ + str(tuple(bbox[0:2])) + attribute_dict["{{{:}}}size".format(_accessibility_ns_map_ubuntu["cp"])] = str(tuple(bbox[2:])) + + text = "" + # Text + try: + text_obj: ATText = node.queryText() + # only text shown on 
current screen is available + # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount) + text: str = text_obj.getText(0, text_obj.characterCount) + # if flag=="thunderbird": + # appeared in thunderbird (uFFFC) (not only in thunderbird), "Object + # Replacement Character" in Unicode, "used as placeholder in text for + # an otherwise unspecified object; uFFFD is another "Replacement + # Character", just in case + text = text.replace("\ufffc", "").replace("\ufffd", "") + except NotImplementedError: + pass + + # Image, Selection, Value, Action + try: + node.queryImage() + attribute_dict["image"] = "true" + except NotImplementedError: + pass + + try: + node.querySelection() + attribute_dict["selection"] = "true" + except NotImplementedError: + pass + + try: + value: ATValue = node.queryValue() + value_key = f"{{{_accessibility_ns_map_ubuntu['val']}}}" + + for attr_name, attr_func in [ + ("value", lambda: value.currentValue), + ("min", lambda: value.minimumValue), + ("max", lambda: value.maximumValue), + ("step", lambda: value.minimumIncrement) + ]: + try: + attribute_dict[f"{value_key}{attr_name}"] = str(attr_func()) + except: + pass + except NotImplementedError: + pass + + try: + action: ATAction = node.queryAction() + for i in range(action.nActions): + action_name: str = action.getName(i).replace(" ", "-") + attribute_dict[ + "{{{:}}}{:}_desc".format(_accessibility_ns_map_ubuntu["act"], action_name)] = action.getDescription( + i) + attribute_dict[ + "{{{:}}}{:}_kb".format(_accessibility_ns_map_ubuntu["act"], action_name)] = action.getKeyBinding(i) + except NotImplementedError: + pass + + # Add from here if we need more attributes in the future... 
+ + raw_role_name: str = node.getRoleName().strip() + node_role_name = (raw_role_name or "unknown").replace(" ", "-") + + if not flag: + if raw_role_name == "document spreadsheet": + flag = "calc" + if raw_role_name == "application" and node.name == "Thunderbird": + flag = "thunderbird" + + xml_node = lxml.etree.Element( + node_role_name, + attrib=attribute_dict, + nsmap=_accessibility_ns_map_ubuntu + ) + + if len(text) > 0: + xml_node.text = text + + if depth == MAX_DEPTH: + logger.warning("Max depth reached") + return xml_node + + if flag == "calc" and node_role_name == "table": + # Maximum column: 1024 if ver<=7.3 else 16384 + # Maximum row: 104 8576 + # Maximun sheet: 1 0000 + + global libreoffice_version_tuple + MAXIMUN_COLUMN = 1024 if libreoffice_version_tuple < (7, 4) else 16384 + MAX_ROW = 104_8576 + + index_base = 0 + first_showing = False + column_base = None + for r in range(MAX_ROW): + for clm in range(column_base or 0, MAXIMUN_COLUMN): + child_node: Accessible = node[index_base + clm] + showing: bool = child_node.getState().contains(STATE_SHOWING) + if showing: + child_node: _Element = _create_atspi_node(child_node, depth + 1, flag) + if not first_showing: + column_base = clm + first_showing = True + xml_node.append(child_node) + elif first_showing and column_base is not None or clm >= 500: + break + if first_showing and clm == column_base or not first_showing and r >= 500: + break + index_base += MAXIMUN_COLUMN + return xml_node + else: + try: + for i, ch in enumerate(node): + if i == MAX_WIDTH: + logger.warning("Max width reached") + break + xml_node.append(_create_atspi_node(ch, depth + 1, flag)) + except: + logger.warning("Error occurred during children traversing. Has Ignored. 
Node: %s", + lxml.etree.tostring(xml_node, encoding="unicode")) + return xml_node + + +# A11y tree getter for Windows +def _create_pywinauto_node(node, nodes, depth: int = 0, flag: Optional[str] = None) -> _Element: + nodes = nodes or set() + if node in nodes: + return + nodes.add(node) + + attribute_dict: Dict[str, Any] = {"name": node.element_info.name} + + base_properties = {} + try: + base_properties.update( + node.get_properties()) # get all writable/not writable properties, but have bugs when landing on chrome and it's slower! + except: + logger.debug("Failed to call get_properties(), trying to get writable properites") + try: + _element_class = node.__class__ + + class TempElement(node.__class__): + writable_props = pywinauto.base_wrapper.BaseWrapper.writable_props + + # Instantiate the subclass + node.__class__ = TempElement + # Retrieve properties using get_properties() + properties = node.get_properties() + node.__class__ = _element_class + + base_properties.update(properties) # only get all writable properties + logger.debug("get writable properties") + except Exception as e: + logger.error(e) + pass + + # Count-cnt + for attr_name in ["control_count", "button_count", "item_count", "column_count"]: + try: + attribute_dict[f"{{{_accessibility_ns_map_windows['cnt']}}}{attr_name}"] = base_properties[ + attr_name].lower() + except: + pass + + # Columns-cols + try: + attribute_dict[f"{{{_accessibility_ns_map_windows['cols']}}}columns"] = base_properties["columns"].lower() + except: + pass + + # Id-id + for attr_name in ["control_id", "automation_id", "window_id"]: + try: + attribute_dict[f"{{{_accessibility_ns_map_windows['id']}}}{attr_name}"] = base_properties[attr_name].lower() + except: + pass + + # States + # 19 sec out of 20 + for attr_name, attr_func in [ + ("enabled", lambda: node.is_enabled()), + ("visible", lambda: node.is_visible()), + # ("active", lambda: node.is_active()), # occupied most of the time: 20s out of 21s for slack, 51.5s out of 54s for 
WeChat # maybe use for cutting branches + ("minimized", lambda: node.is_minimized()), + ("maximized", lambda: node.is_maximized()), + ("normal", lambda: node.is_normal()), + ("unicode", lambda: node.is_unicode()), + ("collapsed", lambda: node.is_collapsed()), + ("checkable", lambda: node.is_checkable()), + ("checked", lambda: node.is_checked()), + ("focused", lambda: node.is_focused()), + ("keyboard_focused", lambda: node.is_keyboard_focused()), + ("selected", lambda: node.is_selected()), + ("selection_required", lambda: node.is_selection_required()), + ("pressable", lambda: node.is_pressable()), + ("pressed", lambda: node.is_pressed()), + ("expanded", lambda: node.is_expanded()), + ("editable", lambda: node.is_editable()), + ("has_keyboard_focus", lambda: node.has_keyboard_focus()), + ("is_keyboard_focusable", lambda: node.is_keyboard_focusable()), + ]: + try: + attribute_dict[f"{{{_accessibility_ns_map_windows['st']}}}{attr_name}"] = str(attr_func()).lower() + except: + pass + + # Component + try: + rectangle = node.rectangle() + attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_windows["cp"])] = \ + "({:d}, {:d})".format(rectangle.left, rectangle.top) + attribute_dict["{{{:}}}size".format(_accessibility_ns_map_windows["cp"])] = \ + "({:d}, {:d})".format(rectangle.width(), rectangle.height()) + + except Exception as e: + logger.error("Error accessing rectangle: ", e) + + # Text + text: str = node.window_text() + if text == attribute_dict["name"]: + text = "" + + # Selection + if hasattr(node, "select"): + attribute_dict["selection"] = "true" + + # Value + for attr_name, attr_funcs in [ + ("step", [lambda: node.get_step()]), + ("value", [lambda: node.value(), lambda: node.get_value(), lambda: node.get_position()]), + ("min", [lambda: node.min_value(), lambda: node.get_range_min()]), + ("max", [lambda: node.max_value(), lambda: node.get_range_max()]) + ]: + for attr_func in attr_funcs: + if hasattr(node, attr_func.__name__): + try: + 
attribute_dict[f"{{{_accessibility_ns_map_windows['val']}}}{attr_name}"] = str(attr_func()) + break # exit once the attribute is set successfully + except: + pass + + attribute_dict["{{{:}}}class".format(_accessibility_ns_map_windows["class"])] = str(type(node)) + + # class_name + for attr_name in ["class_name", "friendly_class_name"]: + try: + attribute_dict[f"{{{_accessibility_ns_map_windows['class']}}}{attr_name}"] = base_properties[ + attr_name].lower() + except: + pass + + node_role_name: str = node.class_name().lower().replace(" ", "-") + node_role_name = "".join( + map(lambda _ch: _ch if _ch.isidentifier() or _ch in {"-"} or _ch.isalnum() else "-", node_role_name)) + + if node_role_name.strip() == "": + node_role_name = "unknown" + if not node_role_name[0].isalpha(): + node_role_name = "tag" + node_role_name + + xml_node = lxml.etree.Element( + node_role_name, + attrib=attribute_dict, + nsmap=_accessibility_ns_map_windows + ) + + if text is not None and len(text) > 0 and text != attribute_dict["name"]: + xml_node.text = text + + if depth == MAX_DEPTH: + logger.warning("Max depth reached") + return xml_node + + # use multi thread to accelerate children fetching + children = node.children() + if children: + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_child = [executor.submit(_create_pywinauto_node, ch, nodes, depth + 1, flag) for ch in + children[:MAX_WIDTH]] + try: + xml_node.extend([future.result() for future in concurrent.futures.as_completed(future_to_child)]) + except Exception as e: + logger.error(f"Exception occurred: {e}") + return xml_node + + +# A11y tree getter for macOS + +def _create_axui_node(node, nodes: set = None, depth: int = 0, bbox: tuple = None): + nodes = nodes or set() + if node in nodes: + return + nodes.add(node) + + reserved_keys = { + "AXEnabled": "st", + "AXFocused": "st", + "AXFullScreen": "st", + "AXTitle": "attr", + "AXChildrenInNavigationOrder": "attr", + "AXChildren": "attr", + "AXFrame": "attr", + 
"AXRole": "role", + "AXHelp": "attr", + "AXRoleDescription": "role", + "AXSubrole": "role", + "AXURL": "attr", + "AXValue": "val", + "AXDescription": "attr", + "AXDOMIdentifier": "attr", + "AXSelected": "st", + "AXInvalid": "st", + "AXRows": "attr", + "AXColumns": "attr", + } + attribute_dict = {} + + if depth == 0: + bbox = ( + node["kCGWindowBounds"]["X"], + node["kCGWindowBounds"]["Y"], + node["kCGWindowBounds"]["X"] + node["kCGWindowBounds"]["Width"], + node["kCGWindowBounds"]["Y"] + node["kCGWindowBounds"]["Height"] + ) + app_ref = ApplicationServices.AXUIElementCreateApplication(node["kCGWindowOwnerPID"]) + + attribute_dict["name"] = node["kCGWindowOwnerName"] + if attribute_dict["name"] != "Dock": + error_code, app_wins_ref = ApplicationServices.AXUIElementCopyAttributeValue( + app_ref, "AXWindows", None) + if error_code: + logger.error("MacOS parsing %s encountered Error code: %d", app_ref, error_code) + else: + app_wins_ref = [app_ref] + node = app_wins_ref[0] + + error_code, attr_names = ApplicationServices.AXUIElementCopyAttributeNames(node, None) + + if error_code: + # -25202: AXError.invalidUIElement + # The accessibility object received in this event is invalid. 
+ return + + value = None + + if "AXFrame" in attr_names: + error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, "AXFrame", None) + rep = repr(attr_val) + x_value = re.search(r"x:(-?[\d.]+)", rep) + y_value = re.search(r"y:(-?[\d.]+)", rep) + w_value = re.search(r"w:(-?[\d.]+)", rep) + h_value = re.search(r"h:(-?[\d.]+)", rep) + type_value = re.search(r"type\s?=\s?(\w+)", rep) + value = { + "x": float(x_value.group(1)) if x_value else None, + "y": float(y_value.group(1)) if y_value else None, + "w": float(w_value.group(1)) if w_value else None, + "h": float(h_value.group(1)) if h_value else None, + "type": type_value.group(1) if type_value else None, + } + + if not any(v is None for v in value.values()): + x_min = max(bbox[0], value["x"]) + x_max = min(bbox[2], value["x"] + value["w"]) + y_min = max(bbox[1], value["y"]) + y_max = min(bbox[3], value["y"] + value["h"]) + + if x_min > x_max or y_min > y_max: + # No intersection + return + + role = None + text = None + + for attr_name, ns_key in reserved_keys.items(): + if attr_name not in attr_names: + continue + + if value and attr_name == "AXFrame": + bb = value + if not any(v is None for v in bb.values()): + attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_macos["cp"])] = \ + "({:d}, {:d})".format(int(bb["x"]), int(bb["y"])) + attribute_dict["{{{:}}}size".format(_accessibility_ns_map_macos["cp"])] = \ + "({:d}, {:d})".format(int(bb["w"]), int(bb["h"])) + continue + + error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, attr_name, None) + + full_attr_name = f"{{{_accessibility_ns_map_macos[ns_key]}}}{attr_name}" + + if attr_name == "AXValue" and not text: + text = str(attr_val) + continue + + if attr_name == "AXRoleDescription": + role = attr_val + continue + + # Set the attribute_dict + if not (isinstance(attr_val, ApplicationServices.AXUIElementRef) + or isinstance(attr_val, (AppKit.NSArray, list))): + if attr_val is not None: + 
attribute_dict[full_attr_name] = str(attr_val) + + node_role_name = role.lower().replace(" ", "_") if role else "unknown_role" + + xml_node = lxml.etree.Element( + node_role_name, + attrib=attribute_dict, + nsmap=_accessibility_ns_map_macos + ) + + if text is not None and len(text) > 0: + xml_node.text = text + + if depth == MAX_DEPTH: + logger.warning("Max depth reached") + return xml_node + + future_to_child = [] + + with concurrent.futures.ThreadPoolExecutor() as executor: + for attr_name, ns_key in reserved_keys.items(): + if attr_name not in attr_names: + continue + + error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, attr_name, None) + if isinstance(attr_val, ApplicationServices.AXUIElementRef): + future_to_child.append(executor.submit(_create_axui_node, attr_val, nodes, depth + 1, bbox)) + + elif isinstance(attr_val, (AppKit.NSArray, list)): + for child in attr_val: + future_to_child.append(executor.submit(_create_axui_node, child, nodes, depth + 1, bbox)) + + try: + for future in concurrent.futures.as_completed(future_to_child): + result = future.result() + if result is not None: + xml_node.append(result) + except Exception as e: + logger.error(f"Exception occurred: {e}") + + return xml_node + + +@app.route("/accessibility", methods=["GET"]) +def get_accessibility_tree(): + os_name: str = platform.system() + + # AT-SPI works for KDE as well + if os_name == "Linux": + global libreoffice_version_tuple + libreoffice_version_tuple = _get_libreoffice_version() + + desktop: Accessible = pyatspi.Registry.getDesktop(0) + xml_node = lxml.etree.Element("desktop-frame", nsmap=_accessibility_ns_map_ubuntu) + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [executor.submit(_create_atspi_node, app_node, 1) for app_node in desktop] + for future in concurrent.futures.as_completed(futures): + xml_tree = future.result() + xml_node.append(xml_tree) + return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")}) + + 
elif os_name == "Windows": + # Attention: Windows a11y tree is implemented to be read through `pywinauto` module, however, + # two different backends `win32` and `uia` are supported and different results may be returned + desktop: Desktop = Desktop(backend="uia") + xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_windows) + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [executor.submit(_create_pywinauto_node, wnd, {}, 1) for wnd in desktop.windows()] + for future in concurrent.futures.as_completed(futures): + xml_tree = future.result() + xml_node.append(xml_tree) + return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")}) + + elif os_name == "Darwin": + # TODO: Add Dock and MenuBar + xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_macos) + + with concurrent.futures.ThreadPoolExecutor() as executor: + foreground_windows = [ + win for win in Quartz.CGWindowListCopyWindowInfo( + (Quartz.kCGWindowListExcludeDesktopElements | + Quartz.kCGWindowListOptionOnScreenOnly), + Quartz.kCGNullWindowID + ) if win["kCGWindowLayer"] == 0 and win["kCGWindowOwnerName"] != "Window Server" + ] + dock_info = [ + win for win in Quartz.CGWindowListCopyWindowInfo( + Quartz.kCGWindowListOptionAll, + Quartz.kCGNullWindowID + ) if win.get("kCGWindowName", None) == "Dock" + ] + + futures = [ + executor.submit(_create_axui_node, wnd, None, 0) + for wnd in foreground_windows + dock_info + ] + + for future in concurrent.futures.as_completed(futures): + xml_tree = future.result() + if xml_tree is not None: + xml_node.append(xml_tree) + + return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")}) + + else: + return "Currently not implemented for platform {:}.".format(platform.platform()), 500 + + +@app.route('/screen_size', methods=['POST']) +def get_screen_size(): + if platform_name == "Linux": + d = display.Display() + screen_width = d.screen().width_in_pixels + screen_height = 
d.screen().height_in_pixels + elif platform_name == "Windows": + user32 = ctypes.windll.user32 + screen_width: int = user32.GetSystemMetrics(0) + screen_height: int = user32.GetSystemMetrics(1) + return jsonify( + { + "width": screen_width, + "height": screen_height + } + ) + + +@app.route('/window_size', methods=['POST']) +def get_window_size(): + if 'app_class_name' in request.form: + app_class_name = request.form['app_class_name'] + else: + return jsonify({"error": "app_class_name is required"}), 400 + + if platform_name != "Linux": + return jsonify({"error": "window_size is only supported on Linux"}), 501 + + d = display.Display() + root = d.screen().root + window_ids = root.get_full_property(d.intern_atom('_NET_CLIENT_LIST'), X.AnyPropertyType).value + + for window_id in window_ids: + try: + window = d.create_resource_object('window', window_id) + wm_class = window.get_wm_class() + + if wm_class is None: + continue + + if app_class_name.lower() in [name.lower() for name in wm_class]: + geom = window.get_geometry() + return jsonify( + { + "width": geom.width, + "height": geom.height + } + ) + except Xlib.error.XError: # Ignore windows that give an error + continue + return None + + +@app.route('/desktop_path', methods=['POST']) +def get_desktop_path(): + # Get the home directory in a platform-independent manner using pathlib + home_directory = str(Path.home()) + + # Determine the desktop path based on the operating system + desktop_path = { + "Windows": os.path.join(home_directory, "Desktop"), + "Darwin": os.path.join(home_directory, "Desktop"), # macOS + "Linux": os.path.join(home_directory, "Desktop") + }.get(platform.system(), None) + + # Check if the operating system is supported and the desktop path exists + if desktop_path and os.path.exists(desktop_path): + return jsonify(desktop_path=desktop_path) + else: + return jsonify(error="Unsupported operating system or desktop path not found"), 404 + + +@app.route('/wallpaper', methods=['POST']) +def 
get_wallpaper(): + def get_wallpaper_windows(): + SPI_GETDESKWALLPAPER = 0x73 + MAX_PATH = 260 + buffer = ctypes.create_unicode_buffer(MAX_PATH) + ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0) + return buffer.value + + def get_wallpaper_macos(): + script = """ + tell application "System Events" to tell every desktop to get picture + """ + process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, error = process.communicate() + if error: + app.logger.error("Error: %s", error.decode('utf-8')) + return None + return output.strip().decode('utf-8') + + def get_wallpaper_linux(): + try: + output = subprocess.check_output( + ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"], + stderr=subprocess.PIPE + ) + return output.decode('utf-8').strip().replace('file://', '').replace("'", "") + except subprocess.CalledProcessError as e: + app.logger.error("Error: %s", e) + return None + + os_name = platform.system() + wallpaper_path = None + if os_name == 'Windows': + wallpaper_path = get_wallpaper_windows() + elif os_name == 'Darwin': + wallpaper_path = get_wallpaper_macos() + elif os_name == 'Linux': + wallpaper_path = get_wallpaper_linux() + else: + app.logger.error(f"Unsupported OS: {os_name}") + abort(400, description="Unsupported OS") + + if wallpaper_path: + try: + # Ensure the filename is secure + return send_file(wallpaper_path, mimetype='image/png') + except Exception as e: + app.logger.error(f"An error occurred while serving the wallpaper file: {e}") + abort(500, description="Unable to serve the wallpaper file") + else: + abort(404, description="Wallpaper file not found") + + +@app.route('/list_directory', methods=['POST']) +def get_directory_tree(): + def _list_dir_contents(directory): + """ + List the contents of a directory recursively, building a tree structure. + + :param directory: The path of the directory to inspect. 
+ :return: A nested dictionary with the contents of the directory. + """ + tree = {'type': 'directory', 'name': os.path.basename(directory), 'children': []} + try: + # List all files and directories in the current directory + for entry in os.listdir(directory): + full_path = os.path.join(directory, entry) + # If entry is a directory, recurse into it + if os.path.isdir(full_path): + tree['children'].append(_list_dir_contents(full_path)) + else: + tree['children'].append({'type': 'file', 'name': entry}) + except OSError as e: + # If the directory cannot be accessed, return the exception message + tree = {'error': str(e)} + return tree + + # Extract the 'path' parameter from the JSON request + data = request.get_json() + if 'path' not in data: + return jsonify(error="Missing 'path' parameter"), 400 + + start_path = data['path'] + # Ensure the provided path is a directory + if not os.path.isdir(start_path): + return jsonify(error="The provided path is not a directory"), 400 + + # Generate the directory tree starting from the provided path + directory_tree = _list_dir_contents(start_path) + return jsonify(directory_tree=directory_tree) + + +@app.route('/file', methods=['POST']) +def get_file(): + # Retrieve filename from the POST request + if 'file_path' in request.form: + file_path = os.path.expandvars(os.path.expanduser(request.form['file_path'])) + else: + return jsonify({"error": "file_path is required"}), 400 + + try: + # Check if the file exists and get its size + if not os.path.exists(file_path): + return jsonify({"error": "File not found"}), 404 + + file_size = os.path.getsize(file_path) + logger.info(f"Serving file: {file_path} ({file_size} bytes)") + + # Check if the file exists and send it to the user + return send_file(file_path, as_attachment=True) + except FileNotFoundError: + # If the file is not found, return a 404 error + return jsonify({"error": "File not found"}), 404 + except Exception as e: + logger.error(f"Error serving file {file_path}: {e}") + 
return jsonify({"error": f"Failed to serve file: {str(e)}"}), 500 + + +@app.route("/setup/upload", methods=["POST"]) +def upload_file(): + # Retrieve filename from the POST request + if 'file_path' in request.form and 'file_data' in request.files: + file_path = os.path.expandvars(os.path.expanduser(request.form['file_path'])) + file = request.files["file_data"] + + try: + # Ensure target directory exists + target_dir = os.path.dirname(file_path) + if target_dir: # Only create directory if it's not empty + os.makedirs(target_dir, exist_ok=True) + + # Save file and get size for verification + file.save(file_path) + uploaded_size = os.path.getsize(file_path) + + logger.info(f"File uploaded successfully: {file_path} ({uploaded_size} bytes)") + return f"File Uploaded: {uploaded_size} bytes" + + except Exception as e: + logger.error(f"Error uploading file to {file_path}: {e}") + # Clean up partial file if it exists + if os.path.exists(file_path): + try: + os.remove(file_path) + except: + pass + return jsonify({"error": f"Failed to upload file: {str(e)}"}), 500 + else: + return jsonify({"error": "file_path and file_data are required"}), 400 + + +@app.route('/platform', methods=['GET']) +def get_platform(): + return platform.system() + + +@app.route('/cursor_position', methods=['GET']) +def get_cursor_position(): + pos = pyautogui.position() + return jsonify(pos.x, pos.y) + +@app.route("/setup/change_wallpaper", methods=['POST']) +def change_wallpaper(): + data = request.json + path = data.get('path', None) + + if not path: + return "Path not supplied!", 400 + + path = Path(os.path.expandvars(os.path.expanduser(path))) + + if not path.exists(): + return f"File not found: {path}", 404 + + try: + user_platform = platform.system() + if user_platform == "Windows": + import ctypes + ctypes.windll.user32.SystemParametersInfoW(20, 0, str(path), 3) + elif user_platform == "Linux": + import subprocess + subprocess.run(["gsettings", "set", "org.gnome.desktop.background", 
"picture-uri", f"file://{path}"]) + elif user_platform == "Darwin": # (Mac OS) + import subprocess + subprocess.run( + ["osascript", "-e", f'tell application "Finder" to set desktop picture to POSIX file "{path}"']) + return "Wallpaper changed successfully" + except Exception as e: + return f"Failed to change wallpaper. Error: {e}", 500 + + +@app.route("/setup/download_file", methods=['POST']) +def download_file(): + data = request.json + url = data.get('url', None) + path = data.get('path', None) + + if not url or not path: + return "Path or URL not supplied!", 400 + + path = Path(os.path.expandvars(os.path.expanduser(path))) + path.parent.mkdir(parents=True, exist_ok=True) + + max_retries = 3 + error: Optional[Exception] = None + + for i in range(max_retries): + try: + logger.info(f"Download attempt {i+1}/{max_retries} for {url}") + response = requests.get(url, stream=True, timeout=300) + response.raise_for_status() + + # Get expected file size if available + total_size = int(response.headers.get('content-length', 0)) + if total_size > 0: + logger.info(f"Expected file size: {total_size / (1024*1024):.2f} MB") + + downloaded_size = 0 + with open(path, 'wb') as f: + for chunk in response.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + downloaded_size += len(chunk) + if total_size > 0 and downloaded_size % (1024*1024) == 0: # Log every MB + progress = (downloaded_size / total_size) * 100 + logger.info(f"Download progress: {progress:.1f}%") + + # Verify download completeness + actual_size = os.path.getsize(path) + if total_size > 0 and actual_size != total_size: + raise Exception(f"Download incomplete. Expected {total_size} bytes, got {actual_size} bytes") + + logger.info(f"File downloaded successfully: {path} ({actual_size} bytes)") + return f"File downloaded successfully: {actual_size} bytes" + + except (requests.RequestException, Exception) as e: + error = e + logger.error(f"Failed to download {url}: {e}. Retrying... 
({max_retries - i - 1} attempts left)") + # Clean up partial download + if path.exists(): + try: + path.unlink() + except: + pass + + return f"Failed to download {url}. No retries left. Error: {error}", 500 + + +@app.route("/setup/open_file", methods=['POST']) +def open_file(): + data = request.json + path = data.get('path', None) + + if not path: + return "Path not supplied!", 400 + + path_obj = Path(os.path.expandvars(os.path.expanduser(path))) + + # Check if it's a file path that exists + is_file_path = path_obj.exists() + + # If it's not a file path, treat it as an application name/command + if not is_file_path: + # Check if it's a valid command by trying to find it in PATH + import shutil + if not shutil.which(path): + return f"Application/file not found: {path}", 404 + + try: + if is_file_path: + # Handle file opening + if platform.system() == "Windows": + os.startfile(path_obj) + else: + open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open" + subprocess.Popen([open_cmd, str(path_obj)]) + file_name = path_obj.name + file_name_without_ext, _ = os.path.splitext(file_name) + else: + # Handle application launching + if platform.system() == "Windows": + subprocess.Popen([path]) + else: + subprocess.Popen([path]) + file_name = path + file_name_without_ext = path + + # Wait for the file/application to open + + start_time = time.time() + window_found = False + + while time.time() - start_time < TIMEOUT: + os_name = platform.system() + if os_name in ['Windows', 'Darwin']: + import pygetwindow as gw + # Check for window title containing file name or file name without extension + windows = gw.getWindowsWithTitle(file_name) + if not windows: + windows = gw.getWindowsWithTitle(file_name_without_ext) + + if windows: + # To be more specific, we can try to activate it + windows[0].activate() + window_found = True + break + elif os_name == 'Linux': + try: + # Using wmctrl to list windows and check if any window title contains the filename + result = 
subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True) + window_list = result.stdout.strip().split('\n') + if not result.stdout.strip(): + pass # No windows, just continue waiting + else: + for window in window_list: + if file_name in window or file_name_without_ext in window: + # a window is found, now activate it + window_id = window.split()[0] + subprocess.run(['wmctrl', '-i', '-a', window_id], check=True) + window_found = True + break + if window_found: + break + except (subprocess.CalledProcessError, FileNotFoundError): + # wmctrl might not be installed or the window manager isn't ready. + # We just log it once and let the main loop retry. + if 'wmctrl_failed_once' not in locals(): + logger.warning("wmctrl command is not ready, will keep retrying...") + wmctrl_failed_once = True + pass # Let the outer loop retry + + time.sleep(1) + + if window_found: + return "File opened and window activated successfully" + else: + return f"Failed to find window for {file_name} within {TIMEOUT} seconds.", 500 + + except Exception as e: + return f"Failed to open {path}. 
Error: {e}", 500 + + +@app.route("/setup/activate_window", methods=['POST']) +def activate_window(): + data = request.json + window_name = data.get('window_name', None) + if not window_name: + return "window_name required", 400 + strict: bool = data.get("strict", False) # compare case-sensitively and match the whole string + by_class_name: bool = data.get("by_class", False) + + os_name = platform.system() + + if os_name == 'Windows': + import pygetwindow as gw + if by_class_name: + return "Get window by class name is not supported on Windows currently.", 500 + windows: List[gw.Window] = gw.getWindowsWithTitle(window_name) + + window: Optional[gw.Window] = None + if len(windows) == 0: + return "Window {:} not found (empty results)".format(window_name), 404 + elif strict: + for wnd in windows: + if wnd.title == wnd: + window = wnd + if window is None: + return "Window {:} not found (strict mode).".format(window_name), 404 + else: + window = windows[0] + window.activate() + + elif os_name == 'Darwin': + import pygetwindow as gw + if by_class_name: + return "Get window by class name is not supported on macOS currently.", 500 + # Find the VS Code window + windows = gw.getWindowsWithTitle(window_name) + + window: Optional[gw.Window] = None + if len(windows) == 0: + return "Window {:} not found (empty results)".format(window_name), 404 + elif strict: + for wnd in windows: + if wnd.title == wnd: + window = wnd + if window is None: + return "Window {:} not found (strict mode).".format(window_name), 404 + else: + window = windows[0] + + # Un-minimize the window and then bring it to the front + window.unminimize() + window.activate() + + elif os_name == 'Linux': + # Attempt to activate VS Code window using wmctrl + subprocess.run(["wmctrl" + , "-{:}{:}a".format("x" if by_class_name else "" + , "F" if strict else "" + ) + , window_name + ] + ) + + else: + return f"Operating system {os_name} not supported.", 400 + + return "Window activated successfully", 200 + + 
@app.route("/setup/close_window", methods=["POST"])
def close_window():
    """Close a top-level window selected by title (or, on Linux, by class).

    JSON body:
        window_name (str, required): title (or class name) to look for.
        strict (bool, default False): compare case-sensitively and match the
            whole string.
        by_class (bool, default False): match the window class instead of the
            title. Rejected on Windows.

    Returns:
        A ``(message, status)`` tuple: 200 on success, 400 when
        ``window_name`` is missing, 404 when no window matches (Windows only),
        500 for unsupported platforms or options.
    """
    data = request.json or {}
    if "window_name" not in data:
        return "window_name required", 400
    window_name: str = data["window_name"]
    strict: bool = data.get("strict", False)  # compare case-sensitively and match the whole string
    by_class_name: bool = data.get("by_class", False)

    os_name: str = platform.system()
    if os_name == "Windows":
        import pygetwindow as gw

        if by_class_name:
            return "Get window by class name is not supported on Windows currently.", 500
        windows: List[gw.Window] = gw.getWindowsWithTitle(window_name)
        if not windows:
            return "Window {:} not found (empty results)".format(window_name), 404

        window: Optional[gw.Window]
        if strict:
            # Compare each title with the requested name. The previous code
            # compared the title with the window object itself, which is never
            # equal, so strict mode always answered 404.
            window = next((wnd for wnd in windows if wnd.title == window_name), None)
            if window is None:
                return "Window {:} not found (strict mode).".format(window_name), 404
        else:
            window = windows[0]
        window.close()
    elif os_name == "Linux":
        # wmctrl flags: -x = match WM_CLASS, -F = exact match, -c = close.
        # NOTE(review): the wmctrl exit status is not inspected, so a missing
        # window still yields the 200 response below.
        wmctrl_flags = "-{:}{:}c".format("x" if by_class_name else "",
                                         "F" if strict else "")
        subprocess.run(["wmctrl", wmctrl_flags, window_name])
    elif os_name == "Darwin":
        return "Currently not supported on macOS.", 500
    else:
        return "Not supported platform {:}".format(os_name), 500

    return "Window closed successfully.", 200


@app.route('/start_recording', methods=['POST'])
def start_recording():
    """Start an ffmpeg screen recording into ``recording_path``.

    Refuses to start when a recording is already running, removes any previous
    output file, then launches ffmpeg (x11grab on Linux, gdigrab on Windows)
    and waits two seconds to detect an immediate failure.

    Returns:
        JSON ``{'status', 'message'}``: 200 when ffmpeg is still alive after
        two seconds, 400 when a recording is already in progress, 500 when the
        old file cannot be removed or ffmpeg exits early, 501 on other
        platforms.
    """
    global recording_process
    if recording_process and recording_process.poll() is None:
        return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400

    # Clean up previous recording if it exists
    if os.path.exists(recording_path):
        try:
            os.remove(recording_path)
        except OSError as e:
            logger.error(f"Error removing old recording file: {e}")
            return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500

    # The command is built as an argv list. Formatting a string and passing it
    # through shlex.split() drops backslashes, which corrupts Windows paths,
    # and splits paths that contain spaces.
    if platform_name == "Linux":
        d = display.Display()
        screen_width = d.screen().width_in_pixels
        screen_height = d.screen().height_in_pixels
        start_command = [
            "ffmpeg", "-y", "-f", "x11grab", "-draw_mouse", "1",
            "-s", f"{screen_width}x{screen_height}", "-i", ":0.0",
            "-c:v", "libx264", "-r", "30", str(recording_path),
        ]
    elif platform_name == "Windows":
        user32 = ctypes.windll.user32
        screen_width = user32.GetSystemMetrics(0)
        screen_height = user32.GetSystemMetrics(1)
        # Use gdigrab for Windows screen capture
        start_command = [
            "ffmpeg", "-y", "-f", "gdigrab", "-draw_mouse", "1",
            "-framerate", "30", "-video_size", f"{screen_width}x{screen_height}",
            "-i", "desktop", "-c:v", "libx264", "-r", "30", str(recording_path),
        ]
    else:
        return jsonify({'status': 'error', 'message': f'Recording not supported on {platform_name}'}), 501

    # stdin is a pipe so end_recording() can send ffmpeg the 'q' command on
    # Windows; stderr is a pipe so start-up errors can be reported.
    # NOTE(review): stderr is only read when the process ends; confirm that a
    # long recording cannot fill the pipe with ffmpeg progress output.
    recording_process = subprocess.Popen(start_command,
                                         stdin=subprocess.PIPE,
                                         stdout=subprocess.DEVNULL,
                                         stderr=subprocess.PIPE,
                                         text=True  # To get stderr as string
                                         )

    try:
        # Wait for 2 seconds. If ffmpeg exits within this time, it's an error.
        recording_process.wait(timeout=2)
    except subprocess.TimeoutExpired:
        # This is the expected outcome: the process is still running after 2 seconds.
        return jsonify({'status': 'success', 'message': 'Started recording successfully.'})

    # wait() returned, so the process has terminated.
    error_output = recording_process.stderr.read()
    return jsonify({
        'status': 'error',
        'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
    }), 500


@app.route('/end_recording', methods=['POST'])
def end_recording():
    """Stop the running ffmpeg recording and return the recorded file.

    ffmpeg is asked to stop gracefully so it can finalize the output: by the
    'q' command on its stdin on Windows, by SIGINT elsewhere. If it does not
    exit within 15 seconds it is killed.

    Returns:
        The recording as an attachment on success; JSON with status 400 when
        nothing is being recorded; JSON with status 500 when ffmpeg had to be
        killed; an aborted 500 when the output file is missing or empty.
    """
    global recording_process

    if not recording_process or recording_process.poll() is not None:
        recording_process = None  # Clean up stale process object
        return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400

    error_output = ""
    try:
        if platform_name == "Windows":
            # ffmpeg quits gracefully when it reads 'q' on stdin. The pipe is
            # opened in text mode, so a str (not bytes) must be written.
            try:
                recording_process.stdin.write('q')
                recording_process.stdin.flush()
            except (AttributeError, OSError, ValueError):
                # stdin is missing, closed or broken: fall back to terminate
                recording_process.terminate()
        else:
            recording_process.send_signal(signal.SIGINT)
        # Wait for ffmpeg to terminate. communicate() gets output and waits.
        _, error_output = recording_process.communicate(timeout=15)
    except subprocess.TimeoutExpired:
        logger.error("ffmpeg did not respond to stop signal, killing the process.")
        recording_process.kill()
        # After killing, communicate to get any remaining output.
        _, error_output = recording_process.communicate()
        recording_process = None
        return jsonify({
            'status': 'error',
            'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
        }), 500

    recording_process = None  # Clear the process from global state

    # Check if the recording file was created and is not empty.
    if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
        return send_file(recording_path, as_attachment=True)

    logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
    return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")


@app.route("/run_python", methods=['POST'])
def run_python():
    """Run the posted Python source in a separate interpreter process.

    JSON body:
        code (str, required): Python source to execute.

    The code is written to a uniquely named file in the system temporary
    directory and executed with ``sys.executable`` under a 30 second timeout.
    The file is removed afterwards in every case.

    Returns:
        JSON with ``status``, ``message`` (stdout and stderr combined),
        ``output`` (stdout), ``error``, ``need_more`` and, when the process
        ran to completion, ``return_code``. Status 400 when no code is
        supplied, 500 on timeout or on an unexpected failure.
    """
    data = request.json or {}
    code = data.get('code', None)

    if not code:
        return jsonify({'status': 'error', 'message': 'Code not supplied!'}), 400

    import tempfile
    import uuid

    # Use the platform temp directory: a literal "/tmp" does not exist on a
    # stock Windows machine.
    temp_filename = os.path.join(tempfile.gettempdir(), f"python_exec_{uuid.uuid4().hex}.py")

    try:
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly instead of relying on the locale encoding.
        with open(temp_filename, 'w', encoding='utf-8') as f:
            f.write(code)

        # Use sys.executable to use the same Python interpreter as the Flask server
        result = subprocess.run(
            [sys.executable, temp_filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=30  # 30 second timeout
        )

        output = result.stdout
        error_output = result.stderr

        # Combine output and errors if both exist
        combined_message = output
        if error_output:
            combined_message += ('\n' + error_output) if output else error_output

        # Determine status based on return code and errors
        if result.returncode != 0:
            status = 'error'
            if not error_output:
                # If no stderr but non-zero return code, add a generic error message
                error_output = f"Process exited with code {result.returncode}"
                combined_message = combined_message + '\n' + error_output if combined_message else error_output
        else:
            status = 'success'

        return jsonify({
            'status': status,
            'message': combined_message,
            'need_more': False,  # Not applicable for file execution
            'output': output,  # stdout only
            'error': error_output,  # stderr, or the generic exit-code message
            'return_code': result.returncode
        })

    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'message': 'Execution timeout: Code took too long to execute',
            'error': 'TimeoutExpired',
            'need_more': False,
            'output': None,
        }), 500

    except Exception as e:
        # Capture the exception details
        return jsonify({
            'status': 'error',
            'message': f'Execution error: {str(e)}',
            'error': traceback.format_exc(),
            'need_more': False,
            'output': None,
        }), 500

    finally:
        # Best-effort cleanup shared by every exit path.
        try:
            os.remove(temp_filename)
        except OSError:
            pass


@app.route("/run_bash_script", methods=['POST'])
def run_bash_script():
    """Run the posted shell script and return its merged stdout/stderr.

    JSON body:
        script (str, required): script text; a ``#!/bin/bash`` line is
            prepended when the text does not contain one.
        timeout (number, default 100): seconds before the run is aborted.
        working_dir (str, optional): directory to run in; ``~`` is expanded.

    The script is executed with bash (``/bin/bash`` on Unix-like systems,
    ``bash`` from PATH on Windows) and with ``sh`` when bash cannot be found.

    Returns:
        JSON with ``status``, ``output`` (stdout and stderr merged), ``error``
        (always empty) and ``returncode``. Status 400 for a missing script or
        working directory, 500 on timeout or execution failure.
    """
    data = request.json or {}
    script = data.get('script', None)
    timeout = data.get('timeout', 100)  # Default timeout of 100 seconds
    working_dir = data.get('working_dir', None)

    def _reply(output, returncode, http_status=200):
        """Build the response payload shared by every exit path."""
        return jsonify({
            'status': 'success' if returncode == 0 else 'error',
            'output': output,  # Contains both stdout and stderr merged
            'error': "",  # Always empty as requested
            'returncode': returncode
        }), http_status

    if not script:
        return _reply('Script not supplied!', -1, 400)

    # Expand user directory if provided
    if working_dir:
        working_dir = os.path.expanduser(working_dir)
        if not os.path.exists(working_dir):
            return _reply(f'Working directory does not exist: {working_dir}', -1, 400)

    if "#!/bin/bash" not in script:
        script = "#!/bin/bash\n\n" + script

    # newline='\n' stops Windows from translating line endings to CRLF, which
    # bash would read as a stray '\r' on every line.
    import tempfile
    with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False,
                                     encoding='utf-8', newline='\n') as tmp_file:
        tmp_file.write(script)
        tmp_file_path = tmp_file.name

    def _execute(shell_binary):
        """Run the temp script with the given shell and log the result."""
        run_kwargs = dict(
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # Merge stderr into stdout
            text=True,
            timeout=timeout,
            cwd=working_dir,
            shell=False,
        )
        if platform_name == "Windows":
            # Avoid flashing a console window on the desktop being recorded.
            run_kwargs['creationflags'] = subprocess.CREATE_NO_WINDOW
        result = subprocess.run([shell_binary, tmp_file_path], **run_kwargs)

        # Log the command execution for trajectory recording
        _append_event("BashScript",
                      {"script": script, "output": result.stdout, "error": "", "returncode": result.returncode},
                      ts=time.time())
        return _reply(result.stdout, result.returncode)

    try:
        # Make the script executable
        os.chmod(tmp_file_path, 0o755)
        try:
            # On Windows rely on a bash found on PATH (Git Bash, WSL, etc.)
            return _execute('bash' if platform_name == "Windows" else '/bin/bash')
        except FileNotFoundError:
            # Bash not found, try with sh
            return _execute('sh')
    except subprocess.TimeoutExpired:
        return _reply(f'Script execution timed out after {timeout} seconds', -1, 500)
    except Exception as e:
        return _reply(f'Failed to execute script: {str(e)}', -1, 500)
    finally:
        # Clean up the temporary file
        try:
            os.unlink(tmp_file_path)
        except OSError:
            pass


if __name__ == '__main__':
    # NOTE(review): debug=True together with host="0.0.0.0" exposes the
    # Werkzeug debugger on every interface; confirm this is intended.
    app.run(debug=True, host="0.0.0.0")

# [patch residue preserved verbatim from the collapsed diff; continues on the next line] diff --git a/desktop_env/server_win7/win7_offline_packages/numpy-1.24.4-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/numpy-1.24.4-cp38-cp38-win32.whl new file mode 100644 index 0000000..06c0890 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/numpy-1.24.4-cp38-cp38-win32.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/pillow-10.4.0-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/pillow-10.4.0-cp38-cp38-win32.whl new file mode 100644 index 0000000..2415455 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pillow-10.4.0-cp38-cp38-win32.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/pygame-2.6.1-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/pygame-2.6.1-cp38-cp38-win32.whl new file mode 100644 index 0000000..df0f24b Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pygame-2.6.1-cp38-cp38-win32.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/pymsgbox-2.0.1-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/pymsgbox-2.0.1-py3-none-any.whl new file mode 100644 index 0000000..c938c0b Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pymsgbox-2.0.1-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/pynput-1.7.6-py2.py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/pynput-1.7.6-py2.py3-none-any.whl new file mode 100644 index 0000000..1375d49 Binary files /dev/null and
b/desktop_env/server_win7/win7_offline_packages/pynput-1.7.6-py2.py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/pyscreeze-1.0.1.tar.gz b/desktop_env/server_win7/win7_offline_packages/pyscreeze-1.0.1.tar.gz new file mode 100644 index 0000000..1def580 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pyscreeze-1.0.1.tar.gz differ diff --git a/desktop_env/server_win7/win7_offline_packages/pytweening-1.2.0.tar.gz b/desktop_env/server_win7/win7_offline_packages/pytweening-1.2.0.tar.gz new file mode 100644 index 0000000..a1c0349 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pytweening-1.2.0.tar.gz differ diff --git a/desktop_env/server_win7/win7_offline_packages/pywin32-311-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/pywin32-311-cp38-cp38-win32.whl new file mode 100644 index 0000000..c03f7ea Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pywin32-311-cp38-cp38-win32.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/pywinauto-0.6.9-py2.py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/pywinauto-0.6.9-py2.py3-none-any.whl new file mode 100644 index 0000000..042276c Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pywinauto-0.6.9-py2.py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/requests-2.32.4-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/requests-2.32.4-py3-none-any.whl new file mode 100644 index 0000000..d52fad0 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/requests-2.32.4-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/requirements_win7.txt b/desktop_env/server_win7/win7_offline_packages/requirements_win7.txt new file mode 100644 index 0000000..ad69323 --- /dev/null +++ b/desktop_env/server_win7/win7_offline_packages/requirements_win7.txt @@ -0,0 +1,44 
@@ +# Win7 32位 Python3.8 离线安装清单 +# 安装命令: pip install --no-index --find-links . -r requirements_win7.txt + +# 底层依赖先装 +six +zipp +importlib_metadata +certifi +idna +urllib3 +charset_normalizer +requests + +# Flask 依赖链 +MarkupSafe +itsdangerous +click +blinker +werkzeug +jinja2 +flask + +# Win32 底层 +pywin32 + +# 数据处理 +numpy +Pillow +lxml +pygame + +# GUI 控制 +pymsgbox +pytweening +pyscreeze +PyGetWindow +MouseInfo +PyAutoGUI + +# 输入控制 +pynput + +# Win 自动化 +pywinauto diff --git a/desktop_env/server_win7/win7_offline_packages/setuptools-75.3.4-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/setuptools-75.3.4-py3-none-any.whl new file mode 100644 index 0000000..4cacd34 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/setuptools-75.3.4-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/six-1.17.0-py2.py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/six-1.17.0-py2.py3-none-any.whl new file mode 100644 index 0000000..c506fd0 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/six-1.17.0-py2.py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/urllib3-2.2.3-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/urllib3-2.2.3-py3-none-any.whl new file mode 100644 index 0000000..3870568 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/urllib3-2.2.3-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/werkzeug-3.0.6-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/werkzeug-3.0.6-py3-none-any.whl new file mode 100644 index 0000000..15b739b Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/werkzeug-3.0.6-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/wheel-0.45.1-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/wheel-0.45.1-py3-none-any.whl new file mode 100644 index 0000000..589308a 
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/wheel-0.45.1-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_offline_packages/zipp-3.20.2-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/zipp-3.20.2-py3-none-any.whl new file mode 100644 index 0000000..d506da9 Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/zipp-3.20.2-py3-none-any.whl differ diff --git a/desktop_env/server_win7/win7_server_packages.zip b/desktop_env/server_win7/win7_server_packages.zip new file mode 100644 index 0000000..31aafe3 Binary files /dev/null and b/desktop_env/server_win7/win7_server_packages.zip differ diff --git a/evaluation_examples/examples/flsol/flsol_task1.json b/evaluation_examples/examples/flsol/flsol_task1.json new file mode 100644 index 0000000..b24d716 --- /dev/null +++ b/evaluation_examples/examples/flsol/flsol_task1.json @@ -0,0 +1,40 @@ +{ + "id": "flsol_task1", + "snapshot": "flsol", + "instruction": "启动 FL Solutions for F-4600 软件,并截图确认主界面已成功打开。", + "source": "custom", + "config": [ + { + "type": "sleep", + "parameters": { + "seconds": 2 + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "flsol" + ], + "evaluator": { + "postconfig": [ + { + "type": "sleep", + "parameters": { + "seconds": 4 + } + } + ], + "func": "vllm_eval", + "expected": { + "description": "FL Solutions for F-4600 主界面应已打开,可见菜单栏(File, Method, Measure 等)和仪器连接状态面板。" + } + }, + "proxy": false, + "fixed_ip": true, + "possibility_of_env_change": "low", + "metadata": { + "input_files": [], + "steps": "1. 双击桌面上的 FL Solutions for F-4600 快捷方式,或通过开始菜单找到并打开 FL Solutions 程序(路径:C:\\Program Files\\FL Solutions\\flsol.exe)。\n2. 等待程序加载完成(约 5-10 秒),观察主界面是否出现,包含顶部菜单栏(File、Method、Measure、View、Tools、Help 等选项)以及仪器状态面板。\n3. 确认主界面已打开后,操作完成。", + "steps_original": "1. 打开 FL Solutions for F-4600 软件。\n2. 
等待主界面加载完毕。" + } +} diff --git a/evaluation_examples/examples/flsol/flsol_task2.json b/evaluation_examples/examples/flsol/flsol_task2.json new file mode 100644 index 0000000..1b4d427 --- /dev/null +++ b/evaluation_examples/examples/flsol/flsol_task2.json @@ -0,0 +1,48 @@ +{ + "id": "flsol_task2", + "snapshot": "flsol", + "instruction": "打开 FL Solutions for F-4600,新建一个波长扫描(Wavelength Scan)方法,将激发波长(Excitation Wavelength)设置为 350 nm,扫描范围设为 400–700 nm,扫描速度设为 240 nm/min,然后保存该方法为 test_scan.mth。", + "source": "custom", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "C:\\Program Files\\FL Solutions\\flsol.exe" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 8 + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "flsol" + ], + "evaluator": { + "postconfig": [ + { + "type": "sleep", + "parameters": { + "seconds": 3 + } + } + ], + "func": "vllm_eval", + "expected": { + "description": "屏幕上应显示 FL Solutions 主界面,并且在 Method 或文件相关区域可以看到名为 test_scan.mth 的方法文件已被保存/创建,或在最近文件列表中可以看到该文件名。激发波长应设为 350 nm,扫描范围为 400-700 nm。" + } + }, + "proxy": false, + "fixed_ip": true, + "possibility_of_env_change": "medium", + "metadata": { + "input_files": [], + "steps": "1. 等待 FL Solutions 主界面打开。\n2. 在顶部菜单栏中,单击 'Method'(方法)菜单,然后选择 'New'(新建)或 'Wavelength Scan'(波长扫描)选项,创建一个新的波长扫描方法。\n3. 在弹出的方法设置对话框中,找到 'Excitation Wavelength'(激发波长)输入框,将数值清空后输入 '350',单位为 nm。\n4. 找到扫描范围(Scan Range)设置区域,将起始波长(Start)设为 '400' nm,将结束波长(End)设为 '700' nm。\n5. 找到扫描速度(Scan Speed)下拉菜单或输入框,选择或输入 '240' nm/min。\n6. 确认设置无误后,单击菜单 'Method' → 'Save As'(另存为),在弹出的文件保存对话框中,将文件名输入为 'test_scan',确保文件类型为 .mth,然后单击 '保存'(Save)按钮。\n7. 确认文件已成功保存。", + "steps_original": "1. 打开 FL Solutions 并新建 Wavelength Scan 方法。\n2. 设置 Ex 波长 350 nm,扫描范围 400-700 nm,速度 240 nm/min。\n3. 
另存为 test_scan.mth。" + } +} diff --git a/evaluation_examples/examples/flsol/flsol_task3.json b/evaluation_examples/examples/flsol/flsol_task3.json new file mode 100644 index 0000000..1423aa7 --- /dev/null +++ b/evaluation_examples/examples/flsol/flsol_task3.json @@ -0,0 +1,48 @@ +{ + "id": "flsol_task3", + "snapshot": "flsol", + "instruction": "打开 FL Solutions for F-4600,进入仪器参数设置(Instrument Parameters),将光电倍增管电压(PMT Voltage)修改为 700 V,狭缝宽度(Slit Width,激发和发射均设置)修改为 5.0 nm,响应时间(Response)设置为 Auto,然后截图确认参数设置完成。", + "source": "custom", + "config": [ + { + "type": "launch", + "parameters": { + "command": [ + "C:\\Program Files\\FL Solutions\\flsol.exe" + ] + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 8 + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "flsol" + ], + "evaluator": { + "postconfig": [ + { + "type": "sleep", + "parameters": { + "seconds": 3 + } + } + ], + "func": "vllm_eval", + "expected": { + "description": "FL Solutions 的仪器参数设置界面(Instrument Parameters 或 Method 设置窗口)应可见,其中 PMT Voltage 显示为 700 V,激发和发射狭缝宽度(Ex Slit / Em Slit)均显示为 5.0 nm,响应时间(Response)显示为 Auto。" + } + }, + "proxy": false, + "fixed_ip": true, + "possibility_of_env_change": "medium", + "metadata": { + "input_files": [], + "steps": "1. 等待 FL Solutions 主界面打开。\n2. 在顶部菜单栏中,单击 'Method'(方法)菜单,选择 'New'(新建)创建一个新方法,或在现有方法的参数面板中操作。\n3. 在方法设置界面中,找到仪器参数(Instrument Parameters)区域(通常在方法编辑窗口的下方或单独的选项卡中)。\n4. 找到 'PMT Voltage'(光电倍增管电压)输入框,清空当前值并输入 '700',单位为 V。\n5. 找到激发狭缝宽度(EX Slit Width 或 Excitation Slit)输入框或下拉菜单,将值修改为 '5.0',单位为 nm。\n6. 找到发射狭缝宽度(EM Slit Width 或 Emission Slit)输入框或下拉菜单,将值修改为 '5.0',单位为 nm。\n7. 找到响应时间(Response)下拉菜单,单击展开,选择 'Auto'(自动)选项。\n8. 确认所有参数设置完成后,截图当前界面。", + "steps_original": "1. 打开 FL Solutions,进入方法/仪器参数设置。\n2. 
设置 PMT 电压 700V,Ex/Em 狭缝宽度各 5.0 nm,Response 设为 Auto。" + } +} diff --git a/evaluation_examples/examples/flsol/flsol_task4_measure.json b/evaluation_examples/examples/flsol/flsol_task4_measure.json new file mode 100644 index 0000000..4bb5de2 --- /dev/null +++ b/evaluation_examples/examples/flsol/flsol_task4_measure.json @@ -0,0 +1,44 @@ +{ + "id": "flsol_task4_measure", + "snapshot": "flsol", + "instruction": "使用 FL Solutions for F-4600 软件执行一次荧光测量。", + "source": "custom", + "config": [ + { + "type": "launch", + "parameters": { + "command": ["C:\\Program Files\\FL Solutions\\flsol.exe"], + "shell": false + } + }, + { + "type": "sleep", + "parameters": { + "seconds": 15 + } + } + ], + "trajectory": "trajectories/", + "related_apps": [ + "flsol" + ], + "evaluator": { + "postconfig": [ + { + "type": "sleep", + "parameters": { + "seconds": 5 + } + } + ], + "func": "vllm_eval" + }, + "proxy": false, + "fixed_ip": true, + "possibility_of_env_change": "low", + "metadata": { + "input_files": [], + "steps": "1. 软件已自动打开,等待 FL Solutions 主界面完全加载,确认标题栏显示 'FL Solutions - F-4600 FL Spectrophotometer on USB' 且仪器状态正常(无报错弹窗)。\n2. 直接按键盘 F4 键触发 Measure(这是最可靠的方式,F4 是 Measure 的快捷键)。\n3. 如果 F4 无反应,则通过菜单执行:点击菜单栏 'Spectrophotometer'(第四个菜单项,位于 View 和 Tools 之间),在下拉菜单中点击 'Measure'(快捷键 F4)。\n4. 等待测量过程完成,软件会在图表区域实时绘制扫描曲线,测量完成后曲线绘制停止。\n5. 确认图表区域有测量结果曲线后,操作完成。", + "steps_original": "1. 等待 FL Solutions 和 F-4600 仪器完全初始化,状态变为 Ready。\n2. 点击 Measure 按钮执行测量。\n3. 
# [patch residue preserved verbatim from the collapsed diff; continues the previous line] 等待测量完成,图表区域出现曲线。" + } +} diff --git a/evaluation_examples/test_flsol.json b/evaluation_examples/test_flsol.json new file mode 100644 index 0000000..467c9cc --- /dev/null +++ b/evaluation_examples/test_flsol.json @@ -0,0 +1,5 @@ +{ + "flsol": [ + "flsol_task4_measure" + ] +} diff --git a/run_flsol_win7.sh b/run_flsol_win7.sh new file mode 100755 index 0000000..861d0d3 --- /dev/null +++ b/run_flsol_win7.sh @@ -0,0 +1,92 @@
#!/bin/bash
# =============================================================================
# Evaluation script for FL Solutions for F-4600 (Win7, direct Ethernet link)
# provider: direct -- talks straight to the Flask server, no VM/SSH involved
# =============================================================================

# ---------- Win7 direct-connection IP ----------
# Address of the Flask server on the physical Win7 machine. A value already
# present in the environment wins over the built-in default.
export DIRECT_VM_IP="${DIRECT_VM_IP:-192.168.1.11}"

# ---------- LLM API configuration ----------
# The API key must come from the caller's environment; secrets must never be
# committed to the repository. Any key that was committed here previously
# should be treated as leaked and rotated.
if [ -z "${OPENAI_API_KEY:-}" ]; then
    echo "[ERROR] OPENAI_API_KEY is not set; export it before running this script."
    exit 1
fi
export OPENAI_API_KEY
export OPENAI_BASE_URL="${OPENAI_BASE_URL:-https://vip.apiyi.com/v1}"

# ---------- Evaluation parameters (aligned with run_proxmox.sh) ----------
MODEL="gpt-5.4"
EVAL_MODEL="gemini-3.1-pro-preview"
MAX_STEPS=50
SLEEP_AFTER_EXEC=3
TEMPERATURE=0
TOP_P=0.9
MAX_TOKENS=16384
MAX_TRAJECTORY_LENGTH=3
OBSERVATION_TYPE="screenshot_a11y_tree"
ACTION_SPACE="pyautogui"
SCREEN_WIDTH=1280
SCREEN_HEIGHT=1024
# Overridable so the script is not tied to one developer's home directory.
RESULT_DIR="${RESULT_DIR:-/Users/lizhanyuan/Downloads/results2/flsol}"
TEST_META="evaluation_examples/test_flsol.json"
DOMAIN="flsol"
INJECT_STEPS=true

# ---------- Pre-flight checks ----------
# Run from the script's own directory so the relative paths resolve.
cd "$(dirname "$0")" || exit 1

echo "=== FL Solutions F-4600 评测预检查 ==="
echo ""

# The Flask server is considered reachable when /screenshot answers HTTP 200.
echo -n "Flask Server (${DIRECT_VM_IP}:5000)... "
HTTP_CODE=$(curl -s --connect-timeout 5 "http://${DIRECT_VM_IP}:5000/screenshot" \
    -o /dev/null -w "%{http_code}" 2>/dev/null)
if [ "$HTTP_CODE" = "200" ]; then
    echo "OK"
else
    echo "FAIL (HTTP ${HTTP_CODE})"
    echo "[ERROR] Win7 Flask Server 不可达,请先在 Win7 运行: python D:\python_server\main.py"
    exit 1
fi

mkdir -p "${RESULT_DIR}" logs

echo ""
echo "=== 开始评测 ==="
echo " Provider: direct (无 VM 管理,直连 Flask)"
echo " Win7 IP: ${DIRECT_VM_IP}"
echo " Model: ${MODEL}"
echo " Eval: ${EVAL_MODEL}"
echo " Task: flsol_task4_measure"
# NOTE(review): this banner used to add "(screenshot only, Win7 a11y unstable)"
# although OBSERVATION_TYPE is screenshot_a11y_tree. The contradictory remark
# was dropped; confirm which observation type is actually intended.
echo " Obs Type: ${OBSERVATION_TYPE}"
echo " Max Steps: ${MAX_STEPS}"
echo " Max Tokens: ${MAX_TOKENS}"
echo " Results: ${RESULT_DIR}"
echo ""

if [ "${INJECT_STEPS}" = true ]; then
    INJECT_FLAG="--inject_steps"
else
    INJECT_FLAG="--no_inject_steps"
fi

python3 run.py \
    --provider_name "direct" \
    --path_to_vm "ignored" \
    --observation_type "${OBSERVATION_TYPE}" \
    --action_space "${ACTION_SPACE}" \
    --model "${MODEL}" \
    --eval_model "${EVAL_MODEL}" \
    --temperature "${TEMPERATURE}" \
    --top_p "${TOP_P}" \
    --max_tokens "${MAX_TOKENS}" \
    --max_trajectory_length "${MAX_TRAJECTORY_LENGTH}" \
    --screen_width "${SCREEN_WIDTH}" \
    --screen_height "${SCREEN_HEIGHT}" \
    --sleep_after_execution "${SLEEP_AFTER_EXEC}" \
    --max_steps "${MAX_STEPS}" \
    --result_dir "${RESULT_DIR}" \
    --test_all_meta_path "${TEST_META}" \
    --domain "${DOMAIN}" \
    ${INJECT_FLAG}
RUN_STATUS=$?

# Report failure instead of announcing completion when run.py did not succeed.
if [ "${RUN_STATUS}" -ne 0 ]; then
    echo ""
    echo "[ERROR] run.py exited with status ${RUN_STATUS}"
    exit "${RUN_STATUS}"
fi

echo ""
echo "=== 评测完成 ==="
echo "结果保存在: ${RESULT_DIR}"