diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py
index d27aa00..f9c17ec 100644
--- a/desktop_env/desktop_env.py
+++ b/desktop_env/desktop_env.py
@@ -164,6 +164,9 @@ class DesktopEnv(gym.Env):
             self.is_environment_used = False
         elif self.provider_name in {"vmware", "virtualbox", "proxmox"}:
             self.is_environment_used = True
+        elif self.provider_name == "direct":
+            # Physical machine: never revert snapshot, never stop emulator
+            self.is_environment_used = False
         else:
             raise ValueError(f"Invalid provider name: {self.provider_name}")
 
diff --git a/desktop_env/providers/__init__.py b/desktop_env/providers/__init__.py
index 1555d83..06a9ae7 100644
--- a/desktop_env/providers/__init__.py
+++ b/desktop_env/providers/__init__.py
@@ -43,5 +43,9 @@ def create_vm_manager_and_provider(provider_name: str, region: str, use_proxy: b
         from desktop_env.providers.proxmox.manager import ProxmoxVMManager
         from desktop_env.providers.proxmox.provider import ProxmoxProvider
         return ProxmoxVMManager(), ProxmoxProvider(region)
+    elif provider_name == "direct":
+        from desktop_env.providers.direct.manager import DirectVMManager
+        from desktop_env.providers.direct.provider import DirectProvider
+        return DirectVMManager(), DirectProvider(region)
     else:
         raise NotImplementedError(f"{provider_name} not implemented!")
diff --git a/desktop_env/providers/direct/__init__.py b/desktop_env/providers/direct/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/desktop_env/providers/direct/manager.py b/desktop_env/providers/direct/manager.py
new file mode 100644
index 0000000..26fc8a6
--- /dev/null
+++ b/desktop_env/providers/direct/manager.py
@@ -0,0 +1,40 @@
+import logging
+import os
+
+from desktop_env.providers.base import VMManager
+
+logger = logging.getLogger("desktopenv.providers.direct.DirectVMManager")
+
+
+class DirectVMManager(VMManager):
+    """No-op manager for direct-IP connections (physical machines / bare-metal).
+
+    There is no VM registry to maintain: every registry operation does nothing
+    and the "VM path" is simply the address read from ``DIRECT_VM_IP``.
+    """
+
+    def __init__(self, registry_path=""):
+        # registry_path is accepted for signature compatibility and ignored.
+        pass
+
+    def initialize_registry(self, **kwargs):
+        pass
+
+    def add_vm(self, vm_path, **kwargs):
+        pass
+
+    def delete_vm(self, vm_path, **kwargs):
+        pass
+
+    def occupy_vm(self, vm_path, pid, **kwargs):
+        pass
+
+    def list_free_vms(self, **kwargs):
+        return []
+
+    def check_and_clean(self, **kwargs):
+        pass
+
+    def get_vm_path(self, os_type="Windows", region=None, screen_size=(1920, 1080), **kwargs):
+        """Return the address in ``DIRECT_VM_IP`` (default ``192.168.1.11``); all arguments are ignored."""
+        return os.environ.get("DIRECT_VM_IP", "192.168.1.11")
diff --git a/desktop_env/providers/direct/provider.py b/desktop_env/providers/direct/provider.py
new file mode 100644
index 0000000..af46db5
--- /dev/null
+++ b/desktop_env/providers/direct/provider.py
@@ -0,0 +1,72 @@
+import logging
+import os
+import time
+
+import requests
+
+from desktop_env.providers.base import Provider
+
+logger = logging.getLogger("desktopenv.providers.direct.DirectProvider")
+logger.setLevel(logging.INFO)
+
+RETRY_INTERVAL = 3  # seconds between readiness probes
+MAX_WAIT_READY = 60  # default number of seconds to wait for the Flask server
+
+
+class DirectProvider(Provider):
+    """
+    Provider for directly connected machines (physical / bare-metal).
+    No VM lifecycle management — the machine is assumed to be already running.
+    The Flask server IP is read from the environment variable DIRECT_VM_IP.
+
+    Usage:
+        export DIRECT_VM_IP=192.168.1.11
+        python run.py --provider_name direct --path_to_vm ignored ...
+    """
+
+    def __init__(self, region: str = None):
+        super().__init__(region)
+        self.vm_ip = os.environ.get("DIRECT_VM_IP", "192.168.1.11")
+
+    def start_emulator(self, path_to_vm: str, headless: bool, os_type: str = "Windows"):
+        """No-op: machine is already on. Just verify Flask server is reachable."""
+        logger.info(f"[direct] Using physical machine at {self.vm_ip}:5000 (no VM lifecycle)")
+        self._wait_for_vm_ready(self.vm_ip)
+
+    def _wait_for_vm_ready(self, ip: str, timeout: int = MAX_WAIT_READY) -> bool:
+        """Poll ``http://<ip>:5000/screenshot`` until it answers 200 or *timeout* seconds pass.
+
+        Returns True once the server answers, False on timeout (a warning is logged
+        and no exception is raised, so the caller continues either way).
+        """
+        url = f"http://{ip}:5000/screenshot"
+        # Monotonic clock: the deadline is not affected by wall-clock adjustments.
+        deadline = time.monotonic() + timeout
+        while time.monotonic() < deadline:
+            try:
+                r = requests.get(url, timeout=5)
+                if r.status_code == 200:
+                    logger.info(f"[direct] Flask server ready at {url}")
+                    return True
+            except requests.RequestException as e:
+                # Connection errors are expected while the server is down; keep the
+                # reason at debug level instead of discarding it.
+                logger.debug(f"[direct] Probe of {url} failed: {e}")
+            logger.info(f"[direct] Waiting for Flask server at {url}...")
+            # Never sleep past the deadline.
+            time.sleep(max(0.0, min(RETRY_INTERVAL, deadline - time.monotonic())))
+        logger.warning(f"[direct] Flask server at {url} not ready within {timeout}s — continuing anyway")
+        return False
+
+    def get_ip_address(self, path_to_vm: str) -> str:
+        return self.vm_ip
+
+    def save_state(self, path_to_vm: str, snapshot_name: str):
+        logger.info("[direct] save_state: no-op (physical machine)")
+
+    def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str) -> str:
+        logger.info("[direct] revert_to_snapshot: no-op (physical machine)")
+        return path_to_vm
+
+    def stop_emulator(self, path_to_vm: str):
+        logger.info("[direct] stop_emulator: no-op (physical machine)")
diff --git a/desktop_env/server_win7/extract_and_install.bat b/desktop_env/server_win7/extract_and_install.bat
new file mode 100644
index 0000000..8606d41
--- /dev/null
+++ b/desktop_env/server_win7/extract_and_install.bat
@@ -0,0 +1,37 @@
+@echo off
+rem Unzips win7_server_packages.zip (expected next to this script) into the
+rem script's own folder, then runs win7_offline_packages\install_win7.bat.
+echo ========================================
+echo  Unzip + Install (Win7 32bit Python3.8)
+echo ========================================
+echo.
+
+rem All paths are built from the folder this script lives in.
+set "ZIPFILE=%~dp0win7_server_packages.zip"
+set "DESTDIR=%~dp0"
+set "MARKER=%~dp0win7_offline_packages\install_win7.bat"
+
+if not exist "%ZIPFILE%" (
+    echo [ERROR] win7_server_packages.zip not found!
+    echo Please put this bat and the zip in the same folder.
+    pause
+    exit /b 1
+)
+
+echo [1/3] Unzipping via Shell.Application (Win7 compatible) ...
+rem CopyHere flags 20 = 4 (no progress dialog) + 16 (answer "Yes to All").
+rem CopyHere can return before the copy has finished, so a fixed sleep alone is a
+rem race on slow machines: wait up to 60 s for the installer script to appear,
+rem then keep the original 5 s grace period for the remaining files.
+powershell -NoProfile -ExecutionPolicy Bypass -Command "$s=New-Object -ComObject Shell.Application; $d=$s.NameSpace('%DESTDIR%'); $z=$s.NameSpace('%ZIPFILE%'); $d.CopyHere($z.Items(), 20); $n=0; while(-not (Test-Path -LiteralPath '%MARKER%') -and $n -lt 120){Start-Sleep -m 500; $n++}; Start-Sleep -s 5"
+
+echo.
+if not exist "%MARKER%" (
+    echo [ERROR] Unzip failed. Folder win7_offline_packages not found.
+    pause
+    exit /b 1
+)
+
+echo [2/3] Unzip OK. Starting install ...
+echo.
+call "%MARKER%"
diff --git a/desktop_env/server_win7/main.py b/desktop_env/server_win7/main.py
new file mode 100644
index 0000000..bc9e9be
--- /dev/null
+++ b/desktop_env/server_win7/main.py
@@ -0,0 +1,2008 @@
+import ctypes
+import os
+import platform
+import shlex
+import json
+import subprocess, signal
+import sys
+import time
+from pathlib import Path
+from typing import Any, Optional, Sequence
+from typing import List, Dict, Tuple, Literal
+import concurrent.futures
+
+import lxml.etree
+import pyautogui
+import requests
+import re
+from PIL import Image, ImageGrab
+from flask import Flask, request, jsonify, send_file, abort  # , send_from_directory
+from lxml.etree import _Element
+
+platform_name: str = platform.system()
+
+if platform_name == "Linux":
+    import Xlib
+    from Xlib import display, X
+    from pyxcursor import Xcursor
+    import pyatspi
+    from pyatspi import Accessible, StateType, STATE_SHOWING
+    from pyatspi import Action as ATAction
+    from pyatspi import Component  # , Document
+    from pyatspi import Text as ATText
+    from pyatspi import Value as ATValue
+
+    BaseWrapper = Any
+
+elif platform_name == "Windows":
+    from pywinauto import Desktop
+    from pywinauto.base_wrapper import BaseWrapper
+    import pywinauto.application
+    import win32ui, win32gui
+
+    Accessible = Any
+    Xlib = None
+    display = None
+    X = None
+    Xcursor = None
+
+elif platform_name == "Darwin":
+    import plistlib
+    from pyxcursor import Xcursor
+
+    import AppKit
+    import ApplicationServices
+    import Foundation
+    import Quartz
+    import oa_atomacos
+
+    Accessible = Any
+    BaseWrapper = Any
+    Xlib = None
+
+else:
+    # Platform not supported
+    Accessible = None
+    BaseWrapper = Any
+    Xlib = None
+    display = None
+    X = None
+    Xcursor = None
+
+# todo: need to reformat and organize this whole file
+
+app = Flask(__name__)  # Flask application; the @app.route handlers in this file register on it
+
+pyautogui.PAUSE = 0  # no implicit pause after each pyautogui call
+pyautogui.DARWIN_CATCH_UP_TIME = 0
+
+TIMEOUT = 1800  # seconds
+
+logger = app.logger  # this module logs through the Flask application's logger
+recording_process = None  # fixme: this is a temporary solution for recording, need to be changed to support multiple-process
+recording_path = "/tmp/recording.mp4"  # NOTE(review): POSIX-style path — confirm it is usable on the Windows targets
+
+
+@app.route('/setup/execute', methods=['POST'])
+@app.route('/execute', methods=['POST'])
+def execute_command():
+    """Run the command given in the JSON body and report its output.
+
+    JSON fields: ``command`` (a list, or a string that is shlex-split unless ``shell``
+    is true) and ``shell`` (default False). Responds with ``status``, ``output``,
+    ``error`` and ``returncode``; a failure to run the subprocess gives HTTP 500.
+
+    SECURITY: the command (and, on Windows, ``python -c`` code passed to exec()) comes
+    straight from the request and is executed without any safety checks.
+    """
+    data = request.json
+    shell = data.get('shell', False)
+    command = data.get('command', "" if shell else [])
+
+    if isinstance(command, str) and not shell:
+        command = shlex.split(command)
+
+    # Expand user directory
+    for i, arg in enumerate(command):
+        if arg.startswith("~/"):
+            command[i] = os.path.expanduser(arg)
+
+    # Replace 'python' with sys.executable to use the same Python interpreter as the server
+    if len(command) > 0 and command[0] in ['python', 'python3', 'python.exe', 'python3.exe']:
+        command[0] = sys.executable
+
+    # On Windows, if the command is `python -c "<code>"`, exec() the code directly in this
+    # process instead of spawning a subprocess.  Subprocesses launched from a windowless
+    # pythonw.exe parent cannot access the interactive desktop, so pyautogui calls fail
+    # with a 500 inside a subprocess.  Running exec() in the Flask process is safe because
+    # the Flask server itself already has desktop access (proven by /screenshot working).
+    if (platform_name == "Windows" and not shell and len(command) >= 3
+            and command[0] == sys.executable and command[1] == "-c"):
+        import io
+        import traceback
+        from contextlib import redirect_stdout, redirect_stderr
+        stdout_buf = io.StringIO()
+        stderr_buf = io.StringIO()
+        returncode = 0
+        try:
+            exec_globals = {"__builtins__": __builtins__}
+            with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):
+                exec(compile(command[2], "<pyautogui_exec>", "exec"), exec_globals)
+        except SystemExit as e:
+            # sys.exit()/exit() in the snippet must only end the snippet. SystemExit is
+            # not an Exception subclass, so it needs its own handler; map its code the
+            # way the interpreter does (None -> 0, int -> itself, other -> message + 1).
+            if isinstance(e.code, int):
+                returncode = e.code
+            elif e.code is not None:
+                stderr_buf.write("{}\n".format(e.code))
+                returncode = 1
+        except Exception:
+            # The traceback is appended after anything the snippet wrote to stderr.
+            stderr_buf.write(traceback.format_exc())
+            returncode = 1
+        return jsonify({
+            'status': 'success' if returncode == 0 else 'error',
+            'output': stdout_buf.getvalue(),
+            'error': stderr_buf.getvalue(),
+            'returncode': returncode
+        })
+
+    # Execute the command without any safety checks.
+    try:
+        flags = subprocess.CREATE_NO_WINDOW if platform_name == "Windows" else 0
+        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                                shell=shell, text=True, timeout=120, creationflags=flags)
+        return jsonify({
+            'status': 'success',
+            'output': result.stdout,
+            'error': result.stderr,
+            'returncode': result.returncode
+        })
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'message': str(e)
+        }), 500
+
+
+@app.route('/setup/execute_with_verification', methods=['POST'])
+@app.route('/execute_with_verification', methods=['POST'])
+def execute_command_with_verification():
+    """Run a command, then poll until the requested verification checks pass.
+
+    JSON body fields:
+        command: list of args, or a string (shlex-split unless ``shell`` is true).
+        shell: forwarded to ``subprocess.run`` (default False).
+        verification: optional dict; ``window_exists`` is a window-title substring,
+            ``command_success`` is a shell command that must exit with code 0.
+        max_wait_time: seconds to keep polling (default 10).
+        check_interval: seconds between polls (default 1).
+
+    Responds 200 with the command output when no verification is requested or when
+    every check passes, 500 with status ``verification_failed`` when the wait
+    expires, and 500 with status ``error`` if running the command raises.
+    """
+    payload = request.json
+    shell = payload.get('shell', False)
+    command = payload.get('command', "" if shell else [])
+    verification = payload.get('verification', {})
+    max_wait_time = payload.get('max_wait_time', 10)
+    check_interval = payload.get('check_interval', 1)
+
+    if not shell and isinstance(command, str):
+        command = shlex.split(command)
+
+    # Expand a leading "~/" in every argument.
+    for idx, piece in enumerate(command):
+        if piece.startswith("~/"):
+            command[idx] = os.path.expanduser(piece)
+
+    def _window_present(title):
+        """True when a window matching *title* is found; any error counts as absent."""
+        try:
+            if platform_name == 'Linux':
+                listing = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
+                return title.lower() in listing.stdout.lower()
+            if platform_name in ['Windows', 'Darwin']:
+                import pygetwindow as gw
+                return bool(gw.getWindowsWithTitle(title))
+            return True
+        except Exception:
+            return False
+
+    def _command_ok(check_cmd):
+        """True when *check_cmd*, run through the shell, exits 0 within 5 seconds."""
+        try:
+            probe = subprocess.run(check_cmd, shell=True, capture_output=True, text=True, timeout=5)
+            return probe.returncode == 0
+        except Exception:
+            return False
+
+    try:
+        creation_flags = subprocess.CREATE_NO_WINDOW if platform_name == "Windows" else 0
+        result = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            shell=shell,
+            text=True,
+            timeout=120,
+            creationflags=creation_flags,
+        )
+        report = {
+            'status': 'success',
+            'output': result.stdout,
+            'error': result.stderr,
+            'returncode': result.returncode
+        }
+        # Nothing to verify: answer straight away.
+        if not verification:
+            return jsonify(report)
+
+        started = time.time()
+        while time.time() - started < max_wait_time:
+            # Both requested checks run on every pass, even when the first one
+            # has already failed (no short-circuit between them).
+            window_ok = True
+            if 'window_exists' in verification:
+                window_ok = _window_present(verification['window_exists'])
+            command_ok = True
+            if 'command_success' in verification:
+                command_ok = _command_ok(verification['command_success'])
+
+            if window_ok and command_ok:
+                report['verification'] = 'passed'
+                report['wait_time'] = time.time() - started
+                return jsonify(report)
+
+            time.sleep(check_interval)
+
+        # The wait expired without all checks passing.
+        report.update({
+            'status': 'verification_failed',
+            'verification': 'failed',
+            'wait_time': max_wait_time
+        })
+        return jsonify(report), 500
+
+    except Exception as exc:
+        return jsonify({
+            'status': 'error',
+            'message': str(exc)
+        }), 500
+
+
+def _get_machine_architecture() -> str:
+    """Classify ``platform.machine()`` as ``'amd'``, ``'arm'`` or ``'unknown'``."""
+    families = {
+        'amd': ('amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686'),
+        'arm': ('arm64', 'aarch64', 'aarch32'),
+    }
+    machine = platform.machine().lower()
+    # The first family that lists the lower-cased machine string wins.
+    matches = (family for family, names in families.items() if machine in names)
+    return next(matches, 'unknown')
+
+
+@app.route('/setup/launch', methods=["POST"])
+def launch_app():
+    """Launch ``command`` from the JSON body without waiting for it to exit.
+
+    ``command`` is a list, or a string that is shlex-split unless ``shell`` is true.
+    Returns a plain-text success message, or JSON ``status: error`` with HTTP 500.
+    """
+    data = request.json
+    shell = data.get("shell", False)
+    command: List[str] = data.get("command", "" if shell else [])
+
+    if isinstance(command, str) and not shell:
+        command = shlex.split(command)
+    for i, arg in enumerate(command):  # expand user directory
+        if arg.startswith("~/"):
+            command[i] = os.path.expanduser(arg)
+
+    try:
+        if 'google-chrome' in command and _get_machine_architecture() == 'arm':
+            command[command.index('google-chrome')] = 'chromium'  # arm64 chrome is not available yet
+
+        # Use the executable's own folder as working directory so apps that rely on relative paths work
+        exe_dir = os.path.dirname(command[0]) if (not shell and isinstance(command, list) and command) else ""
+        cwd = exe_dir if exe_dir and os.path.isdir(exe_dir) else None
+
+        if platform_name == "Windows" and not shell:
+            # Launch through ShellExecute (what an Explorer double-click uses); Popen lacks the
+            # shell context some apps need. os.startfile() cannot forward arguments or a working
+            # directory before Python 3.10, so ShellExecuteW is called directly with both.
+            exe, args = command[0], subprocess.list2cmdline(command[1:]) or None
+            rc = ctypes.windll.shell32.ShellExecuteW(None, "open", exe, args, cwd, 1)  # 1 = SW_SHOWNORMAL
+            if rc <= 32:  # ShellExecute signals failure with a value of 32 or less
+                raise OSError("ShellExecute failed with code {:} for {:}".format(rc, exe))
+            return "{:} launched successfully via startfile".format(exe)
+
+        subprocess.Popen(command, shell=shell, cwd=cwd)
+        return "{:} launched successfully".format(command if shell else " ".join(command))
+    except Exception as e:
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
+@app.route('/screenshot', methods=['GET'])
+def capture_screen_with_cursor():
+    """Capture the screen with the mouse cursor drawn in and return it as a PNG.
+
+    The image is saved to ``screenshots/screenshot.png`` next to this file and sent
+    with ``send_file``. On Windows the grab is resized to the logical resolution so
+    image pixels line up with the logical coordinates reported for the cursor.
+    """
+    file_path = os.path.join(os.path.dirname(__file__), "screenshots", "screenshot.png")
+    user_platform = platform.system()
+
+    # Ensure the screenshots directory exists
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+    if user_platform == "Windows":
+        def get_cursor():
+            """Render the current cursor; return (RGBA image, (hotspot_x, hotspot_y))."""
+            hcursor = win32gui.GetCursorInfo()[1]
+            screen_dc = win32gui.GetDC(0)
+            try:
+                hdc = win32ui.CreateDCFromHandle(screen_dc)
+                hbmp = win32ui.CreateBitmap()
+                hbmp.CreateCompatibleBitmap(hdc, 36, 36)
+                hdc = hdc.CreateCompatibleDC()
+                hdc.SelectObject(hbmp)
+                hdc.DrawIcon((0,0), hcursor)
+
+                bmpinfo = hbmp.GetInfo()
+                bmpstr = hbmp.GetBitmapBits(True)
+                cursor = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1).convert("RGBA")
+
+                win32gui.DestroyIcon(hcursor)
+                win32gui.DeleteObject(hbmp.GetHandle())
+                hdc.DeleteDC()
+            finally:
+                # A DC obtained with GetDC must be released; this runs on every screenshot request.
+                win32gui.ReleaseDC(0, screen_dc)
+
+            # Make opaque pure-black pixels fully transparent.
+            pixdata = cursor.load()
+            width, height = cursor.size
+            for y in range(height):
+                for x in range(width):
+                    if pixdata[x, y] == (0, 0, 0, 255):
+                        pixdata[x, y] = (0, 0, 0, 0)
+
+            hotspot = win32gui.GetIconInfo(hcursor)[1:3]
+            return (cursor, hotspot)
+
+        # Win8.1+ uses shcore; Win7 fallback uses GetDeviceCaps via GDI
+        try:
+            ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100
+        except (OSError, AttributeError):
+            hdc = ctypes.windll.user32.GetDC(0)
+            logical_dpi = ctypes.windll.gdi32.GetDeviceCaps(hdc, 88)  # LOGPIXELSX
+            ctypes.windll.user32.ReleaseDC(0, hdc)
+            ratio = logical_dpi / 96.0  # 96 DPI == 100% scaling
+
+        # get logical screen size
+        user32 = ctypes.windll.user32
+        logical_width = user32.GetSystemMetrics(0)
+        logical_height = user32.GetSystemMetrics(1)
+
+        # Read the cursor position right before the grab to keep the time gap between them small.
+        # win32gui.GetCursorPos() returns logical coordinates (consistent with pyautogui)
+        pos_win = win32gui.GetCursorPos()
+        logger.info(f"Cursor position (logical coordinates): {pos_win}")
+        img = ImageGrab.grab(bbox=None, include_layered_windows=True)
+
+        # DPI scaling fix: bring the grab to the logical resolution.
+        if ratio != 1.0:
+            physical_width, physical_height = img.size
+            logger.info(f"Detected DPI scaling: {ratio}x ({ratio*100}%)")
+            logger.info(f"Physical screenshot size: {physical_width}x{physical_height}")
+            logger.info(f"Logical resolution: {logical_width}x{logical_height}")
+            logger.info(f"Resizing screenshot to match logical resolution...")
+            # Image.Resampling only exists in Pillow >= 9.1; older releases keep LANCZOS on Image.
+            lanczos = getattr(Image, "Resampling", Image).LANCZOS
+            img = img.resize((logical_width, logical_height), lanczos)
+            logger.info(f"Screenshot resized to: {img.size}")
+
+        try:
+            cursor, (hotspotx, hotspoty) = get_cursor()
+            # The screenshot is at logical resolution, so the logical cursor position applies directly
+            logical_cursor_x = pos_win[0]
+            logical_cursor_y = pos_win[1]
+            pos = (logical_cursor_x - hotspotx, logical_cursor_y - hotspoty)
+            logger.info(f"Cursor position (logical coordinates): ({logical_cursor_x}, {logical_cursor_y})")
+            logger.info(f"Hotspot offset: ({hotspotx}, {hotspoty})")
+            logger.info(f"Final paste position: {pos}")
+            img.paste(cursor, pos, cursor)
+        except Exception as e:
+            logger.warning(f"Failed to capture cursor on Windows, screenshot will not include cursor. Error: {e}")
+
+        img.save(file_path)
+
+    elif user_platform == "Linux":
+        cursor_obj = Xcursor()
+        imgarray = cursor_obj.getCursorImageArrayFast()
+        cursor_img = Image.fromarray(imgarray)
+        screenshot = pyautogui.screenshot()
+        cursor_x, cursor_y = pyautogui.position()
+        screenshot.paste(cursor_img, (cursor_x, cursor_y), cursor_img)
+        screenshot.save(file_path)
+    elif user_platform == "Darwin":  # (Mac OS)
+        subprocess.run(["screencapture", "-C", file_path])
+    else:
+        logger.warning(f"The platform you're using ({user_platform}) is not currently supported")
+
+    return send_file(file_path, mimetype='image/png')
+
+
+
+def _has_active_terminal(desktop: Accessible) -> bool:
+    """Tell whether gnome-terminal-server has a frame in the active state (Linux only)."""
+    terminal_apps = (child for child in desktop
+                     if child.getRoleName() == "application" and child.name == "gnome-terminal-server")
+    return any(
+        frame.getRoleName() == "frame" and frame.getState().contains(pyatspi.STATE_ACTIVE)
+        for candidate in terminal_apps
+        for frame in candidate
+    )
+
+
+def _get_windows_terminal_output() -> Optional[str]:
+    """ Get terminal output on Windows platform.
+
+    Scans visible, non-minimized top-level windows (UIA backend) for one whose title
+    looks like a terminal (Windows Terminal, PowerShell, Command Prompt, ConHost) and
+    returns the first non-empty text found in it, right-stripped. Returns None when no
+    terminal text is found or when the lookup itself fails (the error is logged).
+    """
+    try:
+        from pywinauto import Desktop
+        from pywinauto.findwindows import ElementNotFoundError
+
+        desktop = Desktop(backend="uia")
+
+        # Lower-case substrings that mark a window title as belonging to a terminal.
+        terminal_markers = (
+            "windowsterminal.exe",  # Windows Terminal
+            "powershell.exe",       # PowerShell
+            "pwsh.exe",             # PowerShell Core
+            "cmd.exe",              # Command Prompt
+            "conhost.exe",          # Console Host
+            "terminal", "powershell", "command prompt", "cmd",
+        )
+        # Window texts that are only a title, not terminal content.
+        title_only = (
+            'Windows PowerShell',
+            'Command Prompt',
+            'PowerShell',
+            'Administrator: Windows PowerShell',
+        )
+
+        # Try to find active terminal windows
+        for window in desktop.windows():
+            try:
+                # Check if window is visible and not minimized
+                if not window.is_visible() or window.is_minimized():
+                    continue
+
+                # element_info.name is the element's name (the window title), not a process name.
+                window_title = window.element_info.name.lower()
+                if not any(marker in window_title for marker in terminal_markers):
+                    continue
+
+                try:
+                    # Prefer Edit, then Document, then Text controls as the text source.
+                    text_controls = window.descendants(control_type="Edit")
+                    if not text_controls:
+                        text_controls = window.descendants(control_type="Document")
+                    if not text_controls:
+                        text_controls = window.descendants(control_type="Text")
+
+                    for control in text_controls:
+                        try:
+                            text = control.window_text()
+                            if text and len(text.strip()) > 0:
+                                return text.rstrip()
+                        except Exception:
+                            # Skip unreadable controls, but let KeyboardInterrupt/SystemExit through.
+                            continue
+
+                    # If no text controls found, try to get the window text directly
+                    window_text = window.window_text()
+                    if window_text and len(window_text.strip()) > 0:
+                        if window_text not in title_only:
+                            return window_text.rstrip()
+
+                except Exception as e:
+                    logger.debug(f"Error getting text from window {window_title}: {e}")
+                    continue
+
+            except Exception as e:
+                logger.debug(f"Error processing window: {e}")
+                continue
+
+        return None
+
+    except Exception as e:
+        logger.error(f"Error in _get_windows_terminal_output: {e}")
+        return None
+
+
+@app.route('/terminal', methods=['GET'])
+def get_terminal_output():
+    """Return the active terminal's text as JSON ``{"output", "status"}``; HTTP 500 on error or other platforms."""
+    user_platform = platform.system()
+    output: Optional[str] = None
+    try:
+        if user_platform == "Linux":
+            desktop: Accessible = pyatspi.Registry.getDesktop(0)
+            if _has_active_terminal(desktop):
+                desktop_xml: _Element = _create_atspi_node(desktop)
+                # Focused terminal tab inside the active frame; lxml gives text=None for an element without text.
+                xpath = '//application[@name="gnome-terminal-server"]/frame[@st:active="true"]//terminal[@st:focused="true"]'
+                terminals: List[_Element] = desktop_xml.xpath(xpath, namespaces=_accessibility_ns_map_ubuntu)
+                output = (terminals[0].text or "").rstrip() if len(terminals) == 1 else None
+        elif user_platform == "Windows":
+            output = _get_windows_terminal_output()
+            logger.debug(f"Terminal output retrieved: {output}")
+        else:  # macOS platform is not implemented currently
+            return "Currently not implemented for platform {:}.".format(platform.platform()), 500
+        return jsonify({"output": output, "status": "success"})
+    except Exception as e:
+        logger.error("Failed to get terminal output. Error: %s", e)
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
+_accessibility_ns_map = {  # per-platform XML namespaces: prefix -> namespace URI
+    "ubuntu": {
+        "st": "https://accessibility.ubuntu.example.org/ns/state",
+        "attr": "https://accessibility.ubuntu.example.org/ns/attributes",
+        "cp": "https://accessibility.ubuntu.example.org/ns/component",
+        "doc": "https://accessibility.ubuntu.example.org/ns/document",
+        "docattr": "https://accessibility.ubuntu.example.org/ns/document/attributes",
+        "txt": "https://accessibility.ubuntu.example.org/ns/text",
+        "val": "https://accessibility.ubuntu.example.org/ns/value",
+        "act": "https://accessibility.ubuntu.example.org/ns/action",
+    },
+    "windows": {  # same prefixes as ubuntu, plus "class"
+        "st": "https://accessibility.windows.example.org/ns/state",
+        "attr": "https://accessibility.windows.example.org/ns/attributes",
+        "cp": "https://accessibility.windows.example.org/ns/component",
+        "doc": "https://accessibility.windows.example.org/ns/document",
+        "docattr": "https://accessibility.windows.example.org/ns/document/attributes",
+        "txt": "https://accessibility.windows.example.org/ns/text",
+        "val": "https://accessibility.windows.example.org/ns/value",
+        "act": "https://accessibility.windows.example.org/ns/action",
+        "class": "https://accessibility.windows.example.org/ns/class"
+    },
+    "macos": {  # no "docattr" prefix here; adds "role"
+        "st": "https://accessibility.macos.example.org/ns/state",
+        "attr": "https://accessibility.macos.example.org/ns/attributes",
+        "cp": "https://accessibility.macos.example.org/ns/component",
+        "doc": "https://accessibility.macos.example.org/ns/document",
+        "txt": "https://accessibility.macos.example.org/ns/text",
+        "val": "https://accessibility.macos.example.org/ns/value",
+        "act": "https://accessibility.macos.example.org/ns/action",
+        "role": "https://accessibility.macos.example.org/ns/role",
+    }
+
+}
+
+_accessibility_ns_map_ubuntu = _accessibility_ns_map['ubuntu']  # shortcuts to the per-platform prefix maps
+_accessibility_ns_map_windows = _accessibility_ns_map['windows']
+_accessibility_ns_map_macos = _accessibility_ns_map['macos']
+
+# A11y tree getter for Ubuntu
+libreoffice_version_tuple: Optional[Tuple[int, ...]] = None  # presumably caches _get_libreoffice_version() — TODO confirm
+MAX_DEPTH = 50  # NOTE(review): looks like a limit for the a11y tree walk; usage is not visible here — confirm
+MAX_WIDTH = 1024  # NOTE(review): usage not visible here — confirm
+MAX_CALLS = 5000  # NOTE(review): usage not visible here — confirm
+
+
+def _get_libreoffice_version() -> Tuple[int, ...]:
+    """Run ``libreoffice --version`` and return the second word of its output as an int tuple."""
+    completed = subprocess.run("libreoffice --version", shell=True, text=True, stdout=subprocess.PIPE)
+    words = completed.stdout.split()  # the version is assumed to be the second word
+    return tuple(int(part) for part in words[1].split("."))
+
+
+def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = None) -> _Element:
+    """Convert an AT-SPI accessible and its descendants (down to MAX_DEPTH) into an lxml element."""
+    attribute_dict: Dict[str, Any] = {"name": node.name}
+
+    #  States
+    states: List[StateType] = node.getState().get_states()
+    for st in states:
+        state_name: str = StateType._enum_lookup[st]
+        state_name: str = state_name.split("_", maxsplit=1)[1].lower()
+        if len(state_name) == 0:
+            continue
+        attribute_dict["{{{:}}}{:}".format(_accessibility_ns_map_ubuntu["st"], state_name)] = "true"
+
+    #  Attributes
+    attributes: Dict[str, str] = node.get_attributes()
+    for attribute_name, attribute_value in attributes.items():
+        if len(attribute_name) == 0:
+            continue
+        attribute_dict["{{{:}}}{:}".format(_accessibility_ns_map_ubuntu["attr"], attribute_name)] = attribute_value
+
+    #  Component
+    if attribute_dict.get("{{{:}}}visible".format(_accessibility_ns_map_ubuntu["st"]), "false") == "true" \
+            and attribute_dict.get("{{{:}}}showing".format(_accessibility_ns_map_ubuntu["st"]), "false") == "true":
+        try:
+            component: Component = node.queryComponent()
+        except NotImplementedError:
+            pass
+        else:
+            bbox: Sequence[int] = component.getExtents(pyatspi.XY_SCREEN)
+            attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_ubuntu["cp"])] = \
+                str(tuple(bbox[0:2]))
+            attribute_dict["{{{:}}}size".format(_accessibility_ns_map_ubuntu["cp"])] = str(tuple(bbox[2:]))
+
+    text = ""
+    #  Text
+    try:
+        text_obj: ATText = node.queryText()
+        # only text shown on current screen is available
+        # attribute_dict["txt:text"] = text_obj.getText(0, text_obj.characterCount)
+        text: str = text_obj.getText(0, text_obj.characterCount)
+        # if flag=="thunderbird":
+        # appeared in thunderbird (uFFFC) (not only in thunderbird), "Object
+        # Replacement Character" in Unicode, "used as placeholder in text for
+        # an otherwise unspecified object; uFFFD is another "Replacement
+        # Character", just in case
+        text = text.replace("\ufffc", "").replace("\ufffd", "")
+    except NotImplementedError:
+        pass
+
+    #  Image, Selection, Value, Action
+    try:
+        node.queryImage()
+        attribute_dict["image"] = "true"
+    except NotImplementedError:
+        pass
+
+    try:
+        node.querySelection()
+        attribute_dict["selection"] = "true"
+    except NotImplementedError:
+        pass
+
+    try:
+        value: ATValue = node.queryValue()
+        value_key = f"{{{_accessibility_ns_map_ubuntu['val']}}}"
+
+        for attr_name, attr_func in [
+            ("value", lambda: value.currentValue),
+            ("min", lambda: value.minimumValue),
+            ("max", lambda: value.maximumValue),
+            ("step", lambda: value.minimumIncrement)
+        ]:
+            try:
+                attribute_dict[f"{value_key}{attr_name}"] = str(attr_func())
+            except Exception:  # best effort: a value field that cannot be read is simply omitted
+                pass
+    except NotImplementedError:
+        pass
+
+    try:
+        action: ATAction = node.queryAction()
+        for i in range(action.nActions):
+            action_name: str = action.getName(i).replace(" ", "-")
+            attribute_dict[
+                "{{{:}}}{:}_desc".format(_accessibility_ns_map_ubuntu["act"], action_name)] = action.getDescription(
+                i)
+            attribute_dict[
+                "{{{:}}}{:}_kb".format(_accessibility_ns_map_ubuntu["act"], action_name)] = action.getKeyBinding(i)
+    except NotImplementedError:
+        pass
+
+    # Add from here if we need more attributes in the future...
+
+    raw_role_name: str = node.getRoleName().strip()
+    node_role_name = (raw_role_name or "unknown").replace(" ", "-")
+
+    if not flag:  # once set, the flag is handed down unchanged to every descendant
+        if raw_role_name == "document spreadsheet":
+            flag = "calc"
+        if raw_role_name == "application" and node.name == "Thunderbird":
+            flag = "thunderbird"
+
+    xml_node = lxml.etree.Element(
+        node_role_name,
+        attrib=attribute_dict,
+        nsmap=_accessibility_ns_map_ubuntu
+    )
+
+    if len(text) > 0:
+        xml_node.text = text
+
+    if depth == MAX_DEPTH:
+        logger.warning("Max depth reached")
+        return xml_node
+
+    if flag == "calc" and node_role_name == "table":
+        # Maximum column: 1024 if ver<=7.3 else 16384
+        # Maximum row: 104 8576
+        # Maximum sheet: 1 0000
+
+        global libreoffice_version_tuple
+        MAX_COLUMN = 1024 if (libreoffice_version_tuple or ()) < (7, 4) else 16384  # unknown version -> 1024
+        MAX_ROW = 104_8576
+
+        index_base = 0
+        first_showing = False
+        column_base = None
+        for r in range(MAX_ROW):
+            for clm in range(column_base or 0, MAX_COLUMN):
+                child_node: Accessible = node[index_base + clm]
+                showing: bool = child_node.getState().contains(STATE_SHOWING)
+                if showing:
+                    child_node: _Element = _create_atspi_node(child_node, depth + 1, flag)
+                    if not first_showing:
+                        column_base = clm
+                        first_showing = True
+                    xml_node.append(child_node)
+                elif first_showing and column_base is not None or clm >= 500:
+                    break
+            if first_showing and clm == column_base or not first_showing and r >= 500:
+                break
+            index_base += MAX_COLUMN
+        return xml_node
+    else:
+        try:
+            for i, ch in enumerate(node):
+                if i == MAX_WIDTH:
+                    logger.warning("Max width reached")
+                    break
+                xml_node.append(_create_atspi_node(ch, depth + 1, flag))
+        except Exception:  # children already appended are kept; the remaining siblings are skipped
+            logger.warning("Error occurred during children traversing. Has Ignored. Node: %s",
+                           lxml.etree.tostring(xml_node, encoding="unicode"))
+        return xml_node
+
+
+# A11y tree getter for Windows
+def _create_pywinauto_node(node, nodes, depth: int = 0, flag: Optional[str] = None) -> _Element:
+    """Convert a pywinauto wrapper and its descendants into an lxml element; None if `node` was already visited."""
+    nodes = nodes or set()
+    if node in nodes:
+        return
+    nodes.add(node)
+
+    attribute_dict: Dict[str, Any] = {"name": node.element_info.name}
+
+    base_properties = {}
+    try:
+        base_properties.update(
+            node.get_properties())  # get all writable/not writable properties, but have bugs when landing on chrome and it's slower!
+    except Exception:
+        logger.debug("Failed to call get_properties(), trying to get writable properites")
+        try:
+            _element_class = node.__class__
+
+            class TempElement(node.__class__):
+                writable_props = pywinauto.base_wrapper.BaseWrapper.writable_props
+
+            # Instantiate the subclass
+            node.__class__ = TempElement
+            # Retrieve properties using get_properties()
+            properties = node.get_properties()
+            node.__class__ = _element_class
+
+            base_properties.update(properties)  # only get all writable properties
+            logger.debug("get writable properties")
+        except Exception as e:
+            logger.error(e)
+
+    # Count-cnt  NOTE(review): .lower() raises for non-str values (e.g. ints), which are then skipped -- confirm intent
+    for attr_name in ["control_count", "button_count", "item_count", "column_count"]:
+        try:
+            attribute_dict[f"{{{_accessibility_ns_map_windows['cnt']}}}{attr_name}"] = base_properties[
+                attr_name].lower()
+        except Exception:
+            pass
+
+    # Columns-cols
+    try:
+        attribute_dict[f"{{{_accessibility_ns_map_windows['cols']}}}columns"] = base_properties["columns"].lower()
+    except Exception:
+        pass
+
+    # Id-id
+    for attr_name in ["control_id", "automation_id", "window_id"]:
+        try:
+            attribute_dict[f"{{{_accessibility_ns_map_windows['id']}}}{attr_name}"] = base_properties[attr_name].lower()
+        except Exception:
+            pass
+
+    #  States
+    # 19 sec out of 20
+    for attr_name, attr_func in [
+        ("enabled", lambda: node.is_enabled()),
+        ("visible", lambda: node.is_visible()),
+        # ("active", lambda: node.is_active()), # occupied most of the time: 20s out of 21s for slack, 51.5s out of 54s for WeChat # maybe use for cutting branches
+        ("minimized", lambda: node.is_minimized()),
+        ("maximized", lambda: node.is_maximized()),
+        ("normal", lambda: node.is_normal()),
+        ("unicode", lambda: node.is_unicode()),
+        ("collapsed", lambda: node.is_collapsed()),
+        ("checkable", lambda: node.is_checkable()),
+        ("checked", lambda: node.is_checked()),
+        ("focused", lambda: node.is_focused()),
+        ("keyboard_focused", lambda: node.is_keyboard_focused()),
+        ("selected", lambda: node.is_selected()),
+        ("selection_required", lambda: node.is_selection_required()),
+        ("pressable", lambda: node.is_pressable()),
+        ("pressed", lambda: node.is_pressed()),
+        ("expanded", lambda: node.is_expanded()),
+        ("editable", lambda: node.is_editable()),
+        ("has_keyboard_focus", lambda: node.has_keyboard_focus()),
+        ("is_keyboard_focusable", lambda: node.is_keyboard_focusable()),
+    ]:
+        try:
+            attribute_dict[f"{{{_accessibility_ns_map_windows['st']}}}{attr_name}"] = str(attr_func()).lower()
+        except Exception:  # most wrappers implement only a subset of these predicates
+            pass
+
+    #  Component
+    try:
+        rectangle = node.rectangle()
+        attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_windows["cp"])] = \
+            "({:d}, {:d})".format(rectangle.left, rectangle.top)
+        attribute_dict["{{{:}}}size".format(_accessibility_ns_map_windows["cp"])] = \
+            "({:d}, {:d})".format(rectangle.width(), rectangle.height())
+
+    except Exception as e:
+        logger.error("Error accessing rectangle: %s", e)
+
+    #  Text
+    text: str = node.window_text()
+    if text == attribute_dict["name"]:
+        text = ""
+
+    #  Selection
+    if hasattr(node, "select"):
+        attribute_dict["selection"] = "true"
+
+    # Value (getters are looked up by name: a lambda's __name__ is "<lambda>", so hasattr() on it never matched)
+    for attr_name, method_names in [
+        ("step", ["get_step"]),
+        ("value", ["value", "get_value", "get_position"]),
+        ("min", ["min_value", "get_range_min"]),
+        ("max", ["max_value", "get_range_max"])
+    ]:
+        for method_name in method_names:
+            if hasattr(node, method_name):
+                try:
+                    attribute_dict[f"{{{_accessibility_ns_map_windows['val']}}}{attr_name}"] = str(getattr(node, method_name)())
+                    break  # exit once the attribute is set successfully
+                except Exception:
+                    pass
+
+    attribute_dict["{{{:}}}class".format(_accessibility_ns_map_windows["class"])] = str(type(node))
+
+    # class_name
+    for attr_name in ["class_name", "friendly_class_name"]:
+        try:
+            attribute_dict[f"{{{_accessibility_ns_map_windows['class']}}}{attr_name}"] = base_properties[
+                attr_name].lower()
+        except Exception:
+            pass
+
+    node_role_name: str = node.class_name().lower().replace(" ", "-")
+    node_role_name = "".join(
+        map(lambda _ch: _ch if _ch.isidentifier() or _ch in {"-"} or _ch.isalnum() else "-", node_role_name))
+
+    if node_role_name.strip() == "":
+        node_role_name = "unknown"
+    if not node_role_name[0].isalpha():
+        node_role_name = "tag" + node_role_name
+
+    xml_node = lxml.etree.Element(
+        node_role_name,
+        attrib=attribute_dict,
+        nsmap=_accessibility_ns_map_windows
+    )
+
+    if text is not None and len(text) > 0 and text != attribute_dict["name"]:
+        xml_node.text = text
+
+    if depth == MAX_DEPTH:
+        logger.warning("Max depth reached")
+        return xml_node
+
+    # use multi thread to accelerate children fetching
+    children = node.children()
+    if children:
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            future_to_child = [executor.submit(_create_pywinauto_node, ch, nodes, depth + 1, flag) for ch in
+                               children[:MAX_WIDTH]]
+        try:  # all futures are done here; keep submission order and drop None (already-visited) results
+            xml_node.extend([r for r in (f.result() for f in future_to_child) if r is not None])
+        except Exception as e:
+            logger.error(f"Exception occurred: {e}")
+    return xml_node
+
+
+# A11y tree getter for macOS
+
+def _create_axui_node(node, nodes: set = None, depth: int = 0, bbox: tuple = None):
+    """Convert a macOS window-info dict (depth 0) or AX element into an lxml element; None when skipped."""
+    nodes = nodes or set()
+    if node in nodes:
+        return
+    nodes.add(node)
+
+    reserved_keys = {
+        "AXEnabled": "st",
+        "AXFocused": "st",
+        "AXFullScreen": "st",
+        "AXTitle": "attr",
+        "AXChildrenInNavigationOrder": "attr",
+        "AXChildren": "attr",
+        "AXFrame": "attr",
+        "AXRole": "role",
+        "AXHelp": "attr",
+        "AXRoleDescription": "role",
+        "AXSubrole": "role",
+        "AXURL": "attr",
+        "AXValue": "val",
+        "AXDescription": "attr",
+        "AXDOMIdentifier": "attr",
+        "AXSelected": "st",
+        "AXInvalid": "st",
+        "AXRows": "attr",
+        "AXColumns": "attr",
+    }
+    attribute_dict = {}
+
+    if depth == 0:
+        bbox = (
+            node["kCGWindowBounds"]["X"],
+            node["kCGWindowBounds"]["Y"],
+            node["kCGWindowBounds"]["X"] + node["kCGWindowBounds"]["Width"],
+            node["kCGWindowBounds"]["Y"] + node["kCGWindowBounds"]["Height"]
+        )
+        app_ref = ApplicationServices.AXUIElementCreateApplication(node["kCGWindowOwnerPID"])
+
+        attribute_dict["name"] = node["kCGWindowOwnerName"]
+        if attribute_dict["name"] != "Dock":
+            error_code, app_wins_ref = ApplicationServices.AXUIElementCopyAttributeValue(
+                app_ref, "AXWindows", None)
+            if error_code or not app_wins_ref:
+                logger.error("MacOS parsing %s encountered Error code: %d", app_ref, error_code)
+                return  # no window to descend into; indexing app_wins_ref[0] below would raise
+        else:
+            app_wins_ref = [app_ref]
+        node = app_wins_ref[0]
+
+    error_code, attr_names = ApplicationServices.AXUIElementCopyAttributeNames(node, None)
+
+    if error_code:
+        # -25202: AXError.invalidUIElement
+        #         The accessibility object received in this event is invalid.
+        return
+
+    value = None
+
+    if "AXFrame" in attr_names:
+        error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, "AXFrame", None)
+        rep = repr(attr_val)
+        x_value = re.search(r"x:(-?[\d.]+)", rep)
+        y_value = re.search(r"y:(-?[\d.]+)", rep)
+        w_value = re.search(r"w:(-?[\d.]+)", rep)
+        h_value = re.search(r"h:(-?[\d.]+)", rep)
+        type_value = re.search(r"type\s?=\s?(\w+)", rep)
+        value = {
+            "x": float(x_value.group(1)) if x_value else None,
+            "y": float(y_value.group(1)) if y_value else None,
+            "w": float(w_value.group(1)) if w_value else None,
+            "h": float(h_value.group(1)) if h_value else None,
+            "type": type_value.group(1) if type_value else None,
+        }
+
+        if not any(v is None for v in value.values()):
+            x_min = max(bbox[0], value["x"])
+            x_max = min(bbox[2], value["x"] + value["w"])
+            y_min = max(bbox[1], value["y"])
+            y_max = min(bbox[3], value["y"] + value["h"])
+
+            if x_min > x_max or y_min > y_max:
+                # No intersection
+                return
+
+    role = None
+    text = None
+
+    for attr_name, ns_key in reserved_keys.items():
+        if attr_name not in attr_names:
+            continue
+
+        if value and attr_name == "AXFrame":
+            if not any(v is None for v in value.values()):
+                attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_macos["cp"])] = \
+                    "({:d}, {:d})".format(int(value["x"]), int(value["y"]))
+                attribute_dict["{{{:}}}size".format(_accessibility_ns_map_macos["cp"])] = \
+                    "({:d}, {:d})".format(int(value["w"]), int(value["h"]))
+            continue
+
+        error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, attr_name, None)
+
+        full_attr_name = f"{{{_accessibility_ns_map_macos[ns_key]}}}{attr_name}"
+
+        if attr_name == "AXValue" and not text:
+            text = str(attr_val)
+            continue
+
+        if attr_name == "AXRoleDescription":
+            role = attr_val
+            continue
+
+        # Set the attribute_dict; element references and arrays become child nodes further down instead
+        if attr_val is not None and not isinstance(
+                attr_val, (ApplicationServices.AXUIElementRef, AppKit.NSArray, list)):
+            attribute_dict[full_attr_name] = str(attr_val)
+
+    node_role_name = role.lower().replace(" ", "_") if role else "unknown_role"
+
+    xml_node = lxml.etree.Element(
+        node_role_name,
+        attrib=attribute_dict,
+        nsmap=_accessibility_ns_map_macos
+    )
+
+    if text is not None and len(text) > 0:
+        xml_node.text = text
+
+    if depth == MAX_DEPTH:
+        logger.warning("Max depth reached")
+        return xml_node
+
+    future_to_child = []
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        for attr_name, ns_key in reserved_keys.items():
+            if attr_name not in attr_names:
+                continue
+
+            error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, attr_name, None)
+            if isinstance(attr_val, ApplicationServices.AXUIElementRef):
+                future_to_child.append(executor.submit(_create_axui_node, attr_val, nodes, depth + 1, bbox))
+
+            elif isinstance(attr_val, (AppKit.NSArray, list)):
+                for child in attr_val:
+                    future_to_child.append(executor.submit(_create_axui_node, child, nodes, depth + 1, bbox))
+
+        try:
+            for future in concurrent.futures.as_completed(future_to_child):
+                result = future.result()
+                if result is not None:
+                    xml_node.append(result)
+        except Exception as e:
+            logger.error(f"Exception occurred: {e}")
+
+    return xml_node
+
+
+@app.route("/accessibility", methods=["GET"])
+def get_accessibility_tree():
+    """Return the platform's accessibility tree as JSON {"AT": "<xml>"}; 500 on unsupported platforms."""
+    os_name: str = platform.system()
+
+    # AT-SPI works for KDE as well
+    if os_name == "Linux":
+        global libreoffice_version_tuple
+        # Resolve the version once per process instead of spawning `libreoffice --version` on every request
+        libreoffice_version_tuple = libreoffice_version_tuple or _get_libreoffice_version()
+        desktop: Accessible = pyatspi.Registry.getDesktop(0)
+        xml_node = lxml.etree.Element("desktop-frame", nsmap=_accessibility_ns_map_ubuntu)
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [executor.submit(_create_atspi_node, app_node, 1) for app_node in desktop]
+            for future in concurrent.futures.as_completed(futures):
+                xml_tree = future.result()
+                xml_node.append(xml_tree)
+        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})
+
+    elif os_name == "Windows":
+        # Attention: read through `pywinauto`; its `win32` and `uia` backends may return different trees
+        desktop: Desktop = Desktop(backend="uia")
+        xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_windows)
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [executor.submit(_create_pywinauto_node, wnd, set(), 1) for wnd in desktop.windows()]
+            for future in concurrent.futures.as_completed(futures):
+                xml_tree = future.result()
+                xml_node.append(xml_tree)
+        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})
+
+    elif os_name == "Darwin":
+        # TODO: Add MenuBar (Dock windows are already collected below)
+        xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_macos)
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            foreground_windows = [
+                win for win in Quartz.CGWindowListCopyWindowInfo(
+                    (Quartz.kCGWindowListExcludeDesktopElements |
+                     Quartz.kCGWindowListOptionOnScreenOnly),
+                    Quartz.kCGNullWindowID
+                ) if win["kCGWindowLayer"] == 0 and win["kCGWindowOwnerName"] != "Window Server"
+            ]
+            dock_info = [
+                win for win in Quartz.CGWindowListCopyWindowInfo(
+                    Quartz.kCGWindowListOptionAll,
+                    Quartz.kCGNullWindowID
+                ) if win.get("kCGWindowName", None) == "Dock"
+            ]
+
+            futures = [
+                executor.submit(_create_axui_node, wnd, None, 0)
+                for wnd in foreground_windows + dock_info
+            ]
+
+            for future in concurrent.futures.as_completed(futures):
+                xml_tree = future.result()
+                if xml_tree is not None:
+                    xml_node.append(xml_tree)
+
+        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})
+
+    else:
+        return "Currently not implemented for platform {:}.".format(platform.platform()), 500
+
+
+@app.route('/screen_size', methods=['POST'])
+def get_screen_size():
+    """Return the screen size as JSON {"width": ..., "height": ...}.
+    Linux uses Xlib, Windows uses user32, anything else falls back to pyautogui.
+    """
+    if platform_name == "Linux":
+        d = display.Display()
+        screen_width = d.screen().width_in_pixels
+        screen_height = d.screen().height_in_pixels
+    elif platform_name == "Windows":
+        user32 = ctypes.windll.user32
+        screen_width: int = user32.GetSystemMetrics(0)
+        screen_height: int = user32.GetSystemMetrics(1)
+    else:  # previously both names stayed unbound here and the view crashed
+        screen_width, screen_height = pyautogui.size()
+    return jsonify({"width": screen_width, "height": screen_height})
+
+
+@app.route('/window_size', methods=['POST'])
+def get_window_size():
+    """Return the size of the first X11 window whose WM_CLASS contains `app_class_name`.
+
+    Responses: 200 {"width", "height"}; 400 field missing; 501 off Linux; 404 no matching window.
+    """
+    if 'app_class_name' in request.form:
+        app_class_name = request.form['app_class_name']
+    else:
+        return jsonify({"error": "app_class_name is required"}), 400
+
+    if platform_name != "Linux":
+        return jsonify({"error": "window_size is only supported on Linux"}), 501
+
+    d = display.Display()
+    root = d.screen().root
+    client_list = root.get_full_property(d.intern_atom('_NET_CLIENT_LIST'), X.AnyPropertyType)
+    window_ids = client_list.value if client_list is not None else []  # None when the property is not set
+
+    for window_id in window_ids:
+        try:
+            window = d.create_resource_object('window', window_id)
+            wm_class = window.get_wm_class()
+
+            if wm_class is None:
+                continue
+
+            if app_class_name.lower() in [name.lower() for name in wm_class]:
+                geom = window.get_geometry()
+                return jsonify({"width": geom.width, "height": geom.height})
+        except Xlib.error.XError:  # Ignore windows that give an error
+            continue
+    return jsonify({"error": f"No window found for class: {app_class_name}"}), 404  # a view must not return None
+
+
+@app.route('/desktop_path', methods=['POST'])
+def get_desktop_path():
+    """Report the current user's Desktop directory as JSON, or 404 when it is unavailable."""
+    # Resolve <home>/Desktop up front; every supported platform uses this same location.
+    home_directory = str(Path.home())
+    desktop_candidate = os.path.join(home_directory, "Desktop")
+
+    # Any platform outside this list yields None and therefore the 404 below.
+    supported_systems = ("Windows", "Darwin", "Linux")
+    desktop_path = desktop_candidate if platform.system() in supported_systems else None
+
+    # Reject unsupported platforms and desktops that do not exist on disk.
+    if not desktop_path or not os.path.exists(desktop_path):
+        return jsonify(error="Unsupported operating system or desktop path not found"), 404
+
+    # Supported platform and the directory is present.
+    return jsonify(desktop_path=desktop_path)
+
+
+@app.route('/wallpaper', methods=['POST'])
+def get_wallpaper():  # sends the current wallpaper file; aborts with 400 / 404 / 500 otherwise
+    def get_wallpaper_windows():  # path reported by SystemParametersInfoW(SPI_GETDESKWALLPAPER)
+        SPI_GETDESKWALLPAPER = 0x73
+        MAX_PATH = 260
+        buffer = ctypes.create_unicode_buffer(MAX_PATH)
+        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
+        return buffer.value
+
+    def get_wallpaper_macos():  # asks System Events via osascript; None if anything was written to stderr
+        script = """
+        tell application "System Events" to tell every desktop to get picture
+        """
+        process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        output, error = process.communicate()
+        if error:
+            app.logger.error("Error: %s", error.decode('utf-8'))
+            return None
+        return output.strip().decode('utf-8')
+
+    def get_wallpaper_linux():  # reads the GNOME `picture-uri` setting; None if gsettings exits non-zero
+        try:
+            output = subprocess.check_output(
+                ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"],
+                stderr=subprocess.PIPE
+            )
+            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
+        except subprocess.CalledProcessError as e:
+            app.logger.error("Error: %s", e)
+            return None
+
+    os_name = platform.system()
+    wallpaper_path = None
+    if os_name == 'Windows':
+        wallpaper_path = get_wallpaper_windows()
+    elif os_name == 'Darwin':
+        wallpaper_path = get_wallpaper_macos()
+    elif os_name == 'Linux':
+        wallpaper_path = get_wallpaper_linux()
+    else:
+        app.logger.error(f"Unsupported OS: {os_name}")
+        abort(400, description="Unsupported OS")
+
+    if wallpaper_path:
+        try:
+            # NOTE(review): the path is handed to send_file unchanged and is always labelled image/png
+            return send_file(wallpaper_path, mimetype='image/png')
+        except Exception as e:
+            app.logger.error(f"An error occurred while serving the wallpaper file: {e}")
+            abort(500, description="Unable to serve the wallpaper file")
+    else:
+        abort(404, description="Wallpaper file not found")
+
+
+@app.route('/list_directory', methods=['POST'])
+def get_directory_tree():
+    """Return the recursive listing of JSON field `path` as {"directory_tree": ...}; 400 on bad input."""
+    def _list_dir_contents(directory):
+        """
+        List the contents of a directory recursively, building a tree structure.
+
+        :param directory: The path of the directory to inspect.
+        :return: A nested dictionary with the contents of the directory.
+        """
+        tree = {'type': 'directory', 'name': os.path.basename(directory), 'children': []}
+        try:
+            # List all files and directories in the current directory
+            for entry in os.listdir(directory):
+                full_path = os.path.join(directory, entry)
+                # If entry is a directory, recurse into it
+                if os.path.isdir(full_path):
+                    tree['children'].append(_list_dir_contents(full_path))
+                else:
+                    tree['children'].append({'type': 'file', 'name': entry})
+        except OSError as e:
+            # If the directory cannot be accessed, return the exception message
+            tree = {'error': str(e)}
+        return tree
+
+    # Extract the 'path' parameter; a body that is not a JSON object (null, list, string) has none
+    data = request.get_json()
+    if not isinstance(data, dict) or 'path' not in data:
+        return jsonify(error="Missing 'path' parameter"), 400
+
+    start_path = data['path']
+    # Ensure the provided path is a directory
+    if not os.path.isdir(start_path):
+        return jsonify(error="The provided path is not a directory"), 400
+
+    directory_tree = _list_dir_contents(start_path)
+    return jsonify(directory_tree=directory_tree)
+
+
+@app.route('/file', methods=['POST'])
+def get_file():
+    """Send the file named by form field `file_path` back to the client as an attachment.
+
+    Responds 400 when the field is absent, 404 when the file is missing, 500 on any other error.
+    """
+    if 'file_path' not in request.form:
+        return jsonify({"error": "file_path is required"}), 400
+    # Expand a leading ~ first, then environment variables.
+    file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))
+
+    try:
+        if os.path.exists(file_path):
+            # The size is looked up only for the log line.
+            size_in_bytes = os.path.getsize(file_path)
+            logger.info(f"Serving file: {file_path} ({size_in_bytes} bytes)")
+            return send_file(file_path, as_attachment=True)
+        return jsonify({"error": "File not found"}), 404
+    except FileNotFoundError:
+        # The file disappeared after the existence check.
+        return jsonify({"error": "File not found"}), 404
+    except Exception as exc:
+        logger.error(f"Error serving file {file_path}: {exc}")
+        return jsonify({"error": f"Failed to serve file: {str(exc)}"}), 500
+
+
+@app.route("/setup/upload", methods=["POST"])
+def upload_file():
+    """Save uploaded `file_data` at form field `file_path`; 400 if either is missing, 500 if saving fails."""
+    if 'file_path' not in request.form or 'file_data' not in request.files:
+        return jsonify({"error": "file_path and file_data are required"}), 400
+
+    file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))
+    file = request.files["file_data"]
+
+    try:
+        # Ensure target directory exists
+        target_dir = os.path.dirname(file_path)
+        if target_dir:  # Only create directory if it's not empty
+            os.makedirs(target_dir, exist_ok=True)
+
+        # Save file and get size for verification
+        file.save(file_path)
+        uploaded_size = os.path.getsize(file_path)
+
+        logger.info(f"File uploaded successfully: {file_path} ({uploaded_size} bytes)")
+        return f"File Uploaded: {uploaded_size} bytes"
+
+    except Exception as e:
+        logger.error(f"Error uploading file to {file_path}: {e}")
+        # Clean up partial file if it exists
+        if os.path.exists(file_path):
+            try:
+                os.remove(file_path)
+            except OSError:  # cleanup is best effort; the upload error is what gets reported
+                pass
+        return jsonify({"error": f"Failed to upload file: {str(e)}"}), 500
+
+
+@app.route('/platform', methods=['GET'])
+def get_platform():  # plain-text OS name, the same value the other handlers compare against
+    return platform.system()  # e.g. "Linux", "Windows" or "Darwin"
+
+
+@app.route('/cursor_position', methods=['GET'])
+def get_cursor_position():  # reports the current mouse pointer position as read by pyautogui
+    pos = pyautogui.position()
+    return jsonify(pos.x, pos.y)  # two positional args -> serialized as the JSON array [x, y]
+
+@app.route("/setup/change_wallpaper", methods=['POST'])
+def change_wallpaper():
+    """Set the desktop wallpaper to JSON field `path`; 400 if missing, 404 if not found, 500 on failure."""
+    data = request.json or {}
+    path = data.get('path', None)
+
+    if not path:
+        return "Path not supplied!", 400
+
+    path = Path(os.path.expandvars(os.path.expanduser(path)))
+
+    if not path.exists():
+        return f"File not found: {path}", 404
+
+    try:
+        user_platform = platform.system()
+        # check=True / WinError: a failing platform call becomes the 500 below instead of a false success
+        if user_platform == "Windows":
+            if not ctypes.windll.user32.SystemParametersInfoW(20, 0, str(path), 3):
+                raise ctypes.WinError()
+        elif user_platform == "Linux":
+            subprocess.run(["gsettings", "set", "org.gnome.desktop.background", "picture-uri", f"file://{path}"], check=True)
+        elif user_platform == "Darwin":  # (Mac OS)
+            subprocess.run(
+                ["osascript", "-e", f'tell application "Finder" to set desktop picture to POSIX file "{path}"'], check=True)
+        return "Wallpaper changed successfully"
+    except Exception as e:
+        return f"Failed to change wallpaper. Error: {e}", 500
+
+
+@app.route("/setup/download_file", methods=['POST'])
+def download_file():
+    """Download JSON field `url` to JSON field `path` (up to 3 attempts); 400 on bad input, 500 on failure."""
+    data = request.json or {}
+    url = data.get('url', None)
+    path = data.get('path', None)
+
+    if not url or not path:
+        return "Path or URL not supplied!", 400
+
+    path = Path(os.path.expandvars(os.path.expanduser(path)))
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    max_retries = 3
+    error: Optional[Exception] = None
+
+    for i in range(max_retries):
+        try:
+            logger.info(f"Download attempt {i+1}/{max_retries} for {url}")
+            with requests.get(url, stream=True, timeout=300) as response:  # always release the connection
+                response.raise_for_status()
+                # Get expected file size if available
+                total_size = int(response.headers.get('content-length', 0))
+                if total_size > 0:
+                    logger.info(f"Expected file size: {total_size / (1024*1024):.2f} MB")
+
+                downloaded_size = 0
+                with open(path, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if chunk:
+                            f.write(chunk)
+                            downloaded_size += len(chunk)
+                            if total_size > 0 and downloaded_size % (1024*1024) == 0:  # Log every MB
+                                progress = (downloaded_size / total_size) * 100
+                                logger.info(f"Download progress: {progress:.1f}%")
+
+            # Verify download completeness
+            actual_size = os.path.getsize(path)
+            if total_size > 0 and actual_size != total_size:
+                raise Exception(f"Download incomplete. Expected {total_size} bytes, got {actual_size} bytes")
+
+            logger.info(f"File downloaded successfully: {path} ({actual_size} bytes)")
+            return f"File downloaded successfully: {actual_size} bytes"
+
+        except Exception as e:
+            error = e
+            logger.error(f"Failed to download {url}: {e}. Retrying... ({max_retries - i - 1} attempts left)")
+            # Clean up partial download
+            if path.exists():
+                try:
+                    path.unlink()
+                except OSError:  # best effort; the next attempt reopens the file with 'wb' anyway
+                    pass
+
+    return f"Failed to download {url}. No retries left. Error: {error}", 500
+
+
+@app.route("/setup/open_file", methods=['POST'])
+def open_file():
+    data = request.json
+    path = data.get('path', None)
+
+    if not path:
+        return "Path not supplied!", 400
+
+    path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
+
+    # Check if it's a file path that exists
+    is_file_path = path_obj.exists()
+
+    # If it's not a file path, treat it as an application name/command
+    if not is_file_path:
+        # Check if it's a valid command by trying to find it in PATH
+        import shutil
+        if not shutil.which(path):
+            return f"Application/file not found: {path}", 404
+
+    try:
+        if is_file_path:
+            # Handle file opening
+            if platform.system() == "Windows":
+                os.startfile(path_obj)
+            else:
+                open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
+                subprocess.Popen([open_cmd, str(path_obj)])
+            file_name = path_obj.name
+            file_name_without_ext, _ = os.path.splitext(file_name)
+        else:
+            # Handle application launching
+            if platform.system() == "Windows":
+                subprocess.Popen([path])
+            else:
+                subprocess.Popen([path])
+            file_name = path
+            file_name_without_ext = path
+
+        # Wait for the file/application to open
+
+        start_time = time.time()
+        window_found = False
+
+        while time.time() - start_time < TIMEOUT:
+            os_name = platform.system()
+            if os_name in ['Windows', 'Darwin']:
+                import pygetwindow as gw
+                # Check for window title containing file name or file name without extension
+                windows = gw.getWindowsWithTitle(file_name)
+                if not windows:
+                    windows = gw.getWindowsWithTitle(file_name_without_ext)
+
+                if windows:
+                    # To be more specific, we can try to activate it
+                    windows[0].activate()
+                    window_found = True
+                    break
+            elif os_name == 'Linux':
+                try:
+                    # Using wmctrl to list windows and check if any window title contains the filename
+                    result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
+                    window_list = result.stdout.strip().split('\n')
+                    if not result.stdout.strip():
+                        pass  # No windows, just continue waiting
+                    else:
+                        for window in window_list:
+                            if file_name in window or file_name_without_ext in window:
+                                # a window is found, now activate it
+                                window_id = window.split()[0]
+                                subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
+                                window_found = True
+                                break
+                        if window_found:
+                            break
+                except (subprocess.CalledProcessError, FileNotFoundError):
+                    # wmctrl might not be installed or the window manager isn't ready.
+                    # We just log it once and let the main loop retry.
+                    if 'wmctrl_failed_once' not in locals():
+                        logger.warning("wmctrl command is not ready, will keep retrying...")
+                        wmctrl_failed_once = True
+                    pass  # Let the outer loop retry
+
+            time.sleep(1)
+
+        if window_found:
+            return "File opened and window activated successfully"
+        else:
+            return f"Failed to find window for {file_name} within {TIMEOUT} seconds.", 500
+
+    except Exception as e:
+        return f"Failed to open {path}. Error: {e}", 500
+
+
+@app.route("/setup/activate_window", methods=['POST'])
+def activate_window():
+    data = request.json
+    window_name = data.get('window_name', None)
+    if not window_name:
+        return "window_name required", 400
+    strict: bool = data.get("strict", False)  # compare case-sensitively and match the whole string
+    by_class_name: bool = data.get("by_class", False)
+
+    os_name = platform.system()
+
+    if os_name == 'Windows':
+        import pygetwindow as gw
+        if by_class_name:
+            return "Get window by class name is not supported on Windows currently.", 500
+        windows: List[gw.Window] = gw.getWindowsWithTitle(window_name)
+
+        window: Optional[gw.Window] = None
+        if len(windows) == 0:
+            return "Window {:} not found (empty results)".format(window_name), 404
+        elif strict:
+            for wnd in windows:
+                if wnd.title == wnd:
+                    window = wnd
+            if window is None:
+                return "Window {:} not found (strict mode).".format(window_name), 404
+        else:
+            window = windows[0]
+        window.activate()
+
+    elif os_name == 'Darwin':
+        import pygetwindow as gw
+        if by_class_name:
+            return "Get window by class name is not supported on macOS currently.", 500
+        # Find the VS Code window
+        windows = gw.getWindowsWithTitle(window_name)
+
+        window: Optional[gw.Window] = None
+        if len(windows) == 0:
+            return "Window {:} not found (empty results)".format(window_name), 404
+        elif strict:
+            for wnd in windows:
+                if wnd.title == wnd:
+                    window = wnd
+            if window is None:
+                return "Window {:} not found (strict mode).".format(window_name), 404
+        else:
+            window = windows[0]
+
+        # Un-minimize the window and then bring it to the front
+        window.unminimize()
+        window.activate()
+
+    elif os_name == 'Linux':
+        # Attempt to activate VS Code window using wmctrl
+        subprocess.run(["wmctrl"
+                           , "-{:}{:}a".format("x" if by_class_name else ""
+                                               , "F" if strict else ""
+                                               )
+                           , window_name
+                        ]
+                       )
+
+    else:
+        return f"Operating system {os_name} not supported.", 400
+
+    return "Window activated successfully", 200
+
+
+@app.route("/setup/close_window", methods=["POST"])
+def close_window():
+    data = request.json
+    if "window_name" not in data:
+        return "window_name required", 400
+    window_name: str = data["window_name"]
+    strict: bool = data.get("strict", False)  # compare case-sensitively and match the whole string
+    by_class_name: bool = data.get("by_class", False)
+
+    os_name: str = platform.system()
+    if os_name == "Windows":
+        import pygetwindow as gw
+
+        if by_class_name:
+            return "Get window by class name is not supported on Windows currently.", 500
+        windows: List[gw.Window] = gw.getWindowsWithTitle(window_name)
+
+        window: Optional[gw.Window] = None
+        if len(windows) == 0:
+            return "Window {:} not found (empty results)".format(window_name), 404
+        elif strict:
+            for wnd in windows:
+                if wnd.title == wnd:
+                    window = wnd
+            if window is None:
+                return "Window {:} not found (strict mode).".format(window_name), 404
+        else:
+            window = windows[0]
+        window.close()
+    elif os_name == "Linux":
+        subprocess.run(["wmctrl"
+                           , "-{:}{:}c".format("x" if by_class_name else ""
+                                               , "F" if strict else ""
+                                               )
+                           , window_name
+                        ]
+                       )
+    elif os_name == "Darwin":
+        import pygetwindow as gw
+        return "Currently not supported on macOS.", 500
+    else:
+        return "Not supported platform {:}".format(os_name), 500
+
+    return "Window closed successfully.", 200
+
+
+@app.route('/start_recording', methods=['POST'])
+def start_recording():
+    global recording_process
+    if recording_process and recording_process.poll() is None:
+        return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400
+
+    # Clean up previous recording if it exists
+    if os.path.exists(recording_path):
+        try:
+            os.remove(recording_path)
+        except OSError as e:
+            logger.error(f"Error removing old recording file: {e}")
+            return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500
+
+    if platform_name == "Linux":
+        d = display.Display()
+        screen_width = d.screen().width_in_pixels
+        screen_height = d.screen().height_in_pixels
+        start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
+    elif platform_name == "Windows":
+        user32 = ctypes.windll.user32
+        screen_width = user32.GetSystemMetrics(0)
+        screen_height = user32.GetSystemMetrics(1)
+        # Use gdigrab for Windows screen capture
+        start_command = f"ffmpeg -y -f gdigrab -draw_mouse 1 -framerate 30 -video_size {screen_width}x{screen_height} -i desktop -c:v libx264 -r 30 {recording_path}"
+    else:
+        return jsonify({'status': 'error', 'message': f'Recording not supported on {platform_name}'}), 501
+
+    # Use stderr=PIPE to capture potential errors from ffmpeg
+    recording_process = subprocess.Popen(shlex.split(start_command),
+                                         stdout=subprocess.DEVNULL,
+                                         stderr=subprocess.PIPE,
+                                         text=True  # To get stderr as string
+                                         )
+
+    # Wait a couple of seconds to see if ffmpeg starts successfully
+    try:
+        # Wait for 2 seconds. If ffmpeg exits within this time, it's an error.
+        recording_process.wait(timeout=2)
+        # If wait() returns, it means the process has terminated.
+        error_output = recording_process.stderr.read()
+        return jsonify({
+            'status': 'error',
+            'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
+        }), 500
+    except subprocess.TimeoutExpired:
+        # This is the expected outcome: the process is still running after 2 seconds.
+        return jsonify({'status': 'success', 'message': 'Started recording successfully.'})
+
+
+@app.route('/end_recording', methods=['POST'])
+def end_recording():
+    global recording_process
+
+    if not recording_process or recording_process.poll() is not None:
+        recording_process = None  # Clean up stale process object
+        return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
+
+    error_output = ""
+    try:
+        # Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file.
+        # On Windows, use CTRL_C_EVENT; on Unix, use SIGINT
+        if platform_name == "Windows":
+            # On Windows, we need to terminate the process gracefully
+            # ffmpeg responds to standard input 'q' to quit gracefully
+            try:
+                recording_process.stdin.write(b'q')
+                recording_process.stdin.flush()
+            except:
+                # If stdin is not available, use terminate
+                recording_process.terminate()
+        else:
+            recording_process.send_signal(signal.SIGINT)
+        # Wait for ffmpeg to terminate. communicate() gets output and waits.
+        _, error_output = recording_process.communicate(timeout=15)
+    except subprocess.TimeoutExpired:
+        logger.error("ffmpeg did not respond to stop signal, killing the process.")
+        recording_process.kill()
+        # After killing, communicate to get any remaining output.
+        _, error_output = recording_process.communicate()
+        recording_process = None
+        return jsonify({
+            'status': 'error',
+            'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
+        }), 500
+
+    recording_process = None  # Clear the process from global state
+
+    # Check if the recording file was created and is not empty.
+    if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
+        return send_file(recording_path, as_attachment=True)
+    else:
+        logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
+        return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
+
+
+@app.route("/run_python", methods=['POST'])
+def run_python():
+    data = request.json
+    code = data.get('code', None)
+
+    if not code:
+        return jsonify({'status': 'error', 'message': 'Code not supplied!'}), 400
+
+    # Create a temporary file to save the Python code
+    import tempfile
+    import uuid
+
+    # Generate unique filename
+    temp_filename = f"/tmp/python_exec_{uuid.uuid4().hex}.py"
+
+    try:
+        # Write code to temporary file
+        with open(temp_filename, 'w') as f:
+            f.write(code)
+
+        # Execute the file using subprocess to capture all output
+        # Use sys.executable to use the same Python interpreter as the Flask server
+        result = subprocess.run(
+            [sys.executable, temp_filename],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            timeout=30  # 30 second timeout
+        )
+
+        # Clean up the temporary file
+        try:
+            os.remove(temp_filename)
+        except:
+            pass  # Ignore cleanup errors
+
+        # Prepare response
+        output = result.stdout
+        error_output = result.stderr
+
+        # Combine output and errors if both exist
+        combined_message = output
+        if error_output:
+            combined_message += ('\n' + error_output) if output else error_output
+
+        # Determine status based on return code and errors
+        if result.returncode != 0:
+            status = 'error'
+            if not error_output:
+                # If no stderr but non-zero return code, add a generic error message
+                error_output = f"Process exited with code {result.returncode}"
+                combined_message = combined_message + '\n' + error_output if combined_message else error_output
+        else:
+            status = 'success'
+
+        return jsonify({
+            'status': status,
+            'message': combined_message,
+            'need_more': False,      # Not applicable for file execution
+            'output': output,        # stdout only
+            'error': error_output,   # stderr only
+            'return_code': result.returncode
+        })
+
+    except subprocess.TimeoutExpired:
+        # Clean up the temporary file on timeout
+        try:
+            os.remove(temp_filename)
+        except:
+            pass
+
+        return jsonify({
+            'status': 'error',
+            'message': 'Execution timeout: Code took too long to execute',
+            'error': 'TimeoutExpired',
+            'need_more': False,
+            'output': None,
+        }), 500
+
+    except Exception as e:
+        # Clean up the temporary file on error
+        try:
+            os.remove(temp_filename)
+        except:
+            pass
+
+        # Capture the exception details
+        return jsonify({
+            'status': 'error',
+            'message': f'Execution error: {str(e)}',
+            'error': traceback.format_exc(),
+            'need_more': False,
+            'output': None,
+        }), 500
+
+
+@app.route("/run_bash_script", methods=['POST'])
+def run_bash_script():
+    data = request.json
+    script = data.get('script', None)
+    timeout = data.get('timeout', 100)  # Default timeout of 30 seconds
+    working_dir = data.get('working_dir', None)
+
+    if not script:
+        return jsonify({
+            'status': 'error',
+            'output': 'Script not supplied!',
+            'error': "",  # Always empty as requested
+            'returncode': -1
+        }), 400
+
+    # Expand user directory if provided
+    if working_dir:
+        working_dir = os.path.expanduser(working_dir)
+        if not os.path.exists(working_dir):
+            return jsonify({
+                'status': 'error',
+                'output': f'Working directory does not exist: {working_dir}',
+                'error': "",  # Always empty as requested
+                'returncode': -1
+            }), 400
+
+    # Create a temporary script file
+    import tempfile
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False) as tmp_file:
+        if "#!/bin/bash" not in script:
+            script = "#!/bin/bash\n\n" + script
+        tmp_file.write(script)
+        tmp_file_path = tmp_file.name
+
+    try:
+        # Make the script executable
+        os.chmod(tmp_file_path, 0o755)
+
+        # Execute the script
+        if platform_name == "Windows":
+            # On Windows, use Git Bash or WSL if available, otherwise cmd
+            flags = subprocess.CREATE_NO_WINDOW
+            # Try to use bash if available (Git Bash, WSL, etc.)
+            result = subprocess.run(
+                ['bash', tmp_file_path],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,  # Merge stderr into stdout
+                text=True,
+                timeout=timeout,
+                cwd=working_dir,
+                creationflags=flags,
+                shell=False
+            )
+        else:
+            # On Unix-like systems, use bash directly
+            flags = 0
+            result = subprocess.run(
+                ['/bin/bash', tmp_file_path],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,  # Merge stderr into stdout
+                text=True,
+                timeout=timeout,
+                cwd=working_dir,
+                creationflags=flags,
+                shell=False
+            )
+
+        # Log the command execution for trajectory recording
+        _append_event("BashScript",
+                      {"script": script, "output": result.stdout, "error": "", "returncode": result.returncode},
+                      ts=time.time())
+
+        return jsonify({
+            'status': 'success' if result.returncode == 0 else 'error',
+            'output': result.stdout,  # Contains both stdout and stderr merged
+            'error': "",  # Always empty as requested
+            'returncode': result.returncode
+        })
+
+    except subprocess.TimeoutExpired:
+        return jsonify({
+            'status': 'error',
+            'output': f'Script execution timed out after {timeout} seconds',
+            'error': "",  # Always empty as requested
+            'returncode': -1
+        }), 500
+    except FileNotFoundError:
+        # Bash not found, try with sh
+        try:
+            result = subprocess.run(
+                ['sh', tmp_file_path],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,  # Merge stderr into stdout
+                text=True,
+                timeout=timeout,
+                cwd=working_dir,
+                shell=False
+            )
+
+            _append_event("BashScript",
+                          {"script": script, "output": result.stdout, "error": "", "returncode": result.returncode},
+                          ts=time.time())
+
+            return jsonify({
+                'status': 'success' if result.returncode == 0 else 'error',
+                'output': result.stdout,  # Contains both stdout and stderr merged
+                'error': "",  # Always empty as requested
+                'returncode': result.returncode,
+            })
+        except Exception as e:
+            return jsonify({
+                'status': 'error',
+                'output': f'Failed to execute script: {str(e)}',
+                'error': "",  # Always empty as requested
+                'returncode': -1
+            }), 500
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'output': f'Failed to execute script: {str(e)}',
+            'error': "",  # Always empty as requested
+            'returncode': -1
+        }), 500
+    finally:
+        # Clean up the temporary file
+        try:
+            os.unlink(tmp_file_path)
+        except:
+            pass
+
+if __name__ == '__main__':  # Script entry point: start Flask's built-in development server.
+    app.run(debug=True, host="0.0.0.0")  # NOTE(review): debug mode while listening on all interfaces — confirm the host is on an isolated network
diff --git a/desktop_env/server_win7/start_flask.bat b/desktop_env/server_win7/start_flask.bat
new file mode 100644
index 0000000..14f0fff
--- /dev/null
+++ b/desktop_env/server_win7/start_flask.bat
@@ -0,0 +1,5 @@
+@echo off
+REM Launch the Flask server (main.py) from D:\python_server in the background.
+REM "start /B" does not open a new window for the launched pythonw process.
+cd /d D:\python_server
+start /B pythonw D:\python_server\main.py
diff --git a/desktop_env/server_win7/transfer_to_win7.sh b/desktop_env/server_win7/transfer_to_win7.sh
new file mode 100755
index 0000000..8612422
--- /dev/null
+++ b/desktop_env/server_win7/transfer_to_win7.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Mac-side transfer script: shares the Win7 offline packages over HTTP so that the Win7 machine can download them with a browser or PowerShell
+# Usage: bash transfer_to_win7.sh
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+PKG_DIR="$SCRIPT_DIR/win7_offline_packages"  # NOTE(review): not referenced again in this script
+WIN7_IP="192.168.1.11"
+MAC_IP="192.168.1.10"
+PORT=8888
+
+echo "========================================"
+echo " Mac → Win7 离线包传输工具"
+echo " Mac: $MAC_IP    Win7: $WIN7_IP"
+echo "========================================"
+echo ""
+
+# Check connectivity (a failed ping only prints a warning; the script continues)
+echo "[1/3] 检测 Win7 网络连通性..."
+if ping -c 2 -W 1000 "$WIN7_IP" > /dev/null 2>&1; then
+    echo "  [OK] Win7 ($WIN7_IP) 可达"
+else
+    echo "  [警告] ping 超时，请确认网线已连接且 IP 配置正确"
+fi
+
+# Build the zip archive of the offline packages
+echo ""
+echo "[2/3] 打包离线文件..."
+cd "$SCRIPT_DIR"
+zip -r win7_server_packages.zip win7_offline_packages/ 2>/dev/null
+echo "  [OK] 已打包 → $(pwd)/win7_server_packages.zip"
+echo "  [OK] 解压脚本 → $(pwd)/extract_and_install.bat"
+
+# Start the HTTP server
+echo ""
+echo "[3/3] 启动文件共享服务器..."
+echo ""
+echo "  ┌──────────────────────────────────────────────────────────┐"
+echo "  │  在 Win7 浏览器（IE）中打开：                            │"
+echo "  │  http://$MAC_IP:$PORT/                                    │"
+echo "  │                                                           │"
+echo "  │  ★ 第一步：下载 extract_and_install.bat（小文件，先下）  │"
+echo "  │  ★ 第二步：下载 win7_server_packages.zip（大文件）       │"
+echo "  │  ★ 第三步：把两个文件放同一目录，双击 bat 文件           │"
+echo "  │            (会自动解压 + 自动安装所有依赖)               │"
+echo "  │                                                           │"
+echo "  │  如果无法双击 bat → 在命令提示符(cmd)中运行：            │"
+echo "  │  cd 下载目录路径                                          │"
+echo "  │  extract_and_install.bat                                 │"
+echo "  └──────────────────────────────────────────────────────────┘"
+echo ""
+echo "  按 Ctrl+C 停止服务器"
+echo ""
+
+# Serve files over HTTP from the directory that contains the zip
+cd "$SCRIPT_DIR"
+python3 -m http.server $PORT
diff --git a/desktop_env/server_win7/win7_offline_packages/MarkupSafe-2.1.5-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/MarkupSafe-2.1.5-cp38-cp38-win32.whl
new file mode 100644
index 0000000..3b25326
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/MarkupSafe-2.1.5-cp38-cp38-win32.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/MouseInfo-0.1.3.tar.gz b/desktop_env/server_win7/win7_offline_packages/MouseInfo-0.1.3.tar.gz
new file mode 100644
index 0000000..b71965f
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/MouseInfo-0.1.3.tar.gz differ
diff --git a/desktop_env/server_win7/win7_offline_packages/PyAutoGUI-0.9.54.tar.gz b/desktop_env/server_win7/win7_offline_packages/PyAutoGUI-0.9.54.tar.gz
new file mode 100644
index 0000000..a79e0d1
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/PyAutoGUI-0.9.54.tar.gz differ
diff --git a/desktop_env/server_win7/win7_offline_packages/PyGetWindow-0.0.9.tar.gz b/desktop_env/server_win7/win7_offline_packages/PyGetWindow-0.0.9.tar.gz
new file mode 100644
index 0000000..085b37e
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/PyGetWindow-0.0.9.tar.gz differ
diff --git a/desktop_env/server_win7/win7_offline_packages/blinker-1.8.2-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/blinker-1.8.2-py3-none-any.whl
new file mode 100644
index 0000000..f8cb040
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/blinker-1.8.2-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/certifi-2026.2.25-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/certifi-2026.2.25-py3-none-any.whl
new file mode 100644
index 0000000..9d7fbf0
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/certifi-2026.2.25-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/charset_normalizer-3.4.5-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/charset_normalizer-3.4.5-cp38-cp38-win32.whl
new file mode 100644
index 0000000..6b1c062
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/charset_normalizer-3.4.5-cp38-cp38-win32.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/click-8.1.8-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/click-8.1.8-py3-none-any.whl
new file mode 100644
index 0000000..db2c6b3
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/click-8.1.8-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/flask-3.0.3-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/flask-3.0.3-py3-none-any.whl
new file mode 100644
index 0000000..cc25189
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/flask-3.0.3-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/idna-3.11-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/idna-3.11-py3-none-any.whl
new file mode 100644
index 0000000..28f2c10
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/idna-3.11-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/importlib_metadata-8.5.0-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/importlib_metadata-8.5.0-py3-none-any.whl
new file mode 100644
index 0000000..7f0acc0
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/importlib_metadata-8.5.0-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/install_win7.bat b/desktop_env/server_win7/win7_offline_packages/install_win7.bat
new file mode 100644
index 0000000..3cca161
--- /dev/null
+++ b/desktop_env/server_win7/win7_offline_packages/install_win7.bat
@@ -0,0 +1,43 @@
+@echo off
+REM Offline installer for Windows 7 (32-bit) with Python 3.8.
+REM Installs the packages stored next to this script without network access,
+REM then checks that the key modules can be imported.
+echo ========================================
+echo  Offline Install for Win7 32bit Py3.8
+echo ========================================
+echo.
+
+python --version 2>&1 | findstr /C:"3.8"
+if errorlevel 1 (
+    echo [ERROR] Python 3.8 not found.
+    pause
+    exit /b 1
+)
+echo [OK] Python 3.8 detected.
+
+echo.
+echo [Step 1] Installing build tools first...
+cd /d "%~dp0"
+python -m pip install --no-index --find-links . setuptools wheel
+echo [Step 2] Installing all packages...
+python -m pip install --no-index --find-links . six zipp importlib_metadata certifi idna urllib3 charset_normalizer requests MarkupSafe itsdangerous click blinker werkzeug jinja2 flask pywin32 numpy Pillow lxml pygame pymsgbox pytweening pyscreeze PyGetWindow MouseInfo PyAutoGUI pynput pywinauto
+REM Stop here on failure instead of reporting "Done!" for a broken install.
+if errorlevel 1 (
+    echo [ERROR] Package installation failed.
+    pause
+    exit /b 1
+)
+
+echo.
+echo [Step 3] Verifying...
+python -c "import flask; print('[OK] flask ' + flask.__version__)"
+python -c "import pyautogui; print('[OK] pyautogui')"
+python -c "import pywinauto; print('[OK] pywinauto')"
+python -c "import numpy; print('[OK] numpy ' + numpy.__version__)"
+python -c "import PIL; print('[OK] Pillow ' + PIL.__version__)"
+
+echo.
+echo ========================================
+echo  Done! Run: python ..\main.py
+echo ========================================
+pause
diff --git a/desktop_env/server_win7/win7_offline_packages/itsdangerous-2.2.0-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/itsdangerous-2.2.0-py3-none-any.whl
new file mode 100644
index 0000000..359ea0c
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/itsdangerous-2.2.0-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/jinja2-3.1.6-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/jinja2-3.1.6-py3-none-any.whl
new file mode 100644
index 0000000..5046d77
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/jinja2-3.1.6-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/lxml-6.0.2-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/lxml-6.0.2-cp38-cp38-win32.whl
new file mode 100644
index 0000000..ab63b77
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/lxml-6.0.2-cp38-cp38-win32.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/main_win7.py b/desktop_env/server_win7/win7_offline_packages/main_win7.py
new file mode 100644
index 0000000..bc9e9be
--- /dev/null
+++ b/desktop_env/server_win7/win7_offline_packages/main_win7.py
@@ -0,0 +1,2008 @@
+import ctypes
+import os
+import platform
+import shlex
+import json
+import subprocess, signal
+import sys
+import time
+from pathlib import Path
+from typing import Any, Optional, Sequence
+from typing import List, Dict, Tuple, Literal
+import concurrent.futures
+
+import lxml.etree
+import pyautogui
+import requests
+import re
+from PIL import Image, ImageGrab
+from flask import Flask, request, jsonify, send_file, abort  # , send_from_directory
+from lxml.etree import _Element
+
+platform_name: str = platform.system()
+
+if platform_name == "Linux":
+    import Xlib
+    from Xlib import display, X
+    from pyxcursor import Xcursor
+    import pyatspi
+    from pyatspi import Accessible, StateType, STATE_SHOWING
+    from pyatspi import Action as ATAction
+    from pyatspi import Component  # , Document
+    from pyatspi import Text as ATText
+    from pyatspi import Value as ATValue
+
+    BaseWrapper = Any
+
+elif platform_name == "Windows":
+    from pywinauto import Desktop
+    from pywinauto.base_wrapper import BaseWrapper
+    import pywinauto.application
+    import win32ui, win32gui
+
+    Accessible = Any
+    Xlib = None
+    display = None
+    X = None
+    Xcursor = None
+
+elif platform_name == "Darwin":
+    import plistlib
+    from pyxcursor import Xcursor
+
+    import AppKit
+    import ApplicationServices
+    import Foundation
+    import Quartz
+    import oa_atomacos
+
+    Accessible = Any
+    BaseWrapper = Any
+    Xlib = None
+
+else:
+    # Platform not supported
+    Accessible = None
+    BaseWrapper = Any
+    Xlib = None
+    display = None
+    X = None
+    Xcursor = None
+
+# todo: need to reformat and organize this whole file
+
+app = Flask(__name__)
+# pyautogui sleeps PAUSE seconds after every call by default; zero removes that delay.
+pyautogui.PAUSE = 0
+pyautogui.DARWIN_CATCH_UP_TIME = 0
+
+TIMEOUT = 1800  # seconds
+
+logger = app.logger  # reuse the Flask application's logger throughout this file
+recording_process = None  # fixme: this is a temporary solution for recording, need to be changed to support multiple-process
+recording_path = "/tmp/recording.mp4"  # NOTE(review): POSIX-style path; confirm it is usable when the server runs on Windows
+
+
+@app.route('/setup/execute', methods=['POST'])
+@app.route('/execute', methods=['POST'])
+def execute_command():
+    """Run the command from the JSON body and return its result as JSON.
+
+    Body keys: ``command`` (a list, or a string that is shlex-split unless ``shell`` is
+    true) and ``shell`` (default False).  The reply holds ``status``, ``output``, ``error``
+    and ``returncode``; an exception while running the subprocess gives HTTP 500.
+    """
+    data = request.json
+    # The 'command' key in the JSON request should contain the command to be executed.
+    shell = data.get('shell', False)
+    command = data.get('command', "" if shell else [])
+
+    if isinstance(command, str) and not shell:
+        command = shlex.split(command)
+
+    # Expand user directory
+    for i, arg in enumerate(command):
+        if arg.startswith("~/"):
+            command[i] = os.path.expanduser(arg)
+
+    # Replace 'python' with sys.executable to use the same Python interpreter as the server
+    if len(command) > 0 and command[0] in ['python', 'python3', 'python.exe', 'python3.exe']:
+        command[0] = sys.executable
+
+    # On Windows, if the command is `python -c "<code>"`, exec() the code directly in this
+    # process instead of spawning a subprocess.  Subprocesses launched from a windowless
+    # pythonw.exe parent cannot access the interactive desktop, so pyautogui calls fail
+    # with a 500 inside a subprocess.  Running exec() in the Flask process is safe because
+    # the Flask server itself already has desktop access (proven by /screenshot working).
+    if (platform_name == "Windows" and not shell and len(command) >= 3
+            and command[0] == sys.executable and command[1] == "-c"):
+        import io
+        import traceback
+        from contextlib import redirect_stdout, redirect_stderr
+        stdout_buf = io.StringIO()
+        stderr_buf = io.StringIO()
+        status, returncode, error_text = 'success', 0, None
+        try:
+            exec_globals = {"__builtins__": __builtins__}
+            with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):
+                exec(compile(command[2], "<pyautogui_exec>", "exec"), exec_globals)
+        except SystemExit as exc:
+            # exit()/sys.exit() in the snippet raises SystemExit, which is not an Exception
+            # and would otherwise escape the request handler; report it as an exit status.
+            if isinstance(exc.code, int):
+                returncode = exc.code
+            elif exc.code is not None:
+                stderr_buf.write("{}\n".format(exc.code))
+                returncode = 1
+        except Exception:
+            status, returncode, error_text = 'error', 1, traceback.format_exc()
+        return jsonify({
+            'status': status,
+            'output': stdout_buf.getvalue(),
+            'error': stderr_buf.getvalue() if error_text is None else error_text,
+            'returncode': returncode
+        })
+
+    # Execute the command without any safety checks.
+    try:
+        flags = subprocess.CREATE_NO_WINDOW if platform_name == "Windows" else 0
+        result = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            shell=shell,
+            text=True,
+            timeout=120,
+            creationflags=flags,
+        )
+        return jsonify({
+            'status': 'success',
+            'output': result.stdout,
+            'error': result.stderr,
+            'returncode': result.returncode
+        })
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)}), 500
+
+
+@app.route('/setup/execute_with_verification', methods=['POST'])
+@app.route('/execute_with_verification', methods=['POST'])
+def execute_command_with_verification():
+    """Run a command, then poll until the requested verification checks pass.
+
+    JSON body keys:
+        command / shell: the command to run; a string command is shlex-split
+            unless ``shell`` is true.
+        verification: optional dict; ``window_exists`` names a window title to
+            look for, ``command_success`` is a shell command that must exit 0.
+        max_wait_time / check_interval: polling budget and period in seconds
+            (defaults 10 and 1).
+
+    Responds with HTTP 200 as soon as every requested check passes (or right
+    away when no verification was requested).  Responds with HTTP 500 when the
+    checks still fail after ``max_wait_time`` or when running the command
+    raised an exception.
+    """
+    payload = request.json
+    shell = payload.get('shell', False)
+    command = payload.get('command', "" if shell else [])
+    verification = payload.get('verification', {})
+    max_wait_time = payload.get('max_wait_time', 10)  # Maximum wait time in seconds
+    check_interval = payload.get('check_interval', 1)  # Check interval in seconds
+
+    if not shell and isinstance(command, str):
+        command = shlex.split(command)
+
+    # Expand user directory
+    for idx, token in enumerate(command):
+        if token.startswith("~/"):
+            command[idx] = os.path.expanduser(token)
+
+    def window_is_present(window_name):
+        """Return False when the titled window is missing or the lookup fails."""
+        try:
+            if platform_name == 'Linux':
+                listing = subprocess.run(['wmctrl', '-l'],
+                                         capture_output=True, text=True, check=True)
+                return window_name.lower() in listing.stdout.lower()
+            if platform_name in ['Windows', 'Darwin']:
+                import pygetwindow as gw
+                return bool(gw.getWindowsWithTitle(window_name))
+        except Exception:
+            return False
+        # Other platforms have no window check, so the criterion counts as met.
+        return True
+
+    def command_succeeds(verify_cmd):
+        """Return True only when the shell command runs and exits with status 0."""
+        try:
+            outcome = subprocess.run(verify_cmd, shell=True,
+                                     capture_output=True, text=True, timeout=5)
+        except Exception:
+            return False
+        return outcome.returncode == 0
+
+    # Execute the main command
+    try:
+        creation_flags = subprocess.CREATE_NO_WINDOW if platform_name == "Windows" else 0
+        result = subprocess.run(
+            command,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            shell=shell,
+            text=True,
+            timeout=120,
+            creationflags=creation_flags,
+        )
+        # Fields shared by every reply that reports on the executed command.
+        report = {
+            'status': 'success',
+            'output': result.stdout,
+            'error': result.stderr,
+            'returncode': result.returncode
+        }
+
+        # If no verification is needed, return immediately
+        if not verification:
+            return jsonify(report)
+
+        # Wait and verify the result
+        started = time.time()
+        while time.time() - started < max_wait_time:
+            # Both checks are evaluated on every round, even if the first one failed.
+            checks_ok = True
+            if 'window_exists' in verification:
+                if not window_is_present(verification['window_exists']):
+                    checks_ok = False
+            if 'command_success' in verification:
+                if not command_succeeds(verification['command_success']):
+                    checks_ok = False
+
+            if checks_ok:
+                report.update(verification='passed', wait_time=time.time() - started)
+                return jsonify(report)
+
+            time.sleep(check_interval)
+
+        # Verification failed
+        report.update(status='verification_failed', verification='failed', wait_time=max_wait_time)
+        return jsonify(report), 500
+
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)}), 500
+
+
+def _get_machine_architecture() -> str:
+    """Classify ``platform.machine()`` (lower-cased) as 'amd', 'arm' or 'unknown'.
+    """
+    # x86-family and ARM-family spellings; any other name is reported as 'unknown'
+    families = {
+        'amd': ('amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686'),
+        'arm': ('arm64', 'aarch64', 'aarch32'),
+    }
+    machine = platform.machine().lower()
+    return next((family for family, names in families.items() if machine in names), 'unknown')
+
+
+@app.route('/setup/launch', methods=["POST"])
+def launch_app():
+    """Launch the JSON body's ``command`` without waiting for it; a launch error gives a JSON 500 reply."""
+    data = request.json
+    shell = data.get("shell", False)
+    command: List[str] = data.get("command", "" if shell else [])
+
+    if isinstance(command, str) and not shell:
+        command = shlex.split(command)
+
+    # Expand user directory
+    for i, arg in enumerate(command):
+        if arg.startswith("~/"):
+            command[i] = os.path.expanduser(arg)
+
+    try:
+        if 'google-chrome' in command and _get_machine_architecture() == 'arm':
+            command[command.index('google-chrome')] = 'chromium'  # arm64 chrome is not available yet, can only use chromium
+
+        # On Windows, launch through the shell (same as an Explorer double-click): Popen lacks
+        # the ShellExecute context that some apps require to initialize correctly.
+        if platform_name == "Windows" and not shell:
+            exe, extra_args = (command[0], command[1:]) if isinstance(command, list) else (command, [])
+            if extra_args:
+                # os.startfile() takes no arguments before Python 3.10; call ShellExecuteW (default verb, SW_SHOWNORMAL) so they are not dropped.
+                rc = ctypes.windll.shell32.ShellExecuteW(None, None, exe, subprocess.list2cmdline(extra_args), None, 1)
+                if rc <= 32:  # ShellExecute signals success with a value greater than 32
+                    raise OSError("ShellExecute failed with code {:}".format(rc))
+            else:
+                os.startfile(exe)
+            return "{:} launched successfully via startfile".format(exe)
+
+        # Set cwd to the exe's directory so apps that rely on relative paths work correctly
+        exe_dir = os.path.dirname(command[0]) if (not shell and isinstance(command, list) and command) else ""
+        cwd = exe_dir if exe_dir and os.path.isdir(exe_dir) else None
+        subprocess.Popen(command, shell=shell, cwd=cwd)
+        return "{:} launched successfully".format(command if shell else " ".join(command))
+    except Exception as e:
+        return jsonify({"status": "error", "message": str(e)}), 500
+
+
+@app.route('/screenshot', methods=['GET'])
+def capture_screen_with_cursor():
+    """Capture the screen with the cursor drawn in, save it under ./screenshots and send it as a PNG."""
+    file_path = os.path.join(os.path.dirname(__file__), "screenshots", "screenshot.png")
+    user_platform = platform.system()
+
+    # Ensure the screenshots directory exists
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+    if user_platform == "Windows":
+        def get_cursor():
+            hcursor = win32gui.GetCursorInfo()[1]
+            # Read the hotspot first: it used to be queried only after DestroyIcon(hcursor).
+            hotspot = win32gui.GetIconInfo(hcursor)[1:3]
+            screen_dc = win32gui.GetDC(0)
+            hdc = win32ui.CreateDCFromHandle(screen_dc)
+            hbmp = win32ui.CreateBitmap()
+            hbmp.CreateCompatibleBitmap(hdc, 36, 36)
+            hdc = hdc.CreateCompatibleDC()
+            hdc.SelectObject(hbmp)
+            hdc.DrawIcon((0,0), hcursor)
+
+            bmpinfo = hbmp.GetInfo()
+            bmpstr = hbmp.GetBitmapBits(True)
+            cursor = Image.frombuffer('RGB', (bmpinfo['bmWidth'], bmpinfo['bmHeight']), bmpstr, 'raw', 'BGRX', 0, 1).convert("RGBA")
+
+            win32gui.DestroyIcon(hcursor)
+            win32gui.DeleteObject(hbmp.GetHandle())
+            hdc.DeleteDC()
+            win32gui.ReleaseDC(0, screen_dc)  # GetDC() must be paired with ReleaseDC(), else one DC leaks per request
+
+            pixdata = cursor.load()
+
+            # Make pure-black opaque pixels transparent (presumably the untouched bitmap background)
+            width, height = cursor.size
+            for y in range(height):
+                for x in range(width):
+                    if pixdata[x, y] == (0, 0, 0, 255):
+                        pixdata[x, y] = (0, 0, 0, 0)
+
+            return (cursor, hotspot)
+
+        # Win8.1+ uses shcore; Win7 fallback uses GetDeviceCaps via GDI
+        try:
+            ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100
+        except (OSError, AttributeError):
+            hdc = ctypes.windll.user32.GetDC(0)
+            logical_dpi = ctypes.windll.gdi32.GetDeviceCaps(hdc, 88)  # LOGPIXELSX
+            ctypes.windll.user32.ReleaseDC(0, hdc)
+            ratio = logical_dpi / 96.0  # 96 DPI == 100% scaling
+
+        # get logical screen size
+        user32 = ctypes.windll.user32
+        logical_width = user32.GetSystemMetrics(0)
+        logical_height = user32.GetSystemMetrics(1)
+
+        # ===== Key fix: get cursor position before taking screenshot =====
+        # win32gui.GetCursorPos() returns logical coordinates (consistent with pyautogui)
+        pos_win = win32gui.GetCursorPos()
+        logger.info(f"Cursor position (logical coordinates): {pos_win}")
+
+        # Take screenshot immediately to reduce time difference
+        img = ImageGrab.grab(bbox=None, include_layered_windows=True)
+
+        # ===== DPI scaling fix =====
+        if ratio != 1.0:
+            physical_width, physical_height = img.size
+            logger.info(f"Detected DPI scaling: {ratio}x ({ratio*100}%)")
+            logger.info(f"Physical screenshot size: {physical_width}x{physical_height}")
+            logger.info(f"Logical resolution: {logical_width}x{logical_height}")
+            logger.info(f"Resizing screenshot to match logical resolution...")
+            img = img.resize((logical_width, logical_height), Image.Resampling.LANCZOS)
+            logger.info(f"Screenshot resized to: {img.size}")
+
+        try:
+            cursor, (hotspotx, hotspoty) = get_cursor()
+
+            # ===== Cursor position handling =====
+            # win32gui.GetCursorPos() and pyautogui both use logical coordinates
+            # The screenshot has been resized to logical resolution, so use directly
+            logical_cursor_x, logical_cursor_y = pos_win[0], pos_win[1]
+
+            pos = (logical_cursor_x - hotspotx, logical_cursor_y - hotspoty)
+
+            logger.info(f"Cursor position (logical coordinates): ({logical_cursor_x}, {logical_cursor_y})")
+            logger.info(f"Hotspot offset: ({hotspotx}, {hotspoty})")
+            logger.info(f"Final paste position: {pos}")
+
+            img.paste(cursor, pos, cursor)
+        except Exception as e:
+            logger.warning(f"Failed to capture cursor on Windows, screenshot will not include cursor. Error: {e}")
+
+        img.save(file_path)
+
+    elif user_platform == "Linux":
+        cursor_obj = Xcursor()
+        imgarray = cursor_obj.getCursorImageArrayFast()
+        cursor_img = Image.fromarray(imgarray)
+        screenshot = pyautogui.screenshot()
+        cursor_x, cursor_y = pyautogui.position()
+        screenshot.paste(cursor_img, (cursor_x, cursor_y), cursor_img)
+        screenshot.save(file_path)
+
+    elif user_platform == "Darwin":  # (Mac OS)
+        subprocess.run(["screencapture", "-C", file_path])
+
+    else:
+        logger.warning(f"The platform you're using ({user_platform}) is not currently supported")
+
+    return send_file(file_path, mimetype='image/png')
+
+
+
+def _has_active_terminal(desktop: Accessible) -> bool:
+    """Tell whether gnome-terminal-server owns a frame in the ACTIVE state (Linux only)."""
+    terminal_apps = (candidate for candidate in desktop
+                     if candidate.getRoleName() == "application" and candidate.name == "gnome-terminal-server")
+    return any(
+        window.getRoleName() == "frame" and window.getState().contains(pyatspi.STATE_ACTIVE)
+        for terminal in terminal_apps
+        for window in terminal
+    )
+
+
+def _get_windows_terminal_output() -> Optional[str]:
+    """ Get terminal output on Windows platform.
+    Supports Windows Terminal, PowerShell, Command Prompt, and ConHost.
+
+    Scans the visible, non-minimized top-level windows (UIA backend) whose name looks
+    like a terminal and returns the first non-blank text found, right-stripped.
+    Returns None when nothing is found or when the lookup fails.
+    """
+    try:
+        from pywinauto import Desktop
+
+        desktop = Desktop(backend="uia")
+
+        # Common terminal applications on Windows
+        terminal_apps = [
+            "WindowsTerminal.exe",  # Windows Terminal
+            "powershell.exe",       # PowerShell
+            "pwsh.exe",             # PowerShell Core
+            "cmd.exe",              # Command Prompt
+            "conhost.exe"           # Console Host
+        ]
+        terminal_keywords = ['terminal', 'powershell', 'command prompt', 'cmd']
+
+        # Try to find active terminal windows
+        for window in desktop.windows():
+            try:
+                # Check if window is visible and not minimized
+                if not window.is_visible() or window.is_minimized():
+                    continue
+
+                # element_info.name (presumably the window title, not the process name) is what gets matched
+                process_name = window.element_info.name.lower()
+
+                # Check if this is a terminal window
+                is_terminal = (any(term_app.lower() in process_name for term_app in terminal_apps)
+                               or any(keyword in process_name for keyword in terminal_keywords))
+                if not is_terminal:
+                    continue
+
+                # Try to get text content from the terminal
+                # First, try to find console/edit controls that contain the output
+                try:
+                    # For Windows Terminal and modern consoles
+                    # Look for Edit or Document controls that contain the text
+                    text_controls = window.descendants(control_type="Edit")
+                    if not text_controls:
+                        text_controls = window.descendants(control_type="Document")
+                    if not text_controls:
+                        text_controls = window.descendants(control_type="Text")
+
+                    for control in text_controls:
+                        try:
+                            text = control.window_text()
+                            if text and len(text.strip()) > 0:
+                                return text.rstrip()
+                        except Exception:
+                            # Best effort: skip unreadable controls without swallowing KeyboardInterrupt/SystemExit
+                            pass
+
+                    # If no text controls found, try to get the window text directly
+                    window_text = window.window_text()
+                    if window_text and len(window_text.strip()) > 0:
+                        # Filter out just the window title
+                        if window_text not in ['Windows PowerShell', 'Command Prompt', 'PowerShell', 'Administrator: Windows PowerShell']:
+                            return window_text.rstrip()
+
+                except Exception as e:
+                    logger.debug(f"Error getting text from window {process_name}: {e}")
+                    continue
+
+            except Exception as e:
+                logger.debug(f"Error processing window: {e}")
+                continue
+
+        return None
+
+    except Exception as e:
+        logger.error(f"Error in _get_windows_terminal_output: {e}")
+        return None
+
+
+@app.route('/terminal', methods=['GET'])
+def get_terminal_output():
+    """Return the active terminal's text as JSON; unsupported platforms and failures yield HTTP 500."""
+    user_platform = platform.system()
+    try:
+        if user_platform not in ("Linux", "Windows"):  # macOS platform is not implemented currently
+            return "Currently not implemented for platform {:}.".format(platform.platform()), 500
+        output: Optional[str] = None
+        if user_platform == "Windows":
+            output = _get_windows_terminal_output()
+            logger.debug(f"Terminal output retrieved: {output}")
+        else:
+            desktop: Accessible = pyatspi.Registry.getDesktop(0)
+            if _has_active_terminal(desktop):
+                # 1. the terminal window (frame of application is st:active) is open and active
+                # 2. the terminal tab (terminal status is st:focused) is focused
+                xpath = '//application[@name="gnome-terminal-server"]/frame[@st:active="true"]//terminal[@st:focused="true"]'
+                matches: List[_Element] = _create_atspi_node(desktop).xpath(xpath, namespaces=_accessibility_ns_map_ubuntu)
+                output = matches[0].text.rstrip() if len(matches) == 1 else None
+        return jsonify({"output": output, "status": "success"})
+    except Exception as exc:
+        logger.error("Failed to get terminal output. Error: %s", exc)
+        return jsonify({"status": "error", "message": str(exc)}), 500
+
+
+_accessibility_ns_map = {
+    "ubuntu": {
+        "st": "https://accessibility.ubuntu.example.org/ns/state",
+        "attr": "https://accessibility.ubuntu.example.org/ns/attributes",
+        "cp": "https://accessibility.ubuntu.example.org/ns/component",
+        "doc": "https://accessibility.ubuntu.example.org/ns/document",
+        "docattr": "https://accessibility.ubuntu.example.org/ns/document/attributes",
+        "txt": "https://accessibility.ubuntu.example.org/ns/text",
+        "val": "https://accessibility.ubuntu.example.org/ns/value",
+        "act": "https://accessibility.ubuntu.example.org/ns/action",
+    },
+    "windows": {
+        "st": "https://accessibility.windows.example.org/ns/state",
+        "attr": "https://accessibility.windows.example.org/ns/attributes",
+        "cp": "https://accessibility.windows.example.org/ns/component",
+        "doc": "https://accessibility.windows.example.org/ns/document",
+        "docattr": "https://accessibility.windows.example.org/ns/document/attributes",
+        "txt": "https://accessibility.windows.example.org/ns/text",
+        "val": "https://accessibility.windows.example.org/ns/value",
+        "act": "https://accessibility.windows.example.org/ns/action",
+        "class": "https://accessibility.windows.example.org/ns/class",
+        "cnt": "https://accessibility.windows.example.org/ns/count",  # cnt/cols/id are looked up by _create_pywinauto_node
+        "cols": "https://accessibility.windows.example.org/ns/columns",
+        "id": "https://accessibility.windows.example.org/ns/id"
+    },
+    "macos": {
+        "st": "https://accessibility.macos.example.org/ns/state",
+        "attr": "https://accessibility.macos.example.org/ns/attributes",
+        "cp": "https://accessibility.macos.example.org/ns/component",
+        "doc": "https://accessibility.macos.example.org/ns/document",
+        "txt": "https://accessibility.macos.example.org/ns/text",
+        "val": "https://accessibility.macos.example.org/ns/value",
+        "act": "https://accessibility.macos.example.org/ns/action",
+        "role": "https://accessibility.macos.example.org/ns/role",
+    }
+}
+_accessibility_ns_map_ubuntu = _accessibility_ns_map['ubuntu']
+_accessibility_ns_map_windows = _accessibility_ns_map['windows']
+_accessibility_ns_map_macos = _accessibility_ns_map['macos']
+# A11y tree getter for Ubuntu
+libreoffice_version_tuple: Optional[Tuple[int, ...]] = None  # compared against (7, 4) in _create_atspi_node; NOTE(review): never assigned in the part of the file seen here
+MAX_DEPTH = 50  # _create_atspi_node stops descending once it reaches this depth
+MAX_WIDTH = 1024  # _create_atspi_node visits at most this many children per node
+MAX_CALLS = 5000  # NOTE(review): not referenced in the part of the file seen here; confirm where it is used
+
+
+def _get_libreoffice_version() -> Tuple[int, ...]:
+    """Return the version printed by ``libreoffice --version`` as a tuple of ints."""
+    words = subprocess.run("libreoffice --version", shell=True, text=True, stdout=subprocess.PIPE).stdout.split()
+    # The version is assumed to be the second word of the command output
+    return tuple(int(part) for part in words[1].split("."))
+
+
+def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = None) -> _Element:
+    """Convert an AT-SPI accessible and its descendants into an lxml element tree.
+
+    The element tag is the node's role name; states, attributes, on-screen extents,
+    value and action data become namespaced XML attributes and the node text becomes
+    the element text.  ``depth`` is the recursion level (children are skipped once it
+    equals MAX_DEPTH); ``flag`` is handed down to mark a Calc or Thunderbird subtree.
+    """
+    node_name = node.name
+    attribute_dict: Dict[str, Any] = {"name": node_name}
+
+    #  States
+    states: List[StateType] = node.getState().get_states()
+    for st in states:
+        state_name: str = StateType._enum_lookup[st].split("_", maxsplit=1)[1].lower()
+        if len(state_name) == 0:
+            continue
+        attribute_dict["{{{:}}}{:}".format(_accessibility_ns_map_ubuntu["st"], state_name)] = "true"
+
+    #  Attributes
+    attributes: Dict[str, str] = node.get_attributes()
+    for attribute_name, attribute_value in attributes.items():
+        if len(attribute_name) == 0:
+            continue
+        attribute_dict["{{{:}}}{:}".format(_accessibility_ns_map_ubuntu["attr"], attribute_name)] = attribute_value
+
+    #  Component
+    if attribute_dict.get("{{{:}}}visible".format(_accessibility_ns_map_ubuntu["st"]), "false") == "true" \
+            and attribute_dict.get("{{{:}}}showing".format(_accessibility_ns_map_ubuntu["st"]), "false") == "true":
+        try:
+            component: Component = node.queryComponent()
+        except NotImplementedError:
+            pass
+        else:
+            bbox: Sequence[int] = component.getExtents(pyatspi.XY_SCREEN)
+            attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_ubuntu["cp"])] = str(tuple(bbox[0:2]))
+            attribute_dict["{{{:}}}size".format(_accessibility_ns_map_ubuntu["cp"])] = str(tuple(bbox[2:]))
+
+    text = ""
+    #  Text
+    try:
+        text_obj: ATText = node.queryText()
+        # only text shown on current screen is available
+        text: str = text_obj.getText(0, text_obj.characterCount)
+        # U+FFFC ("Object Replacement Character", a placeholder for an otherwise unspecified
+        # object) was seen in Thunderbird and elsewhere; U+FFFD is another "Replacement
+        # Character" that is stripped as well, just in case
+        text = text.replace("\ufffc", "").replace("\ufffd", "")
+    except NotImplementedError:
+        pass
+
+    #  Image, Selection, Value, Action
+    try:
+        node.queryImage()
+        attribute_dict["image"] = "true"
+    except NotImplementedError:
+        pass
+
+    try:
+        node.querySelection()
+        attribute_dict["selection"] = "true"
+    except NotImplementedError:
+        pass
+
+    try:
+        value: ATValue = node.queryValue()
+        value_key = f"{{{_accessibility_ns_map_ubuntu['val']}}}"
+
+        for attr_name, attr_func in [
+            ("value", lambda: value.currentValue),
+            ("min", lambda: value.minimumValue),
+            ("max", lambda: value.maximumValue),
+            ("step", lambda: value.minimumIncrement)
+        ]:
+            try:
+                attribute_dict[f"{value_key}{attr_name}"] = str(attr_func())
+            except Exception:  # best effort per property (was a bare except)
+                pass
+    except NotImplementedError:
+        pass
+
+    try:
+        action: ATAction = node.queryAction()
+        for i in range(action.nActions):
+            action_name: str = action.getName(i).replace(" ", "-")
+            action_key = "{{{:}}}{:}".format(_accessibility_ns_map_ubuntu["act"], action_name)
+            attribute_dict[action_key + "_desc"] = action.getDescription(i)
+            attribute_dict[action_key + "_kb"] = action.getKeyBinding(i)
+    except NotImplementedError:
+        pass
+
+    # Add from here if we need more attributes in the future...
+
+    raw_role_name: str = node.getRoleName().strip()
+    node_role_name = (raw_role_name or "unknown").replace(" ", "-")
+
+    if not flag:
+        if raw_role_name == "document spreadsheet":
+            flag = "calc"
+        if raw_role_name == "application" and node.name == "Thunderbird":
+            flag = "thunderbird"
+
+    xml_node = lxml.etree.Element(
+        node_role_name,
+        attrib=attribute_dict,
+        nsmap=_accessibility_ns_map_ubuntu
+    )
+
+    if len(text) > 0:
+        xml_node.text = text
+
+    if depth == MAX_DEPTH:
+        logger.warning("Max depth reached")
+        return xml_node
+
+    if flag == "calc" and node_role_name == "table":
+        # Maximum column: 1024 if ver<=7.3 else 16384
+        # Maximum row: 104 8576
+        # Maximun sheet: 1 0000
+
+        global libreoffice_version_tuple
+        MAXIMUN_COLUMN = 1024 if libreoffice_version_tuple < (7, 4) else 16384  # NOTE(review): TypeError while the global is still None; confirm it is set first
+        MAX_ROW = 104_8576
+
+        index_base = 0
+        first_showing = False
+        column_base = None
+        for r in range(MAX_ROW):
+            for clm in range(column_base or 0, MAXIMUN_COLUMN):
+                child_node: Accessible = node[index_base + clm]
+                showing: bool = child_node.getState().contains(STATE_SHOWING)
+                if showing:
+                    child_node: _Element = _create_atspi_node(child_node, depth + 1, flag)
+                    if not first_showing:
+                        column_base = clm
+                        first_showing = True
+                    xml_node.append(child_node)
+                elif first_showing and column_base is not None or clm >= 500:
+                    break
+            if first_showing and clm == column_base or not first_showing and r >= 500:
+                break
+            index_base += MAXIMUN_COLUMN
+        return xml_node
+    else:
+        try:
+            for i, ch in enumerate(node):
+                if i == MAX_WIDTH:
+                    logger.warning("Max width reached")
+                    break
+                xml_node.append(_create_atspi_node(ch, depth + 1, flag))
+        except Exception:
+            logger.warning("Error occurred during children traversing. Has Ignored. Node: %s",
+                           lxml.etree.tostring(xml_node, encoding="unicode"))
+        return xml_node
+
+
+# A11y tree getter for Windows
+def _create_pywinauto_node(node, nodes, depth: int = 0, flag: Optional[str] = None) -> Optional[_Element]:
+    """
+    Serialise a pywinauto wrapper and its children into an lxml element.
+
+    :param node: the pywinauto wrapper to serialise.
+    :param nodes: set of wrappers already serialised, shared with the recursive
+        calls so that a wrapper is emitted only once. Any non-set value
+        (``None``, ``{}``, ...) starts a fresh set.
+    :param depth: depth of ``node``; children are not visited once it equals
+        ``MAX_DEPTH``.
+    :param flag: passed through unchanged to the recursive calls.
+    :return: the element built for ``node``, or ``None`` when ``node`` was
+        already present in ``nodes``.
+    """
+    if not isinstance(nodes, set):
+        nodes = set()
+    if node in nodes:
+        return None
+    nodes.add(node)
+
+    attribute_dict: Dict[str, Any] = {"name": node.element_info.name}
+
+    base_properties = {}
+    try:
+        # get all writable/not writable properties, but have bugs when landing on chrome and it's slower!
+        base_properties.update(node.get_properties())
+    except Exception:
+        logger.debug("Failed to call get_properties(), trying to get writable properites")
+        original_class = node.__class__
+        try:
+            # Temporarily swap in a subclass that only advertises the base writable properties
+            class TempElement(original_class):
+                writable_props = pywinauto.base_wrapper.BaseWrapper.writable_props
+
+            node.__class__ = TempElement
+            base_properties.update(node.get_properties())  # only get all writable properties
+            logger.debug("get writable properties")
+        except Exception as e:
+            logger.error(e)
+        finally:
+            # Always hand the wrapper back with its real class, even if the fallback failed
+            node.__class__ = original_class
+
+    def _copy_properties(ns_key: str, property_names) -> None:
+        """Copy the named entries of ``base_properties`` into ``attribute_dict`` as lower-cased strings."""
+        try:
+            namespace = _accessibility_ns_map_windows[ns_key]
+        except KeyError:
+            # A namespace missing from the map simply disables this attribute group
+            return
+        for property_name in property_names:
+            property_value = base_properties.get(property_name)
+            if property_value is None:
+                continue
+            # str() first: several of these properties are integers, which have no .lower()
+            attribute_dict[f"{{{namespace}}}{property_name}"] = str(property_value).lower()
+
+    # Count-cnt
+    _copy_properties("cnt", ["control_count", "button_count", "item_count", "column_count"])
+
+    # Columns-cols
+    _copy_properties("cols", ["columns"])
+
+    # Id-id
+    _copy_properties("id", ["control_id", "automation_id", "window_id"])
+
+    #  States
+    # 19 sec out of 20
+    for attr_name, method_name in [
+        ("enabled", "is_enabled"),
+        ("visible", "is_visible"),
+        # ("active", "is_active"), # occupied most of the time: 20s out of 21s for slack, 51.5s out of 54s for WeChat # maybe use for cutting branches
+        ("minimized", "is_minimized"),
+        ("maximized", "is_maximized"),
+        ("normal", "is_normal"),
+        ("unicode", "is_unicode"),
+        ("collapsed", "is_collapsed"),
+        ("checkable", "is_checkable"),
+        ("checked", "is_checked"),
+        ("focused", "is_focused"),
+        ("keyboard_focused", "is_keyboard_focused"),
+        ("selected", "is_selected"),
+        ("selection_required", "is_selection_required"),
+        ("pressable", "is_pressable"),
+        ("pressed", "is_pressed"),
+        ("expanded", "is_expanded"),
+        ("editable", "is_editable"),
+        ("has_keyboard_focus", "has_keyboard_focus"),
+        ("is_keyboard_focusable", "is_keyboard_focusable"),
+    ]:
+        try:
+            attribute_dict[f"{{{_accessibility_ns_map_windows['st']}}}{attr_name}"] = \
+                str(getattr(node, method_name)()).lower()
+        except Exception:
+            # Best effort: a state the wrapper does not offer (or fails to report) is left out
+            pass
+
+    #  Component
+    try:
+        rectangle = node.rectangle()
+        attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_windows["cp"])] = \
+            "({:d}, {:d})".format(rectangle.left, rectangle.top)
+        attribute_dict["{{{:}}}size".format(_accessibility_ns_map_windows["cp"])] = \
+            "({:d}, {:d})".format(rectangle.width(), rectangle.height())
+
+    except Exception as e:
+        logger.error("Error accessing rectangle: %s", e)
+
+    #  Text
+    text: str = node.window_text()
+    if text == attribute_dict["name"]:
+        # The name attribute already carries this string; do not repeat it as element text
+        text = ""
+
+    #  Selection
+    if hasattr(node, "select"):
+        attribute_dict["selection"] = "true"
+
+    # Value
+    # Candidate getters are looked up by name and tried in order; the first one that works wins.
+    for attr_name, method_names in [
+        ("step", ["get_step"]),
+        ("value", ["value", "get_value", "get_position"]),
+        ("min", ["min_value", "get_range_min"]),
+        ("max", ["max_value", "get_range_max"])
+    ]:
+        for method_name in method_names:
+            try:
+                attribute_dict[f"{{{_accessibility_ns_map_windows['val']}}}{attr_name}"] = \
+                    str(getattr(node, method_name)())
+                break  # exit once the attribute is set successfully
+            except Exception:
+                # Getter missing or failing on this wrapper: fall through to the next candidate
+                continue
+
+    attribute_dict["{{{:}}}class".format(_accessibility_ns_map_windows["class"])] = str(type(node))
+
+    # class_name
+    _copy_properties("class", ["class_name", "friendly_class_name"])
+
+    # The element tag is derived from the class name, reduced to characters usable in a tag
+    node_role_name: str = node.class_name().lower().replace(" ", "-")
+    node_role_name = "".join(
+        map(lambda _ch: _ch if _ch.isidentifier() or _ch in {"-"} or _ch.isalnum() else "-", node_role_name))
+
+    if node_role_name.strip() == "":
+        node_role_name = "unknown"
+    if not node_role_name[0].isalpha():
+        node_role_name = "tag" + node_role_name
+
+    xml_node = lxml.etree.Element(
+        node_role_name,
+        attrib=attribute_dict,
+        nsmap=_accessibility_ns_map_windows
+    )
+
+    if text is not None and len(text) > 0 and text != attribute_dict["name"]:
+        xml_node.text = text
+
+    if depth == MAX_DEPTH:
+        logger.warning("Max depth reached")
+        return xml_node
+
+    # use multi thread to accelerate children fetching
+    children = node.children()
+    if children:
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            child_futures = [executor.submit(_create_pywinauto_node, ch, nodes, depth + 1, flag) for ch in
+                             children[:MAX_WIDTH]]
+        # Leaving the ``with`` block waits for every future, so the results can be
+        # read in submission order, which keeps the children in a stable order.
+        for future in child_futures:
+            try:
+                child_xml = future.result()
+            except Exception as e:
+                logger.error(f"Exception occurred: {e}")
+                continue
+            # None marks a wrapper that was already serialised elsewhere
+            if child_xml is not None:
+                xml_node.append(child_xml)
+    return xml_node
+
+
+# A11y tree getter for macOS
+
+def _create_axui_node(node, nodes: set = None, depth: int = 0, bbox: tuple = None):
+    """
+    Serialise a macOS accessibility element and its children into an lxml element.
+
+    :param node: at ``depth == 0`` a window-info mapping (its ``kCGWindowBounds``,
+        ``kCGWindowOwnerPID`` and ``kCGWindowOwnerName`` entries are read); at any
+        other depth an accessibility element reference.
+    :param nodes: set of nodes already visited, shared with the recursive calls.
+    :param depth: depth of ``node``; children are not visited once it equals
+        ``MAX_DEPTH``.
+    :param bbox: ``(x_min, y_min, x_max, y_max)`` of the owning window, computed
+        at depth 0 and used to discard elements lying entirely outside it.
+    :return: the element for ``node``, or ``None`` when the node was already
+        visited, cannot be queried, or does not intersect ``bbox``.
+    """
+    nodes = nodes or set()
+    if node in nodes:
+        return None
+    nodes.add(node)
+
+    # Attributes that are read, mapped to the namespace key they are emitted under
+    reserved_keys = {
+        "AXEnabled": "st",
+        "AXFocused": "st",
+        "AXFullScreen": "st",
+        "AXTitle": "attr",
+        "AXChildrenInNavigationOrder": "attr",
+        "AXChildren": "attr",
+        "AXFrame": "attr",
+        "AXRole": "role",
+        "AXHelp": "attr",
+        "AXRoleDescription": "role",
+        "AXSubrole": "role",
+        "AXURL": "attr",
+        "AXValue": "val",
+        "AXDescription": "attr",
+        "AXDOMIdentifier": "attr",
+        "AXSelected": "st",
+        "AXInvalid": "st",
+        "AXRows": "attr",
+        "AXColumns": "attr",
+    }
+    attribute_dict = {}
+
+    if depth == 0:
+        bounds = node["kCGWindowBounds"]
+        bbox = (
+            bounds["X"],
+            bounds["Y"],
+            bounds["X"] + bounds["Width"],
+            bounds["Y"] + bounds["Height"]
+        )
+        app_ref = ApplicationServices.AXUIElementCreateApplication(node["kCGWindowOwnerPID"])
+
+        attribute_dict["name"] = node["kCGWindowOwnerName"]
+        if attribute_dict["name"] != "Dock":
+            error_code, app_wins_ref = ApplicationServices.AXUIElementCopyAttributeValue(
+                app_ref, "AXWindows", None)
+            if error_code:
+                logger.error("MacOS parsing %s encountered Error code: %d", app_ref, error_code)
+            if error_code or not app_wins_ref:
+                # No window element to descend into: skip this application
+                # instead of failing on the index below
+                return None
+        else:
+            app_wins_ref = [app_ref]
+        node = app_wins_ref[0]
+
+    error_code, attr_names = ApplicationServices.AXUIElementCopyAttributeNames(node, None)
+
+    if error_code:
+        # -25202: AXError.invalidUIElement
+        #         The accessibility object received in this event is invalid.
+        return None
+
+    value = None
+
+    if "AXFrame" in attr_names:
+        error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, "AXFrame", None)
+        # The frame is recovered by parsing the value's repr()
+        rep = repr(attr_val)
+        x_value = re.search(r"x:(-?[\d.]+)", rep)
+        y_value = re.search(r"y:(-?[\d.]+)", rep)
+        w_value = re.search(r"w:(-?[\d.]+)", rep)
+        h_value = re.search(r"h:(-?[\d.]+)", rep)
+        type_value = re.search(r"type\s?=\s?(\w+)", rep)
+        value = {
+            "x": float(x_value.group(1)) if x_value else None,
+            "y": float(y_value.group(1)) if y_value else None,
+            "w": float(w_value.group(1)) if w_value else None,
+            "h": float(h_value.group(1)) if h_value else None,
+            "type": type_value.group(1) if type_value else None,
+        }
+
+        # Clip against the window only when both the frame and the window box are known
+        if bbox is not None and not any(v is None for v in value.values()):
+            x_min = max(bbox[0], value["x"])
+            x_max = min(bbox[2], value["x"] + value["w"])
+            y_min = max(bbox[1], value["y"])
+            y_max = min(bbox[3], value["y"] + value["h"])
+
+            if x_min > x_max or y_min > y_max:
+                # No intersection
+                return None
+
+    role = None
+    text = None
+
+    for attr_name, ns_key in reserved_keys.items():
+        if attr_name not in attr_names:
+            continue
+
+        if value and attr_name == "AXFrame":
+            bb = value
+            if not any(v is None for v in bb.values()):
+                attribute_dict["{{{:}}}screencoord".format(_accessibility_ns_map_macos["cp"])] = \
+                    "({:d}, {:d})".format(int(bb["x"]), int(bb["y"]))
+                attribute_dict["{{{:}}}size".format(_accessibility_ns_map_macos["cp"])] = \
+                    "({:d}, {:d})".format(int(bb["w"]), int(bb["h"]))
+            continue
+
+        error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, attr_name, None)
+
+        full_attr_name = f"{{{_accessibility_ns_map_macos[ns_key]}}}{attr_name}"
+
+        if attr_name == "AXValue" and not text:
+            # The value becomes the element text; an absent value must not turn into "None"
+            if attr_val is not None:
+                text = str(attr_val)
+            continue
+
+        if attr_name == "AXRoleDescription":
+            # The role description names the element tag rather than becoming an attribute
+            role = attr_val
+            continue
+
+        # Set the attribute_dict
+        if not (isinstance(attr_val, ApplicationServices.AXUIElementRef)
+                or isinstance(attr_val, (AppKit.NSArray, list))):
+            if attr_val is not None:
+                attribute_dict[full_attr_name] = str(attr_val)
+
+    node_role_name = role.lower().replace(" ", "_") if role else "unknown_role"
+
+    xml_node = lxml.etree.Element(
+        node_role_name,
+        attrib=attribute_dict,
+        nsmap=_accessibility_ns_map_macos
+    )
+
+    if text is not None and len(text) > 0:
+        xml_node.text = text
+
+    if depth == MAX_DEPTH:
+        logger.warning("Max depth reached")
+        return xml_node
+
+    future_to_child = []
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        # Element-valued and array-valued attributes are the ones that hold children
+        for attr_name, ns_key in reserved_keys.items():
+            if attr_name not in attr_names:
+                continue
+
+            error_code, attr_val = ApplicationServices.AXUIElementCopyAttributeValue(node, attr_name, None)
+            if isinstance(attr_val, ApplicationServices.AXUIElementRef):
+                future_to_child.append(executor.submit(_create_axui_node, attr_val, nodes, depth + 1, bbox))
+
+            elif isinstance(attr_val, (AppKit.NSArray, list)):
+                for child in attr_val:
+                    future_to_child.append(executor.submit(_create_axui_node, child, nodes, depth + 1, bbox))
+
+        for future in concurrent.futures.as_completed(future_to_child):
+            # Handle failures per child so one bad element does not drop its siblings
+            try:
+                result = future.result()
+            except Exception as e:
+                logger.error(f"Exception occurred: {e}")
+                continue
+            if result is not None:
+                xml_node.append(result)
+
+    return xml_node
+
+
+@app.route("/accessibility", methods=["GET"])
+def get_accessibility_tree():
+    """
+    Return the desktop accessibility tree as XML text.
+
+    One sub-tree is built per application (Linux) or window (Windows, macOS) on
+    a thread pool and attached to a single root element.
+
+    :return: JSON ``{"AT": <xml string>}``; a plain-text message with status 500
+        on any platform other than Linux, Windows or Darwin.
+    """
+    os_name: str = platform.system()
+
+    # AT-SPI works for KDE as well
+    if os_name == "Linux":
+        global libreoffice_version_tuple
+        libreoffice_version_tuple = _get_libreoffice_version()
+
+        desktop: Accessible = pyatspi.Registry.getDesktop(0)
+        xml_node = lxml.etree.Element("desktop-frame", nsmap=_accessibility_ns_map_ubuntu)
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [executor.submit(_create_atspi_node, app_node, 1) for app_node in desktop]
+            for future in concurrent.futures.as_completed(futures):
+                xml_tree = future.result()
+                # Same guard as the other platforms: never append a missing sub-tree
+                if xml_tree is not None:
+                    xml_node.append(xml_tree)
+        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})
+
+    elif os_name == "Windows":
+        # Attention: Windows a11y tree is implemented to be read through `pywinauto` module, however,
+        # two different backends `win32` and `uia` are supported and different results may be returned
+        desktop: Desktop = Desktop(backend="uia")
+        xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_windows)
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            # None asks the builder to start its own visited-set for each window
+            futures = [executor.submit(_create_pywinauto_node, wnd, None, 1) for wnd in desktop.windows()]
+            for future in concurrent.futures.as_completed(futures):
+                xml_tree = future.result()
+                # The builder returns None for a wrapper it has already visited
+                if xml_tree is not None:
+                    xml_node.append(xml_tree)
+        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})
+
+    elif os_name == "Darwin":
+        # TODO: Add Dock and MenuBar
+        xml_node = lxml.etree.Element("desktop", nsmap=_accessibility_ns_map_macos)
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            # On-screen windows on layer 0, excluding those owned by "Window Server"
+            foreground_windows = [
+                win for win in Quartz.CGWindowListCopyWindowInfo(
+                    (Quartz.kCGWindowListExcludeDesktopElements |
+                     Quartz.kCGWindowListOptionOnScreenOnly),
+                    Quartz.kCGNullWindowID
+                ) if win["kCGWindowLayer"] == 0 and win["kCGWindowOwnerName"] != "Window Server"
+            ]
+            # The Dock is looked up separately, by window name, among all windows
+            dock_info = [
+                win for win in Quartz.CGWindowListCopyWindowInfo(
+                    Quartz.kCGWindowListOptionAll,
+                    Quartz.kCGNullWindowID
+                ) if win.get("kCGWindowName", None) == "Dock"
+            ]
+
+            futures = [
+                executor.submit(_create_axui_node, wnd, None, 0)
+                for wnd in foreground_windows + dock_info
+            ]
+
+            for future in concurrent.futures.as_completed(futures):
+                xml_tree = future.result()
+                if xml_tree is not None:
+                    xml_node.append(xml_tree)
+
+        return jsonify({"AT": lxml.etree.tostring(xml_node, encoding="unicode")})
+
+    else:
+        return "Currently not implemented for platform {:}.".format(platform.platform()), 500
+
+
+@app.route('/screen_size', methods=['POST'])
+def get_screen_size():
+    """
+    Return the size of the screen in pixels.
+
+    :return: JSON ``{"width": ..., "height": ...}``.
+    """
+    if platform_name == "Linux":
+        d = display.Display()
+        try:
+            screen = d.screen()
+            screen_width = screen.width_in_pixels
+            screen_height = screen.height_in_pixels
+        finally:
+            # Do not leave one X connection open per request
+            d.close()
+    elif platform_name == "Windows":
+        user32 = ctypes.windll.user32
+        # GetSystemMetrics indices 0 and 1 are SM_CXSCREEN and SM_CYSCREEN
+        screen_width: int = user32.GetSystemMetrics(0)
+        screen_height: int = user32.GetSystemMetrics(1)
+    else:
+        # Any other platform (e.g. macOS): without this branch the two
+        # variables below would be unbound and the request would fail
+        screen_width, screen_height = pyautogui.size()
+    return jsonify(
+        {
+            "width": screen_width,
+            "height": screen_height
+        }
+    )
+
+
+@app.route('/window_size', methods=['POST'])
+def get_window_size():
+    """
+    Return the size of the first X11 client window whose WM_CLASS matches.
+
+    Form field ``app_class_name`` is compared case-insensitively against every
+    entry of each window's WM_CLASS.
+
+    :return: JSON ``{"width": ..., "height": ...}`` on a match; a JSON error with
+        status 400 (field missing), 501 (not Linux) or 404 (no matching window).
+    """
+    if 'app_class_name' in request.form:
+        app_class_name = request.form['app_class_name']
+    else:
+        return jsonify({"error": "app_class_name is required"}), 400
+
+    if platform_name != "Linux":
+        return jsonify({"error": "window_size is only supported on Linux"}), 501
+
+    d = display.Display()
+    try:
+        root = d.screen().root
+        client_list = root.get_full_property(d.intern_atom('_NET_CLIENT_LIST'), X.AnyPropertyType)
+        # The property is absent when the window manager does not publish a client list
+        window_ids = client_list.value if client_list is not None else []
+        wanted_class = app_class_name.lower()
+
+        for window_id in window_ids:
+            try:
+                window = d.create_resource_object('window', window_id)
+                wm_class = window.get_wm_class()
+
+                if wm_class is None:
+                    continue
+
+                if wanted_class in [name.lower() for name in wm_class]:
+                    geom = window.get_geometry()
+                    return jsonify(
+                        {
+                            "width": geom.width,
+                            "height": geom.height
+                        }
+                    )
+            except Xlib.error.XError:  # Ignore windows that give an error
+                continue
+    finally:
+        # Do not leave one X connection open per request
+        d.close()
+
+    # A view must return a response; bare None would make Flask raise
+    return jsonify({"error": "No window found for class {:}".format(app_class_name)}), 404
+
+
+@app.route('/desktop_path', methods=['POST'])
+def get_desktop_path():
+    """
+    Report the current user's ``Desktop`` directory.
+
+    :return: JSON ``{"desktop_path": ...}``, or a JSON error with status 404
+        when the platform is not Windows, Darwin or Linux, or the directory
+        does not exist.
+    """
+    home_dir = str(Path.home())
+
+    # All three supported systems keep the desktop directly under the home directory
+    if platform.system() in ("Windows", "Darwin", "Linux"):
+        candidate = os.path.join(home_dir, "Desktop")
+        if os.path.exists(candidate):
+            return jsonify(desktop_path=candidate)
+
+    return jsonify(error="Unsupported operating system or desktop path not found"), 404
+
+
+@app.route('/wallpaper', methods=['POST'])
+def get_wallpaper():
+    """
+    Send the current desktop wallpaper image file.
+
+    :return: the image file; aborts with 400 on an unsupported OS, 404 when no
+        wallpaper path could be determined, 500 when the file cannot be served.
+    """
+    import mimetypes
+    from urllib.parse import unquote, urlparse
+
+    def get_wallpaper_windows():
+        SPI_GETDESKWALLPAPER = 0x73
+        MAX_PATH = 260
+        buffer = ctypes.create_unicode_buffer(MAX_PATH)
+        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
+        return buffer.value
+
+    def get_wallpaper_macos():
+        script = """
+        tell application "System Events" to tell every desktop to get picture
+        """
+        process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        output, error = process.communicate()
+        if error:
+            app.logger.error("Error: %s", error.decode('utf-8'))
+            return None
+        return output.strip().decode('utf-8')
+
+    def get_wallpaper_linux():
+        try:
+            output = subprocess.check_output(
+                ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"],
+                stderr=subprocess.PIPE
+            )
+        except (subprocess.CalledProcessError, OSError) as e:
+            # OSError covers a missing gsettings binary
+            app.logger.error("Error: %s", e)
+            return None
+        # gsettings prints the value as a quoted string
+        uri = output.decode('utf-8').strip().strip("'\"")
+        if uri.startswith('file://'):
+            # Percent-escapes (e.g. %20 for a space) must be decoded to get a real path
+            return unquote(urlparse(uri).path)
+        return uri
+
+    os_name = platform.system()
+    wallpaper_path = None
+    if os_name == 'Windows':
+        wallpaper_path = get_wallpaper_windows()
+    elif os_name == 'Darwin':
+        wallpaper_path = get_wallpaper_macos()
+    elif os_name == 'Linux':
+        wallpaper_path = get_wallpaper_linux()
+    else:
+        app.logger.error(f"Unsupported OS: {os_name}")
+        abort(400, description="Unsupported OS")
+
+    if wallpaper_path:
+        try:
+            # Wallpapers are not always PNG: label the response by file extension,
+            # keeping PNG only as the fallback
+            guessed_type, _ = mimetypes.guess_type(wallpaper_path)
+            return send_file(wallpaper_path, mimetype=guessed_type or 'image/png')
+        except Exception as e:
+            app.logger.error(f"An error occurred while serving the wallpaper file: {e}")
+            abort(500, description="Unable to serve the wallpaper file")
+    else:
+        abort(404, description="Wallpaper file not found")
+
+
+@app.route('/list_directory', methods=['POST'])
+def get_directory_tree():
+    """
+    Return the recursive listing of a directory.
+
+    Expects a JSON object with a ``path`` entry.
+
+    :return: JSON ``{"directory_tree": ...}``, or a JSON error with status 400
+        when ``path`` is missing or is not a directory.
+    """
+    def _list_dir_contents(directory):
+        """
+        List the contents of a directory recursively, building a tree structure.
+
+        :param directory: The path of the directory to inspect.
+        :return: A nested dictionary with the contents of the directory.
+        """
+        tree = {'type': 'directory', 'name': os.path.basename(directory), 'children': []}
+        try:
+            # List all files and directories in the current directory
+            for entry in os.listdir(directory):
+                full_path = os.path.join(directory, entry)
+                # If entry is a directory, recurse into it
+                if os.path.isdir(full_path):
+                    tree['children'].append(_list_dir_contents(full_path))
+                else:
+                    tree['children'].append({'type': 'file', 'name': entry})
+        except OSError as e:
+            # If the directory cannot be accessed, return the exception message
+            tree = {'error': str(e)}
+        return tree
+
+    # Extract the 'path' parameter from the JSON request; a missing, malformed
+    # or non-object body is reported as a missing parameter instead of crashing
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or 'path' not in data:
+        return jsonify(error="Missing 'path' parameter"), 400
+
+    start_path = data['path']
+    # Ensure the provided path is a directory
+    if not os.path.isdir(start_path):
+        return jsonify(error="The provided path is not a directory"), 400
+
+    # Generate the directory tree starting from the provided path
+    directory_tree = _list_dir_contents(start_path)
+    return jsonify(directory_tree=directory_tree)
+
+
+@app.route('/file', methods=['POST'])
+def get_file():
+    """
+    Send a file from this machine as an attachment.
+
+    Form field ``file_path`` names the file; ``~`` and environment variables
+    in it are expanded.
+
+    :return: the file, or a JSON error with status 400 (field missing),
+        404 (no such file) or 500 (any other failure).
+    """
+    if 'file_path' not in request.form:
+        return jsonify({"error": "file_path is required"}), 400
+
+    file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))
+
+    try:
+        if os.path.exists(file_path):
+            # Size is only used for the log line
+            size_in_bytes = os.path.getsize(file_path)
+            logger.info(f"Serving file: {file_path} ({size_in_bytes} bytes)")
+            return send_file(file_path, as_attachment=True)
+
+        return jsonify({"error": "File not found"}), 404
+    except FileNotFoundError:
+        # The file vanished between the existence check and the send
+        return jsonify({"error": "File not found"}), 404
+    except Exception as e:
+        logger.error(f"Error serving file {file_path}: {e}")
+        return jsonify({"error": f"Failed to serve file: {str(e)}"}), 500
+
+
+@app.route("/setup/upload", methods=["POST"])
+def upload_file():
+    """
+    Store an uploaded file on this machine.
+
+    Form field ``file_path`` is the destination (``~`` and environment variables
+    are expanded); the file part ``file_data`` is the content.
+
+    :return: a text confirmation with the stored size, or a JSON error with
+        status 400 (a field is missing) or 500 (the save failed).
+    """
+    if 'file_path' not in request.form or 'file_data' not in request.files:
+        return jsonify({"error": "file_path and file_data are required"}), 400
+
+    file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))
+    file = request.files["file_data"]
+
+    try:
+        # Ensure target directory exists
+        target_dir = os.path.dirname(file_path)
+        if target_dir:  # Only create directory if it's not empty
+            os.makedirs(target_dir, exist_ok=True)
+
+        # Save file and get size for verification
+        file.save(file_path)
+        uploaded_size = os.path.getsize(file_path)
+
+        logger.info(f"File uploaded successfully: {file_path} ({uploaded_size} bytes)")
+        return f"File Uploaded: {uploaded_size} bytes"
+
+    except Exception as e:
+        logger.error(f"Error uploading file to {file_path}: {e}")
+        # Clean up partial file if it exists
+        if os.path.exists(file_path):
+            try:
+                os.remove(file_path)
+            except OSError as cleanup_error:
+                # Cleanup is best effort, but a leftover partial file is worth a log line
+                logger.warning("Could not remove partial upload %s: %s", file_path, cleanup_error)
+        return jsonify({"error": f"Failed to upload file: {str(e)}"}), 500
+
+
+@app.route('/platform', methods=['GET'])
+def get_platform():
+    """Return the value of ``platform.system()`` as the response body."""
+    system_name = platform.system()
+    return system_name
+
+
+@app.route('/cursor_position', methods=['GET'])
+def get_cursor_position():
+    """Return the position reported by ``pyautogui.position()`` as JSON, x first, then y."""
+    cursor = pyautogui.position()
+    cursor_x = cursor.x
+    cursor_y = cursor.y
+    return jsonify(cursor_x, cursor_y)
+
+@app.route("/setup/change_wallpaper", methods=['POST'])
+def change_wallpaper():
+    """
+    Set the desktop wallpaper to an image file on this machine.
+
+    Expects JSON ``{"path": ...}``; ``~`` and environment variables in the path
+    are expanded.
+
+    :return: a text confirmation, or a text error with status 400 (no path, or
+        unsupported OS), 404 (file missing) or 500 (the change failed).
+    """
+    data = request.json
+    path = data.get('path', None)
+
+    if not path:
+        return "Path not supplied!", 400
+
+    path = Path(os.path.expandvars(os.path.expanduser(path)))
+
+    if not path.exists():
+        return f"File not found: {path}", 404
+
+    try:
+        user_platform = platform.system()
+        if user_platform == "Windows":
+            import ctypes
+            # 20 is SPI_SETDESKWALLPAPER; 3 is SPIF_UPDATEINIFILE | SPIF_SENDCHANGE.
+            # The call returns zero on failure.
+            if not ctypes.windll.user32.SystemParametersInfoW(20, 0, str(path), 3):
+                raise RuntimeError("SystemParametersInfoW failed")
+        elif user_platform == "Linux":
+            import subprocess
+            # check=True so that a failing command is reported instead of ignored
+            subprocess.run(["gsettings", "set", "org.gnome.desktop.background", "picture-uri", f"file://{path}"],
+                           check=True)
+        elif user_platform == "Darwin":  # (Mac OS)
+            import subprocess
+            # Escape backslashes and quotes so the path cannot break out of the AppleScript string
+            script_path = str(path).replace("\\", "\\\\").replace('"', '\\"')
+            subprocess.run(
+                ["osascript", "-e", f'tell application "Finder" to set desktop picture to POSIX file "{script_path}"'],
+                check=True)
+        else:
+            # Nothing was changed, so do not claim success
+            return f"Operating system {user_platform} not supported.", 400
+        return "Wallpaper changed successfully"
+    except Exception as e:
+        return f"Failed to change wallpaper. Error: {e}", 500
+
+
+@app.route("/setup/download_file", methods=['POST'])
+def download_file():
+    """
+    Download a URL to a path on this machine, trying up to three times.
+
+    Expects JSON ``{"url": ..., "path": ...}``; ``~`` and environment variables
+    in the path are expanded and missing parent directories are created.
+
+    :return: a text confirmation with the stored size, or a text error with
+        status 400 (url or path missing) or 500 (every attempt failed).
+    """
+    data = request.json
+    url = data.get('url', None)
+    path = data.get('path', None)
+
+    if not url or not path:
+        return "Path or URL not supplied!", 400
+
+    path = Path(os.path.expandvars(os.path.expanduser(path)))
+    path.parent.mkdir(parents=True, exist_ok=True)
+
+    max_retries = 3
+    error: Optional[Exception] = None
+    progress_step = 1024 * 1024  # Log every MB
+
+    for i in range(max_retries):
+        try:
+            logger.info(f"Download attempt {i+1}/{max_retries} for {url}")
+            # The context manager releases the connection on every exit path
+            with requests.get(url, stream=True, timeout=300) as response:
+                response.raise_for_status()
+
+                # Get expected file size if available
+                total_size = int(response.headers.get('content-length', 0))
+                # iter_content yields the decoded body while content-length counts
+                # the encoded bytes, so the two only compare for unencoded responses
+                content_encoding = response.headers.get('content-encoding', '').strip().lower()
+                if content_encoding not in ('', 'identity'):
+                    total_size = 0
+                if total_size > 0:
+                    logger.info(f"Expected file size: {total_size / (1024*1024):.2f} MB")
+
+                downloaded_size = 0
+                next_progress_mark = progress_step
+                with open(path, 'wb') as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        if not chunk:
+                            continue
+                        f.write(chunk)
+                        downloaded_size += len(chunk)
+                        # Threshold test: chunks are not guaranteed to land exactly on a MB boundary
+                        if total_size > 0 and downloaded_size >= next_progress_mark:
+                            progress = (downloaded_size / total_size) * 100
+                            logger.info(f"Download progress: {progress:.1f}%")
+                            next_progress_mark += progress_step
+
+            # Verify download completeness
+            actual_size = os.path.getsize(path)
+            if total_size > 0 and actual_size != total_size:
+                raise Exception(f"Download incomplete. Expected {total_size} bytes, got {actual_size} bytes")
+
+            logger.info(f"File downloaded successfully: {path} ({actual_size} bytes)")
+            return f"File downloaded successfully: {actual_size} bytes"
+
+        except Exception as e:
+            error = e
+            logger.error(f"Failed to download {url}: {e}. Retrying... ({max_retries - i - 1} attempts left)")
+            # Clean up partial download
+            if path.exists():
+                try:
+                    path.unlink()
+                except OSError as cleanup_error:
+                    logger.warning("Could not remove partial download %s: %s", path, cleanup_error)
+
+    return f"Failed to download {url}. No retries left. Error: {error}", 500
+
+
+@app.route("/setup/open_file", methods=['POST'])
+def open_file():
+    """
+    Open a file with its default application, or launch a command, then wait
+    for a matching window and activate it.
+
+    Expects JSON ``{"path": ...}``. An existing path is opened as a file;
+    anything else must resolve through ``shutil.which`` and is started as a
+    command. A window matches when its title contains the file name, or the
+    file name without extension.
+
+    :return: a text confirmation, or a text error with status 400 (no path),
+        404 (neither a file nor a command) or 500 (launch failed, or no window
+        appeared within ``TIMEOUT`` seconds).
+    """
+    data = request.json
+    path = data.get('path', None)
+
+    if not path:
+        return "Path not supplied!", 400
+
+    path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
+    is_file_path = path_obj.exists()
+
+    if not is_file_path:
+        # Not a file on disk: it has to be a command found on PATH
+        import shutil
+        if not shutil.which(path):
+            return f"Application/file not found: {path}", 404
+
+    try:
+        if is_file_path:
+            # Hand the file to the platform's opener
+            if platform.system() == "Windows":
+                os.startfile(path_obj)
+            else:
+                open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
+                subprocess.Popen([open_cmd, str(path_obj)])
+            file_name = path_obj.name
+            file_name_without_ext = os.path.splitext(file_name)[0]
+        else:
+            # Launch the application itself; the command doubles as the window title to look for
+            subprocess.Popen([path])
+            file_name = file_name_without_ext = path
+
+        wmctrl_warning_logged = False
+        started_at = time.time()
+
+        # Poll once per second until a matching window shows up or TIMEOUT elapses
+        while time.time() - started_at < TIMEOUT:
+            os_name = platform.system()
+            if os_name in ['Windows', 'Darwin']:
+                import pygetwindow as gw
+                # Full file name first, then the name without its extension
+                windows = gw.getWindowsWithTitle(file_name) or gw.getWindowsWithTitle(file_name_without_ext)
+
+                if windows:
+                    windows[0].activate()
+                    return "File opened and window activated successfully"
+            elif os_name == 'Linux':
+                try:
+                    # Each line of `wmctrl -l` describes one window and starts with its id
+                    listing = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
+                    stripped_output = listing.stdout.strip()
+                    if stripped_output:
+                        for window_line in stripped_output.split('\n'):
+                            if file_name in window_line or file_name_without_ext in window_line:
+                                window_id = window_line.split()[0]
+                                subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
+                                return "File opened and window activated successfully"
+                except (subprocess.CalledProcessError, FileNotFoundError):
+                    # wmctrl might not be installed or the window manager isn't ready.
+                    # Warn only the first time and let the loop retry.
+                    if not wmctrl_warning_logged:
+                        logger.warning("wmctrl command is not ready, will keep retrying...")
+                        wmctrl_warning_logged = True
+
+            time.sleep(1)
+
+        return f"Failed to find window for {file_name} within {TIMEOUT} seconds.", 500
+
+    except Exception as e:
+        return f"Failed to open {path}. Error: {e}", 500
+
+
+@app.route("/setup/activate_window", methods=['POST'])
+def activate_window():
+    """
+    Bring a window to the front.
+
+    Expects JSON with ``window_name`` and, optionally, ``strict`` (match the
+    whole title, case-sensitively) and ``by_class`` (match by class name
+    instead of title; Linux only).
+
+    :return: a text confirmation with status 200, or a text error with status
+        400 (no window name, or unsupported OS), 404 (no such window) or 500
+        (``by_class`` requested on Windows or macOS).
+    """
+    data = request.json
+    window_name = data.get('window_name', None)
+    if not window_name:
+        return "window_name required", 400
+    strict: bool = data.get("strict", False)  # compare case-sensitively and match the whole string
+    by_class_name: bool = data.get("by_class", False)
+
+    os_name = platform.system()
+
+    if os_name in ('Windows', 'Darwin'):
+        import pygetwindow as gw
+        if by_class_name:
+            os_label = "Windows" if os_name == 'Windows' else "macOS"
+            return "Get window by class name is not supported on {:} currently.".format(os_label), 500
+        windows = gw.getWindowsWithTitle(window_name)
+
+        if len(windows) == 0:
+            return "Window {:} not found (empty results)".format(window_name), 404
+
+        if strict:
+            # Strict mode needs the title to equal the requested name exactly;
+            # the candidates above only contain it
+            window = next((wnd for wnd in windows if wnd.title == window_name), None)
+            if window is None:
+                return "Window {:} not found (strict mode).".format(window_name), 404
+        else:
+            window = windows[0]
+
+        if os_name == 'Darwin':
+            # Un-minimize the window and then bring it to the front
+            window.unminimize()
+        window.activate()
+
+    elif os_name == 'Linux':
+        # wmctrl flags: -x matches by class, -F requires a full match, -a activates
+        wmctrl_flags = "-{:}{:}a".format("x" if by_class_name else "", "F" if strict else "")
+        subprocess.run(["wmctrl", wmctrl_flags, window_name])
+
+    else:
+        return f"Operating system {os_name} not supported.", 400
+
+    return "Window activated successfully", 200
+
+
+@app.route("/setup/close_window", methods=["POST"])
+def close_window():
+    """Close the window named by the JSON field ``window_name``.
+
+    ``strict`` demands an exact title match; ``by_class`` matches on class (Linux only).
+    """
+    data = request.json
+    if "window_name" not in data:
+        return "window_name required", 400
+    window_name: str = data["window_name"]
+    strict: bool = data.get("strict", False)  # compare case-sensitively and match the whole string
+    by_class_name: bool = data.get("by_class", False)
+
+    os_name: str = platform.system()
+    if os_name == "Windows":
+        import pygetwindow as gw
+        if by_class_name:
+            return "Get window by class name is not supported on Windows currently.", 500
+        windows: List[gw.Window] = gw.getWindowsWithTitle(window_name)
+        if len(windows) == 0:
+            return "Window {:} not found (empty results)".format(window_name), 404
+
+        window: Optional[gw.Window] = None
+        if strict:
+            # Compare the title with the requested name, not with the window object.
+            for wnd in windows:
+                if wnd.title == window_name:
+                    window = wnd
+                    break
+            if window is None:
+                return "Window {:} not found (strict mode).".format(window_name), 404
+        else:
+            window = windows[0]
+        window.close()
+    elif os_name == "Linux":
+        # wmctrl flags: -x matches on WM_CLASS, -F requires an exact title, -c closes.
+        wmctrl_flags = "-{:}{:}c".format("x" if by_class_name else "", "F" if strict else "")
+        subprocess.run(["wmctrl", wmctrl_flags, window_name])
+    elif os_name == "Darwin":
+        return "Currently not supported on macOS.", 500
+    else:
+        return "Not supported platform {:}".format(os_name), 500
+
+    return "Window closed successfully.", 200
+
+
+@app.route('/start_recording', methods=['POST'])
+def start_recording():
+    """Start an ffmpeg full-screen capture that writes to ``recording_path``.
+
+    Responds 400 if a recording is already running, 500 if the old file cannot be
+    removed or ffmpeg exits within two seconds, 501 on unsupported platforms.
+    """
+    global recording_process
+    if recording_process and recording_process.poll() is None:
+        return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400
+
+    # Clean up previous recording if it exists
+    if os.path.exists(recording_path):
+        try:
+            os.remove(recording_path)
+        except OSError as e:
+            logger.error(f"Error removing old recording file: {e}")
+            return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500
+
+    # Keep ffmpeg quiet (-nostats, -loglevel error): nobody drains the stderr pipe
+    # while recording, so periodic progress output could fill it and stall ffmpeg.
+    command = ["ffmpeg", "-y", "-nostats", "-loglevel", "error"]
+    if platform_name == "Linux":
+        d = display.Display()
+        screen_size = f"{d.screen().width_in_pixels}x{d.screen().height_in_pixels}"
+        command += ["-f", "x11grab", "-draw_mouse", "1", "-s", screen_size, "-i", ":0.0"]
+    elif platform_name == "Windows":
+        user32 = ctypes.windll.user32
+        screen_size = f"{user32.GetSystemMetrics(0)}x{user32.GetSystemMetrics(1)}"
+        command += ["-f", "gdigrab", "-draw_mouse", "1", "-framerate", "30", "-video_size", screen_size, "-i", "desktop"]
+    else:
+        return jsonify({'status': 'error', 'message': f'Recording not supported on {platform_name}'}), 501
+    # An argv list (not a shlex-split string) keeps backslashes/spaces in the path intact.
+    command += ["-c:v", "libx264", "-r", "30", recording_path]
+
+    # stdin is piped so end_recording can ask ffmpeg to quit; stderr carries error text.
+    recording_process = subprocess.Popen(
+        command, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
+
+    try:
+        # If ffmpeg exits within 2 seconds it failed to start.
+        recording_process.wait(timeout=2)
+        error_output = recording_process.stderr.read()
+        return jsonify({
+            'status': 'error',
+            'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
+        }), 500
+    except subprocess.TimeoutExpired:
+        # This is the expected outcome: the process is still running after 2 seconds.
+        return jsonify({'status': 'success', 'message': 'Started recording successfully.'})
+
+
+@app.route('/end_recording', methods=['POST'])
+def end_recording():
+    """Stop the running ffmpeg capture and send back the recorded file.
+
+    Responds 400 when nothing is recording, 500 if ffmpeg was killed or left no output.
+    """
+    global recording_process
+
+    if not recording_process or recording_process.poll() is not None:
+        recording_process = None  # Clean up stale process object
+        return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
+
+    error_output = ""
+    try:
+        if platform_name == "Windows":
+            # ffmpeg quits gracefully on reading 'q' from stdin (text-mode pipe, so a str).
+            try:
+                recording_process.stdin.write('q')
+                recording_process.stdin.flush()
+            except Exception:
+                recording_process.terminate()
+        else:
+            # SIGINT lets ffmpeg finalize the file before exiting.
+            recording_process.send_signal(signal.SIGINT)
+        _, error_output = recording_process.communicate(timeout=15)
+    except subprocess.TimeoutExpired:
+        logger.error("ffmpeg did not respond to stop signal, killing the process.")
+        recording_process.kill()
+        _, error_output = recording_process.communicate()
+        recording_process = None
+        return jsonify({
+            'status': 'error',
+            'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
+        }), 500
+
+    recording_process = None  # Clear the process from global state
+
+    # Check if the recording file was created and is not empty.
+    if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
+        return send_file(recording_path, as_attachment=True)
+    logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
+    return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
+
+
+@app.route("/run_python", methods=['POST'])
+def run_python():
+    """Run the Python source in the JSON field ``code`` and report its output.
+
+    The code is written to a temporary file and executed with the same
+    interpreter as this server, with a 30 second limit.
+
+    Returns:
+        JSON with ``status`` ('success' or 'error'), ``message`` (stdout and
+        stderr combined), ``output`` (stdout), ``error`` (stderr, or a generic
+        exit-code note) and ``return_code``. A missing ``code`` yields 400;
+        a timeout or any other failure to run the file yields 500.
+    """
+    data = request.json
+    code = data.get('code', None)
+
+    if not code:
+        return jsonify({'status': 'error', 'message': 'Code not supplied!'}), 400
+
+    import tempfile
+    import uuid
+
+    # Use the platform temp directory: a hard-coded "/tmp" normally does not
+    # exist on Windows, so opening the file there fails.
+    temp_filename = os.path.join(tempfile.gettempdir(), f"python_exec_{uuid.uuid4().hex}.py")
+
+    try:
+        # Python reads source files as UTF-8 by default, so write UTF-8 rather
+        # than the locale encoding (which can differ, notably on Windows).
+        with open(temp_filename, 'w', encoding='utf-8') as f:
+            f.write(code)
+
+        # Use sys.executable to use the same Python interpreter as the Flask server
+        result = subprocess.run(
+            [sys.executable, temp_filename],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            timeout=30  # 30 second timeout
+        )
+
+        # Prepare response
+        output = result.stdout
+        error_output = result.stderr
+
+        # Combine output and errors if both exist
+        combined_message = output
+        if error_output:
+            combined_message += ('\n' + error_output) if output else error_output
+
+        # Determine status based on return code and errors
+        if result.returncode != 0:
+            status = 'error'
+            if not error_output:
+                # If no stderr but non-zero return code, add a generic error message
+                error_output = f"Process exited with code {result.returncode}"
+                combined_message = combined_message + '\n' + error_output if combined_message else error_output
+        else:
+            status = 'success'
+
+        return jsonify({
+            'status': status,
+            'message': combined_message,
+            'need_more': False,      # Not applicable for file execution
+            'output': output,        # stdout only
+            'error': error_output,   # stderr only
+            'return_code': result.returncode
+        })
+
+    except subprocess.TimeoutExpired:
+        return jsonify({
+            'status': 'error',
+            'message': 'Execution timeout: Code took too long to execute',
+            'error': 'TimeoutExpired',
+            'need_more': False,
+            'output': None,
+        }), 500
+
+    except Exception as e:
+        # Capture the exception details
+        return jsonify({
+            'status': 'error',
+            'message': f'Execution error: {str(e)}',
+            'error': traceback.format_exc(),
+            'need_more': False,
+            'output': None,
+        }), 500
+
+    finally:
+        # Always remove the temporary file; a failed cleanup must not mask the response.
+        try:
+            os.remove(temp_filename)
+        except OSError:
+            pass
+
+
+@app.route("/run_bash_script", methods=['POST'])
+def run_bash_script():
+    """Run the shell script in the JSON field ``script`` and return its output.
+
+    A ``#!/bin/bash`` line is prepended when the script does not contain one.
+    Optional fields: ``timeout`` in seconds (default 100) and ``working_dir``
+    (``~`` is expanded). stderr is merged into ``output``; ``error`` is always
+    empty. Responds 400 for a missing script or working directory, 500 on
+    timeout or when the script cannot be executed.
+    """
+    data = request.json
+    script = data.get('script', None)
+    timeout = data.get('timeout', 100)  # Default timeout of 100 seconds
+    working_dir = data.get('working_dir', None)
+
+    if not script:
+        return jsonify({
+            'status': 'error',
+            'output': 'Script not supplied!',
+            'error': "",  # Always empty as requested
+            'returncode': -1
+        }), 400
+
+    # Expand user directory if provided
+    if working_dir:
+        working_dir = os.path.expanduser(working_dir)
+        if not os.path.exists(working_dir):
+            return jsonify({
+                'status': 'error',
+                'output': f'Working directory does not exist: {working_dir}',
+                'error': "",  # Always empty as requested
+                'returncode': -1
+            }), 400
+
+    # Create a temporary script file. newline='\n' keeps Windows text mode from
+    # writing CRLF line endings, which bash would see as a stray '\r' on each line.
+    import tempfile
+    with tempfile.NamedTemporaryFile(mode='w', suffix='.sh', delete=False, newline='\n') as tmp_file:
+        if "#!/bin/bash" not in script:
+            script = "#!/bin/bash\n\n" + script
+        tmp_file.write(script)
+        tmp_file_path = tmp_file.name
+
+    try:
+        # Make the script executable
+        os.chmod(tmp_file_path, 0o755)
+
+        # Execute the script
+        if platform_name == "Windows":
+            # Relies on a bash found on PATH (Git Bash, WSL, etc.); no console window
+            interpreter = 'bash'
+            flags = subprocess.CREATE_NO_WINDOW
+        else:
+            # On Unix-like systems, use bash directly
+            interpreter = '/bin/bash'
+            flags = 0
+        result = subprocess.run(
+            [interpreter, tmp_file_path],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,  # Merge stderr into stdout
+            text=True,
+            timeout=timeout,
+            cwd=working_dir,
+            creationflags=flags,
+            shell=False
+        )
+
+        # Log the command execution for trajectory recording
+        _append_event("BashScript",
+                      {"script": script, "output": result.stdout, "error": "", "returncode": result.returncode},
+                      ts=time.time())
+
+        return jsonify({
+            'status': 'success' if result.returncode == 0 else 'error',
+            'output': result.stdout,  # Contains both stdout and stderr merged
+            'error': "",  # Always empty as requested
+            'returncode': result.returncode
+        })
+
+    except subprocess.TimeoutExpired:
+        return jsonify({
+            'status': 'error',
+            'output': f'Script execution timed out after {timeout} seconds',
+            'error': "",  # Always empty as requested
+            'returncode': -1
+        }), 500
+    except FileNotFoundError:
+        # Bash not found, try with sh
+        try:
+            result = subprocess.run(
+                ['sh', tmp_file_path],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,  # Merge stderr into stdout
+                text=True,
+                timeout=timeout,
+                cwd=working_dir,
+                shell=False
+            )
+
+            _append_event("BashScript",
+                          {"script": script, "output": result.stdout, "error": "", "returncode": result.returncode},
+                          ts=time.time())
+
+            return jsonify({
+                'status': 'success' if result.returncode == 0 else 'error',
+                'output': result.stdout,  # Contains both stdout and stderr merged
+                'error': "",  # Always empty as requested
+                'returncode': result.returncode,
+            })
+        except Exception as e:
+            return jsonify({
+                'status': 'error',
+                'output': f'Failed to execute script: {str(e)}',
+                'error': "",  # Always empty as requested
+                'returncode': -1
+            }), 500
+    except Exception as e:
+        return jsonify({
+            'status': 'error',
+            'output': f'Failed to execute script: {str(e)}',
+            'error': "",  # Always empty as requested
+            'returncode': -1
+        }), 500
+    finally:
+        # Clean up the temporary file
+        try:
+            os.unlink(tmp_file_path)
+        except OSError:
+            pass
+
+if __name__ == '__main__':
+    app.run(debug=True, host="0.0.0.0")  # NOTE(review): debug mode while bound to all interfaces -- confirm this is intended
diff --git a/desktop_env/server_win7/win7_offline_packages/numpy-1.24.4-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/numpy-1.24.4-cp38-cp38-win32.whl
new file mode 100644
index 0000000..06c0890
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/numpy-1.24.4-cp38-cp38-win32.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pillow-10.4.0-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/pillow-10.4.0-cp38-cp38-win32.whl
new file mode 100644
index 0000000..2415455
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pillow-10.4.0-cp38-cp38-win32.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pygame-2.6.1-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/pygame-2.6.1-cp38-cp38-win32.whl
new file mode 100644
index 0000000..df0f24b
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pygame-2.6.1-cp38-cp38-win32.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pymsgbox-2.0.1-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/pymsgbox-2.0.1-py3-none-any.whl
new file mode 100644
index 0000000..c938c0b
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pymsgbox-2.0.1-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pynput-1.7.6-py2.py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/pynput-1.7.6-py2.py3-none-any.whl
new file mode 100644
index 0000000..1375d49
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pynput-1.7.6-py2.py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pyscreeze-1.0.1.tar.gz b/desktop_env/server_win7/win7_offline_packages/pyscreeze-1.0.1.tar.gz
new file mode 100644
index 0000000..1def580
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pyscreeze-1.0.1.tar.gz differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pytweening-1.2.0.tar.gz b/desktop_env/server_win7/win7_offline_packages/pytweening-1.2.0.tar.gz
new file mode 100644
index 0000000..a1c0349
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pytweening-1.2.0.tar.gz differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pywin32-311-cp38-cp38-win32.whl b/desktop_env/server_win7/win7_offline_packages/pywin32-311-cp38-cp38-win32.whl
new file mode 100644
index 0000000..c03f7ea
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pywin32-311-cp38-cp38-win32.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/pywinauto-0.6.9-py2.py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/pywinauto-0.6.9-py2.py3-none-any.whl
new file mode 100644
index 0000000..042276c
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/pywinauto-0.6.9-py2.py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/requests-2.32.4-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/requests-2.32.4-py3-none-any.whl
new file mode 100644
index 0000000..d52fad0
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/requests-2.32.4-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/requirements_win7.txt b/desktop_env/server_win7/win7_offline_packages/requirements_win7.txt
new file mode 100644
index 0000000..ad69323
--- /dev/null
+++ b/desktop_env/server_win7/win7_offline_packages/requirements_win7.txt
@@ -0,0 +1,44 @@
+# Offline install list for 32-bit Windows 7 with Python 3.8
+# Install command: pip install --no-index --find-links . -r requirements_win7.txt
+
+# Low-level dependencies, installed first
+six
+zipp
+importlib_metadata
+certifi
+idna
+urllib3
+charset_normalizer
+requests
+
+# Flask dependency chain
+MarkupSafe
+itsdangerous
+click
+blinker
+werkzeug
+jinja2
+flask
+
+# Win32 low-level bindings
+pywin32
+
+# Data processing
+numpy
+Pillow
+lxml
+pygame
+
+# GUI control
+pymsgbox
+pytweening
+pyscreeze
+PyGetWindow
+MouseInfo
+PyAutoGUI
+
+# Input control
+pynput
+
+# Windows automation
+pywinauto
diff --git a/desktop_env/server_win7/win7_offline_packages/setuptools-75.3.4-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/setuptools-75.3.4-py3-none-any.whl
new file mode 100644
index 0000000..4cacd34
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/setuptools-75.3.4-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/six-1.17.0-py2.py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/six-1.17.0-py2.py3-none-any.whl
new file mode 100644
index 0000000..c506fd0
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/six-1.17.0-py2.py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/urllib3-2.2.3-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/urllib3-2.2.3-py3-none-any.whl
new file mode 100644
index 0000000..3870568
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/urllib3-2.2.3-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/werkzeug-3.0.6-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/werkzeug-3.0.6-py3-none-any.whl
new file mode 100644
index 0000000..15b739b
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/werkzeug-3.0.6-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/wheel-0.45.1-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/wheel-0.45.1-py3-none-any.whl
new file mode 100644
index 0000000..589308a
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/wheel-0.45.1-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_offline_packages/zipp-3.20.2-py3-none-any.whl b/desktop_env/server_win7/win7_offline_packages/zipp-3.20.2-py3-none-any.whl
new file mode 100644
index 0000000..d506da9
Binary files /dev/null and b/desktop_env/server_win7/win7_offline_packages/zipp-3.20.2-py3-none-any.whl differ
diff --git a/desktop_env/server_win7/win7_server_packages.zip b/desktop_env/server_win7/win7_server_packages.zip
new file mode 100644
index 0000000..31aafe3
Binary files /dev/null and b/desktop_env/server_win7/win7_server_packages.zip differ
diff --git a/evaluation_examples/examples/flsol/flsol_task1.json b/evaluation_examples/examples/flsol/flsol_task1.json
new file mode 100644
index 0000000..b24d716
--- /dev/null
+++ b/evaluation_examples/examples/flsol/flsol_task1.json
@@ -0,0 +1,40 @@
+{
+  "id": "flsol_task1",
+  "snapshot": "flsol",
+  "instruction": "启动 FL Solutions for F-4600 软件，并截图确认主界面已成功打开。",
+  "source": "custom",
+  "config": [
+    {
+      "type": "sleep",
+      "parameters": {
+        "seconds": 2
+      }
+    }
+  ],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "flsol"
+  ],
+  "evaluator": {
+    "postconfig": [
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 4
+        }
+      }
+    ],
+    "func": "vllm_eval",
+    "expected": {
+      "description": "FL Solutions for F-4600 主界面应已打开，可见菜单栏（File, Method, Measure 等）和仪器连接状态面板。"
+    }
+  },
+  "proxy": false,
+  "fixed_ip": true,
+  "possibility_of_env_change": "low",
+  "metadata": {
+    "input_files": [],
+    "steps": "1. 双击桌面上的 FL Solutions for F-4600 快捷方式，或通过开始菜单找到并打开 FL Solutions 程序（路径：C:\\Program Files\\FL Solutions\\flsol.exe）。\n2. 等待程序加载完成（约 5-10 秒），观察主界面是否出现，包含顶部菜单栏（File、Method、Measure、View、Tools、Help 等选项）以及仪器状态面板。\n3. 确认主界面已打开后，操作完成。",
+    "steps_original": "1. 打开 FL Solutions for F-4600 软件。\n2. 等待主界面加载完毕。"
+  }
+}
diff --git a/evaluation_examples/examples/flsol/flsol_task2.json b/evaluation_examples/examples/flsol/flsol_task2.json
new file mode 100644
index 0000000..1b4d427
--- /dev/null
+++ b/evaluation_examples/examples/flsol/flsol_task2.json
@@ -0,0 +1,48 @@
+{
+  "id": "flsol_task2",
+  "snapshot": "flsol",
+  "instruction": "打开 FL Solutions for F-4600，新建一个波长扫描（Wavelength Scan）方法，将激发波长（Excitation Wavelength）设置为 350 nm，扫描范围设为 400–700 nm，扫描速度设为 240 nm/min，然后保存该方法为 test_scan.mth。",
+  "source": "custom",
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "C:\\Program Files\\FL Solutions\\flsol.exe"
+        ]
+      }
+    },
+    {
+      "type": "sleep",
+      "parameters": {
+        "seconds": 8
+      }
+    }
+  ],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "flsol"
+  ],
+  "evaluator": {
+    "postconfig": [
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 3
+        }
+      }
+    ],
+    "func": "vllm_eval",
+    "expected": {
+      "description": "屏幕上应显示 FL Solutions 主界面，并且在 Method 或文件相关区域可以看到名为 test_scan.mth 的方法文件已被保存/创建，或在最近文件列表中可以看到该文件名。激发波长应设为 350 nm，扫描范围为 400-700 nm。"
+    }
+  },
+  "proxy": false,
+  "fixed_ip": true,
+  "possibility_of_env_change": "medium",
+  "metadata": {
+    "input_files": [],
+    "steps": "1. 等待 FL Solutions 主界面打开。\n2. 在顶部菜单栏中，单击 'Method'（方法）菜单，然后选择 'New'（新建）或 'Wavelength Scan'（波长扫描）选项，创建一个新的波长扫描方法。\n3. 在弹出的方法设置对话框中，找到 'Excitation Wavelength'（激发波长）输入框，将数值清空后输入 '350'，单位为 nm。\n4. 找到扫描范围（Scan Range）设置区域，将起始波长（Start）设为 '400' nm，将结束波长（End）设为 '700' nm。\n5. 找到扫描速度（Scan Speed）下拉菜单或输入框，选择或输入 '240' nm/min。\n6. 确认设置无误后，单击菜单 'Method' → 'Save As'（另存为），在弹出的文件保存对话框中，将文件名输入为 'test_scan'，确保文件类型为 .mth，然后单击 '保存'（Save）按钮。\n7. 确认文件已成功保存。",
+    "steps_original": "1. 打开 FL Solutions 并新建 Wavelength Scan 方法。\n2. 设置 Ex 波长 350 nm，扫描范围 400-700 nm，速度 240 nm/min。\n3. 另存为 test_scan.mth。"
+  }
+}
diff --git a/evaluation_examples/examples/flsol/flsol_task3.json b/evaluation_examples/examples/flsol/flsol_task3.json
new file mode 100644
index 0000000..1423aa7
--- /dev/null
+++ b/evaluation_examples/examples/flsol/flsol_task3.json
@@ -0,0 +1,48 @@
+{
+  "id": "flsol_task3",
+  "snapshot": "flsol",
+  "instruction": "打开 FL Solutions for F-4600，进入仪器参数设置（Instrument Parameters），将光电倍增管电压（PMT Voltage）修改为 700 V，狭缝宽度（Slit Width，激发和发射均设置）修改为 5.0 nm，响应时间（Response）设置为 Auto，然后截图确认参数设置完成。",
+  "source": "custom",
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": [
+          "C:\\Program Files\\FL Solutions\\flsol.exe"
+        ]
+      }
+    },
+    {
+      "type": "sleep",
+      "parameters": {
+        "seconds": 8
+      }
+    }
+  ],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "flsol"
+  ],
+  "evaluator": {
+    "postconfig": [
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 3
+        }
+      }
+    ],
+    "func": "vllm_eval",
+    "expected": {
+      "description": "FL Solutions 的仪器参数设置界面（Instrument Parameters 或 Method 设置窗口）应可见，其中 PMT Voltage 显示为 700 V，激发和发射狭缝宽度（Ex Slit / Em Slit）均显示为 5.0 nm，响应时间（Response）显示为 Auto。"
+    }
+  },
+  "proxy": false,
+  "fixed_ip": true,
+  "possibility_of_env_change": "medium",
+  "metadata": {
+    "input_files": [],
+    "steps": "1. 等待 FL Solutions 主界面打开。\n2. 在顶部菜单栏中，单击 'Method'（方法）菜单，选择 'New'（新建）创建一个新方法，或在现有方法的参数面板中操作。\n3. 在方法设置界面中，找到仪器参数（Instrument Parameters）区域（通常在方法编辑窗口的下方或单独的选项卡中）。\n4. 找到 'PMT Voltage'（光电倍增管电压）输入框，清空当前值并输入 '700'，单位为 V。\n5. 找到激发狭缝宽度（EX Slit Width 或 Excitation Slit）输入框或下拉菜单，将值修改为 '5.0'，单位为 nm。\n6. 找到发射狭缝宽度（EM Slit Width 或 Emission Slit）输入框或下拉菜单，将值修改为 '5.0'，单位为 nm。\n7. 找到响应时间（Response）下拉菜单，单击展开，选择 'Auto'（自动）选项。\n8. 确认所有参数设置完成后，截图当前界面。",
+    "steps_original": "1. 打开 FL Solutions，进入方法/仪器参数设置。\n2. 设置 PMT 电压 700V，Ex/Em 狭缝宽度各 5.0 nm，Response 设为 Auto。"
+  }
+}
diff --git a/evaluation_examples/examples/flsol/flsol_task4_measure.json b/evaluation_examples/examples/flsol/flsol_task4_measure.json
new file mode 100644
index 0000000..4bb5de2
--- /dev/null
+++ b/evaluation_examples/examples/flsol/flsol_task4_measure.json
@@ -0,0 +1,44 @@
+{
+  "id": "flsol_task4_measure",
+  "snapshot": "flsol",
+  "instruction": "使用 FL Solutions for F-4600 软件执行一次荧光测量。",
+  "source": "custom",
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": ["C:\\Program Files\\FL Solutions\\flsol.exe"],
+        "shell": false
+      }
+    },
+    {
+      "type": "sleep",
+      "parameters": {
+        "seconds": 15
+      }
+    }
+  ],
+  "trajectory": "trajectories/",
+  "related_apps": [
+    "flsol"
+  ],
+  "evaluator": {
+    "postconfig": [
+      {
+        "type": "sleep",
+        "parameters": {
+          "seconds": 5
+        }
+      }
+    ],
+    "func": "vllm_eval"
+  },
+  "proxy": false,
+  "fixed_ip": true,
+  "possibility_of_env_change": "low",
+  "metadata": {
+    "input_files": [],
+    "steps": "1. 软件已自动打开，等待 FL Solutions 主界面完全加载，确认标题栏显示 'FL Solutions - F-4600 FL Spectrophotometer on USB' 且仪器状态正常（无报错弹窗）。\n2. 直接按键盘 F4 键触发 Measure（这是最可靠的方式，F4 是 Measure 的快捷键）。\n3. 如果 F4 无反应，则通过菜单执行：点击菜单栏 'Spectrophotometer'（第四个菜单项，位于 View 和 Tools 之间），在下拉菜单中点击 'Measure'（快捷键 F4）。\n4. 等待测量过程完成，软件会在图表区域实时绘制扫描曲线，测量完成后曲线绘制停止。\n5. 确认图表区域有测量结果曲线后，操作完成。",
+    "steps_original": "1. 等待 FL Solutions 和 F-4600 仪器完全初始化，状态变为 Ready。\n2. 点击 Measure 按钮执行测量。\n3. 等待测量完成，图表区域出现曲线。"
+  }
+}
diff --git a/evaluation_examples/test_flsol.json b/evaluation_examples/test_flsol.json
new file mode 100644
index 0000000..467c9cc
--- /dev/null
+++ b/evaluation_examples/test_flsol.json
@@ -0,0 +1,5 @@
+{
+  "flsol": [
+    "flsol_task4_measure"
+  ]
+}
diff --git a/run_flsol_win7.sh b/run_flsol_win7.sh
new file mode 100755
index 0000000..861d0d3
--- /dev/null
+++ b/run_flsol_win7.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# =============================================================================
+# FL Solutions for F-4600 evaluation script (Win7 over a direct Ethernet link)
+# provider: direct -- talks to the Flask service directly, no VM/SSH involved
+# =============================================================================
+
+# ---------- Win7 direct-connection IP ----------
+export DIRECT_VM_IP="192.168.1.11"   # Flask address of the physical Win7 machine
+
+# ---------- LLM API configuration ----------
+# The key must come from the caller's environment; never commit a secret here.
+export OPENAI_API_KEY="${OPENAI_API_KEY:?set OPENAI_API_KEY in the environment before running}"
+export OPENAI_BASE_URL="https://vip.apiyi.com/v1"
+
+# ---------- Evaluation parameters (aligned with run_proxmox.sh) ----------
+MODEL="gpt-5.4"
+EVAL_MODEL="gemini-3.1-pro-preview"
+MAX_STEPS=50
+SLEEP_AFTER_EXEC=3
+TEMPERATURE=0
+TOP_P=0.9
+MAX_TOKENS=16384
+MAX_TRAJECTORY_LENGTH=3
+OBSERVATION_TYPE="screenshot_a11y_tree"  # NOTE(review): banner below says "screenshot only" -- confirm
+ACTION_SPACE="pyautogui"
+SCREEN_WIDTH=1280
+SCREEN_HEIGHT=1024
+RESULT_DIR="${RESULT_DIR:-/Users/lizhanyuan/Downloads/results2/flsol}"  # override via environment
+TEST_META="evaluation_examples/test_flsol.json"
+DOMAIN="flsol"
+INJECT_STEPS=true
+
+# ---------- Pre-flight checks ----------
+cd "$(dirname "$0")" || exit 1
+
+echo "=== FL Solutions F-4600 评测预检查 ==="
+echo ""
+echo -n "Flask Server (${DIRECT_VM_IP}:5000)... "
+HTTP_CODE=$(curl -s --connect-timeout 5 "http://${DIRECT_VM_IP}:5000/screenshot" \
+    -o /dev/null -w "%{http_code}" 2>/dev/null)
+if [ "$HTTP_CODE" = "200" ]; then
+    echo "OK"
+else
+    echo "FAIL (HTTP ${HTTP_CODE})"
+    echo "[ERROR] Win7 Flask Server 不可达，请先在 Win7 运行: python D:\python_server\main.py"
+    exit 1
+fi
+
+mkdir -p "${RESULT_DIR}" logs
+
+echo ""
+echo "=== 开始评测 ==="
+echo "  Provider:   direct (无 VM 管理，直连 Flask)"
+echo "  Win7 IP:    ${DIRECT_VM_IP}"
+echo "  Model:      ${MODEL}"
+echo "  Eval:       ${EVAL_MODEL}"
+echo "  Task:       flsol_task4_measure"
+echo "  Obs Type:   ${OBSERVATION_TYPE}  (screenshot only, Win7 a11y unstable)"
+echo "  Max Steps:  ${MAX_STEPS}"
+echo "  Max Tokens: ${MAX_TOKENS}"
+echo "  Results:    ${RESULT_DIR}"
+echo ""
+
+if [ "${INJECT_STEPS}" = true ]; then
+    INJECT_FLAG="--inject_steps"
+else
+    INJECT_FLAG="--no_inject_steps"
+fi
+
+python3 run.py \
+    --provider_name "direct" \
+    --path_to_vm "ignored" \
+    --observation_type "${OBSERVATION_TYPE}" \
+    --action_space "${ACTION_SPACE}" \
+    --model "${MODEL}" \
+    --eval_model "${EVAL_MODEL}" \
+    --temperature "${TEMPERATURE}" \
+    --top_p "${TOP_P}" \
+    --max_tokens "${MAX_TOKENS}" \
+    --max_trajectory_length "${MAX_TRAJECTORY_LENGTH}" \
+    --screen_width "${SCREEN_WIDTH}" \
+    --screen_height "${SCREEN_HEIGHT}" \
+    --sleep_after_execution "${SLEEP_AFTER_EXEC}" \
+    --max_steps "${MAX_STEPS}" \
+    --result_dir "${RESULT_DIR}" \
+    --test_all_meta_path "${TEST_META}" \
+    --domain "${DOMAIN}" \
+    ${INJECT_FLAG}
+
+echo ""
+echo "=== 评测完成 ==="
+echo "结果保存在: ${RESULT_DIR}"