fix: address https://github.com/xlang-ai/OSWorld/issues/257 by implementing a fix for the PyAutoGUI '<' character bug in command execution. Introduces a new function that rewrites typewrite and press calls so that '<' is typed correctly. The command execution logic now applies this fix before executing user commands.

This commit is contained in:
yuanmengqi
2025-07-15 04:17:34 +00:00
parent 698483390a
commit 68a9f647f4

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import logging
import os
import time
import re
from typing import Callable, Any, Optional, Tuple
from typing import List, Dict, Union
@@ -22,6 +23,88 @@ MAX_RETRIES = 5 # Maximum retries for environment setup
def _fix_pyautogui_less_than_bug(command: str) -> str:
"""
Fix PyAutoGUI '<' character bug by converting it to hotkey("shift", ',') calls.
This fixes the known PyAutoGUI issue where typing '<' produces '>' instead.
References:
- https://github.com/asweigart/pyautogui/issues/198
- https://github.com/xlang-ai/OSWorld/issues/257
Args:
command (str): The original pyautogui command
Returns:
str: The fixed command with '<' characters handled properly
"""
# Handle typewrite with '<' characters
def replace_typewrite_less_than(match):
content = match.group(1)
# Split the content by '<' and rebuild with hotkey calls
parts = content.split('<')
if len(parts) == 1:
# No '<' found, return original
return match.group(0)
# Rebuild the command
result_parts = []
for i, part in enumerate(parts):
if i == 0:
# First part, just add typewrite if not empty
if part:
result_parts.append(f"pyautogui.typewrite({repr(part)})")
else:
# Add hotkey for '<' and then typewrite for the rest if not empty
result_parts.append('pyautogui.hotkey("shift", ",")')
if part:
result_parts.append(f"pyautogui.typewrite({repr(part)})")
return '; '.join(result_parts)
# Handle press('<') calls
def replace_press_less_than(match):
return 'pyautogui.hotkey("shift", ",")'
# Pattern to match typewrite calls with quoted strings
typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'
# Pattern to match press('<') calls
press_pattern = r'pyautogui\.press\(["\']<["\']\)'
# First handle press('<') calls
command = re.sub(press_pattern, replace_press_less_than, command)
# Then handle typewrite calls
def process_typewrite_match(match):
quote_char = match.group(1)
content = match.group(2)
# Check if content contains '<'
if '<' not in content:
return match.group(0)
# Split by '<' and rebuild
parts = content.split('<')
result_parts = []
for i, part in enumerate(parts):
if i == 0:
# First part
if part:
result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
else:
# Add hotkey for '<' and then typewrite for the rest
result_parts.append('pyautogui.hotkey("shift", ",")')
if part:
result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
return '; '.join(result_parts)
command = re.sub(typewrite_pattern, process_typewrite_match, command)
return command
class DesktopEnv(gym.Env):
"""
DesktopEnv with OpenAI Gym interface. It provides a desktop environment for setting and evaluating desktop automation tasks.
@@ -341,9 +424,13 @@ class DesktopEnv(gym.Env):
else:
# the set of all possible python commands insides `pyautogui`
if type(action) == str:
self.controller.execute_python_command(action)
# Fix PyAutoGUI '<' character bug before execution
fixed_command = _fix_pyautogui_less_than_bug(action)
self.controller.execute_python_command(fixed_command)
elif type(action) == dict:
self.controller.execute_python_command(action['command'])
# Fix PyAutoGUI '<' character bug before execution
fixed_command = _fix_pyautogui_less_than_bug(action['command'])
self.controller.execute_python_command(fixed_command)
time.sleep(pause)
observation = self._get_obs()