fix: address https://github.com/xlang-ai/OSWorld/issues/257 by implementing a fix for the PyAutoGUI '<' character bug in command execution. Introduced a new function that rewrites typewrite and press calls so that '<' is typed correctly in commands. Updated the command execution logic to apply this fix before executing user commands.

This commit is contained in:
yuanmengqi
2025-07-15 04:17:34 +00:00
parent 698483390a
commit 68a9f647f4

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
import logging import logging
import os import os
import time import time
import re
from typing import Callable, Any, Optional, Tuple from typing import Callable, Any, Optional, Tuple
from typing import List, Dict, Union from typing import List, Dict, Union
@@ -22,6 +23,88 @@ MAX_RETRIES = 5 # Maximum retries for environment setup
def _fix_pyautogui_less_than_bug(command: str) -> str:
"""
Fix PyAutoGUI '<' character bug by converting it to hotkey("shift", ',') calls.
This fixes the known PyAutoGUI issue where typing '<' produces '>' instead.
References:
- https://github.com/asweigart/pyautogui/issues/198
- https://github.com/xlang-ai/OSWorld/issues/257
Args:
command (str): The original pyautogui command
Returns:
str: The fixed command with '<' characters handled properly
"""
# Handle typewrite with '<' characters
def replace_typewrite_less_than(match):
content = match.group(1)
# Split the content by '<' and rebuild with hotkey calls
parts = content.split('<')
if len(parts) == 1:
# No '<' found, return original
return match.group(0)
# Rebuild the command
result_parts = []
for i, part in enumerate(parts):
if i == 0:
# First part, just add typewrite if not empty
if part:
result_parts.append(f"pyautogui.typewrite({repr(part)})")
else:
# Add hotkey for '<' and then typewrite for the rest if not empty
result_parts.append('pyautogui.hotkey("shift", ",")')
if part:
result_parts.append(f"pyautogui.typewrite({repr(part)})")
return '; '.join(result_parts)
# Handle press('<') calls
def replace_press_less_than(match):
return 'pyautogui.hotkey("shift", ",")'
# Pattern to match typewrite calls with quoted strings
typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'
# Pattern to match press('<') calls
press_pattern = r'pyautogui\.press\(["\']<["\']\)'
# First handle press('<') calls
command = re.sub(press_pattern, replace_press_less_than, command)
# Then handle typewrite calls
def process_typewrite_match(match):
quote_char = match.group(1)
content = match.group(2)
# Check if content contains '<'
if '<' not in content:
return match.group(0)
# Split by '<' and rebuild
parts = content.split('<')
result_parts = []
for i, part in enumerate(parts):
if i == 0:
# First part
if part:
result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
else:
# Add hotkey for '<' and then typewrite for the rest
result_parts.append('pyautogui.hotkey("shift", ",")')
if part:
result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
return '; '.join(result_parts)
command = re.sub(typewrite_pattern, process_typewrite_match, command)
return command
class DesktopEnv(gym.Env): class DesktopEnv(gym.Env):
""" """
DesktopEnv with OpenAI Gym interface. It provides a desktop environment for setting and evaluating desktop automation tasks. DesktopEnv with OpenAI Gym interface. It provides a desktop environment for setting and evaluating desktop automation tasks.
@@ -341,9 +424,13 @@ class DesktopEnv(gym.Env):
else: else:
# the set of all possible python commands insides `pyautogui` # the set of all possible python commands insides `pyautogui`
if type(action) == str: if type(action) == str:
self.controller.execute_python_command(action) # Fix PyAutoGUI '<' character bug before execution
fixed_command = _fix_pyautogui_less_than_bug(action)
self.controller.execute_python_command(fixed_command)
elif type(action) == dict: elif type(action) == dict:
self.controller.execute_python_command(action['command']) # Fix PyAutoGUI '<' character bug before execution
fixed_command = _fix_pyautogui_less_than_bug(action['command'])
self.controller.execute_python_command(fixed_command)
time.sleep(pause) time.sleep(pause)
observation = self._get_obs() observation = self._get_obs()