Merge remote-tracking branch 'upstream/main'

This commit is contained in:
yuanmengqi
2025-06-30 08:20:45 +00:00
21 changed files with 1020 additions and 216 deletions

View File

@@ -36,6 +36,8 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__))
init_proxy_pool(PROXY_CONFIG_FILE) # initialize the global proxy pool init_proxy_pool(PROXY_CONFIG_FILE) # initialize the global proxy pool
MAX_RETRIES = 20
class SetupController: class SetupController:
def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"): def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"):
self.vm_ip: str = vm_ip self.vm_ip: str = vm_ip
@@ -64,20 +66,20 @@ class SetupController:
# make sure connection can be established # make sure connection can be established
logger.info(f"try to connect {self.http_server}") logger.info(f"try to connect {self.http_server}")
retry = 0 retry = 0
while retry < 50: while retry < MAX_RETRIES:
try: try:
_ = requests.get(self.http_server + "/terminal") _ = requests.get(self.http_server + "/terminal")
break break
except: except:
time.sleep(5) time.sleep(5)
retry += 1 retry += 1
logger.info(f"retry: {retry}/50") logger.info(f"retry: {retry}/{MAX_RETRIES}")
if retry == 50: if retry == MAX_RETRIES:
return False return False
for cfg in config: for i, cfg in enumerate(config):
config_type: str = cfg["type"] config_type: str = cfg["type"]
parameters: Dict[str, Any] = cfg["parameters"] parameters: Dict[str, Any] = cfg["parameters"]
@@ -85,10 +87,17 @@ class SetupController:
# protocol # protocol
setup_function: str = "_{:}_setup".format(config_type) setup_function: str = "_{:}_setup".format(config_type)
assert hasattr(self, setup_function), f'Setup controller cannot find init function {setup_function}' assert hasattr(self, setup_function), f'Setup controller cannot find init function {setup_function}'
logger.info(f"call function {setup_function}")
getattr(self, setup_function)(**parameters) try:
logger.info(f"Executing setup step {i+1}/{len(config)}: {setup_function}")
logger.info("SETUP: %s(%s)", setup_function, str(parameters)) logger.debug(f"Setup parameters: {parameters}")
getattr(self, setup_function)(**parameters)
logger.info(f"SETUP COMPLETED: {setup_function}({str(parameters)})")
except Exception as e:
logger.error(f"SETUP FAILED at step {i+1}/{len(config)}: {setup_function}({str(parameters)})")
logger.error(f"Error details: {e}")
logger.error(f"Traceback: {traceback.format_exc()}")
raise Exception(f"Setup step {i+1} failed: {setup_function} - {e}") from e
return True return True
@@ -111,25 +120,41 @@ class SetupController:
raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).") raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).")
if not os.path.exists(cache_path): if not os.path.exists(cache_path):
logger.info(f"Cache file not found, downloading from {url} to {cache_path}")
max_retries = 3 max_retries = 3
downloaded = False downloaded = False
e = None e = None
for i in range(max_retries): for i in range(max_retries):
try: try:
response = requests.get(url, stream=True) logger.info(f"Download attempt {i+1}/{max_retries} for {url}")
response = requests.get(url, stream=True, timeout=300) # Add 5 minute timeout
response.raise_for_status() response.raise_for_status()
# Get file size if available
total_size = int(response.headers.get('content-length', 0))
if total_size > 0:
logger.info(f"File size: {total_size / (1024*1024):.2f} MB")
downloaded_size = 0
with open(cache_path, 'wb') as f: with open(cache_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192): for chunk in response.iter_content(chunk_size=8192):
if chunk: if chunk:
f.write(chunk) f.write(chunk)
logger.info("File downloaded successfully") downloaded_size += len(chunk)
if total_size > 0 and downloaded_size % (1024*1024) == 0: # Log every MB
progress = (downloaded_size / total_size) * 100
logger.info(f"Download progress: {progress:.1f}%")
logger.info(f"File downloaded successfully to {cache_path} ({downloaded_size / (1024*1024):.2f} MB)")
downloaded = True downloaded = True
break break
except requests.RequestException as e: except requests.RequestException as e:
logger.error( logger.error(
f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)") f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
# Clean up partial download
if os.path.exists(cache_path):
os.remove(cache_path)
if not downloaded: if not downloaded:
raise requests.RequestException(f"Failed to download {url}. No retries left.") raise requests.RequestException(f"Failed to download {url}. No retries left.")
@@ -142,14 +167,18 @@ class SetupController:
# send request to server to upload file # send request to server to upload file
try: try:
logger.info(f"Uploading {os.path.basename(path)} to VM at {path}")
logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload") logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload")
response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form) response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form, timeout=600) # 10 minute timeout for upload
if response.status_code == 200: if response.status_code == 200:
logger.info("Command executed successfully: %s", response.text) logger.info(f"File uploaded successfully: {path}")
logger.debug("Upload response: %s", response.text)
else: else:
logger.error("Failed to upload file. Status code: %s", response.text) logger.error(f"Failed to upload file {path}. Status code: {response.status_code}, Response: {response.text}")
raise requests.RequestException(f"Upload failed with status {response.status_code}")
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error("An error occurred while trying to send the request: %s", e) logger.error(f"An error occurred while trying to upload {path}: {e}")
raise
def _upload_file_setup(self, files: List[Dict[str, str]]): def _upload_file_setup(self, files: List[Dict[str, str]]):
""" """
@@ -219,13 +248,14 @@ class SetupController:
# send request to server to open file # send request to server to open file
try: try:
response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload) # The server-side call is now blocking and can take time.
if response.status_code == 200: # We set a timeout that is slightly longer than the server's timeout (1800s).
logger.info("Command executed successfully: %s", response.text) response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810)
else: response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes
logger.error("Failed to open file. Status code: %s", response.text) logger.info("Command executed successfully: %s", response.text)
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
logger.error("An error occurred while trying to send the request: %s", e) logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}")
raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e
def _launch_setup(self, command: Union[str, List[str]], shell: bool = False): def _launch_setup(self, command: Union[str, List[str]], shell: bool = False):
if not command: if not command:
@@ -302,6 +332,57 @@ class SetupController:
if not terminates: if not terminates:
time.sleep(0.3) time.sleep(0.3)
def _execute_with_verification_setup(
self,
command: List[str],
verification: Dict[str, Any] = None,
max_wait_time: int = 10,
check_interval: float = 1.0,
shell: bool = False
):
"""Execute command with verification of results
Args:
command: Command to execute
verification: Dict with verification criteria:
- window_exists: Check if window with this name exists
- command_success: Execute this command and check if it succeeds
max_wait_time: Maximum time to wait for verification
check_interval: Time between verification checks
shell: Whether to use shell
"""
if not command:
raise Exception("Empty command to launch.")
verification = verification or {}
payload = json.dumps({
"command": command,
"shell": shell,
"verification": verification,
"max_wait_time": max_wait_time,
"check_interval": check_interval
})
headers = {"Content-Type": "application/json"}
try:
response = requests.post(self.http_server + "/setup" + "/execute_with_verification",
headers=headers, data=payload, timeout=max_wait_time + 10)
if response.status_code == 200:
result = response.json()
logger.info("Command executed and verified successfully: %s -> %s"
, " ".join(command) if isinstance(command, list) else command
, response.text
)
return result
else:
logger.error("Failed to execute with verification. Status code: %s", response.text)
raise Exception(f"Command verification failed: {response.text}")
except requests.exceptions.RequestException as e:
logger.error("An error occurred while trying to send the request: %s", e)
traceback.print_exc()
raise Exception(f"Request failed: {e}")
def _command_setup(self, command: List[str], **kwargs): def _command_setup(self, command: List[str], **kwargs):
self._execute_setup(command, **kwargs) self._execute_setup(command, **kwargs)

View File

@@ -18,7 +18,7 @@ logger = logging.getLogger("desktopenv.env")
Metric = Callable[[Any, Any], float] Metric = Callable[[Any, Any], float]
Getter = Callable[[gym.Env, Dict[str, Any]], Any] Getter = Callable[[gym.Env, Dict[str, Any]], Any]
MAX_RETRIES = 5 MAX_RETRIES = 5 # Maximum retries for environment setup
class DesktopEnv(gym.Env): class DesktopEnv(gym.Env):
""" """
@@ -72,6 +72,16 @@ class DesktopEnv(gym.Env):
self.os_type = os_type self.os_type = os_type
# Track whether environment has been used (step/setup) to optimize snapshot revert
# docker, aws, gcp, azure are always unused as the emulator starts from a clean state
# vmware, virtualbox are always used as the emulator starts from a dirty state
if self.provider_name in {"docker", "aws", "gcp", "azure"}:
self.is_environment_used = False
elif self.provider_name in {"vmware", "virtualbox"}:
self.is_environment_used = True
else:
raise ValueError(f"Invalid provider name: {self.provider_name}")
# Initialize environment variables # Initialize environment variables
if path_to_vm: if path_to_vm:
self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) \ self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) \
@@ -190,11 +200,19 @@ class DesktopEnv(gym.Env):
logger.info("Using regular AWS provider.") logger.info("Using regular AWS provider.")
logger.info("Reverting to snapshot to {}...".format(self.snapshot_name)) # Only revert to snapshot if environment has been used (step/setup)
self._revert_to_snapshot() # This optimization is especially important for cloud providers like AWS
logger.info("Starting emulator...") # where unnecessary snapshot operations are costly and time-consuming
self._start_emulator() if self.is_environment_used:
logger.info("Emulator started.") logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
self._revert_to_snapshot()
logger.info("Starting emulator...")
self._start_emulator()
logger.info("Emulator started.")
# Reset the usage flag after reverting
self.is_environment_used = False
else:
logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))
if task_config is not None: if task_config is not None:
self._set_task_info(task_config) self._set_task_info(task_config)
@@ -202,6 +220,9 @@ class DesktopEnv(gym.Env):
logger.info("Setting up environment...") logger.info("Setting up environment...")
success = self.setup_controller.setup(self.config) success = self.setup_controller.setup(self.config)
if success: if success:
# Mark environment as used when setup is successfully executed
if self.config: # Only mark as used if there were actual setup operations
self.is_environment_used = True
break break
else: else:
logger.error( logger.error(
@@ -300,6 +321,9 @@ class DesktopEnv(gym.Env):
def step(self, action, pause=2): def step(self, action, pause=2):
self._step_no += 1 self._step_no += 1
self.action_history.append(action) self.action_history.append(action)
# Mark environment as used when step is called
self.is_environment_used = True
reward = 0 # todo: Define reward calculation for each example reward = 0 # todo: Define reward calculation for each example
done = False # todo: Define episode termination condition for each example done = False # todo: Define episode termination condition for each example
@@ -336,7 +360,11 @@ class DesktopEnv(gym.Env):
Evaluate whether the task is successfully completed. Evaluate whether the task is successfully completed.
""" """
self.setup_controller.setup(self.evaluator.get("postconfig", [])) postconfig = self.evaluator.get("postconfig", [])
self.setup_controller.setup(postconfig)
# Mark environment as used if there were postconfig setup operations
if postconfig:
self.is_environment_used = True
if self.evaluator['func'] == "infeasible": if self.evaluator['func'] == "infeasible":
if len(self.action_history) > 0 and self.action_history[-1] == "FAIL": if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":

View File

@@ -80,18 +80,16 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
returned. returned.
only support for single file now: only support for single file now:
time_suffix(bool): optional. defaults to False. if True, append the current time in required format. time_suffix(bool): optional. defaults to False. if True, append the current time in required format.
time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix. time_format(str): optional. defaults to "%Y%m%d_%H%M%S". format of the time suffix.
""" """
time_format = "%Y_%m_%d" time_format = "%Y%m%d_%H%M%S"
if not config.get("multi", False): if not config.get("multi", False):
paths: List[str] = [config["path"]] paths: List[str] = [config["path"]]
dests: List[str] = [config["dest"]] dests: List[str] = [config["dest"]]
if "time_suffix" in config.keys() and config["time_suffix"]: if config.get("time_suffix", False):
if "time_format" in config.keys(): time_format = config.get("time_format", time_format)
time_format = config["time_format"] # Insert time before file extension.
# Insert time before . in file type suffix dests = [f"{os.path.splitext(d)[0]}_{datetime.now().strftime(time_format)}{os.path.splitext(d)[1]}" for d in dests]
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
else: else:
paths: List[str] = config["path"] paths: List[str] = config["path"]
dests: List[str] = config["dest"] dests: List[str] = config["dest"]

View File

@@ -53,7 +53,8 @@ from .docs import (
compare_docx_files_and_ignore_new_lines, compare_docx_files_and_ignore_new_lines,
compare_docx_images, compare_docx_images,
compare_image_text, compare_image_text,
compare_references compare_references,
compare_unique_train_records
) )
from .general import ( from .general import (
check_csv, check_csv,

View File

@@ -167,8 +167,12 @@ def compare_docx_files(file1, file2, **options):
if ignore_case: if ignore_case:
p1, p2 = p1.lower(), p2.lower() p1, p2 = p1.lower(), p2.lower()
if p1 != p2: if p1 != p2:
print(p1) # show the difference
print(p2) print("=== First Paragraph ===")
print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars
print("=== Second Paragraph ===")
print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars
print("=" * 50) # Clear boundary
return 0 return 0
return 1 return 1
@@ -886,3 +890,72 @@ def compare_references(file1, file2, **options):
return (result - reference_base_result) / (1 - reference_base_result) return (result - reference_base_result) / (1 - reference_base_result)
else: else:
return 0 return 0
def compare_unique_train_records(processed_file, expected_files, **kwargs):
"""
Compares the processed file with a list of expected files containing the
gold standard and the initial document.
expected_files[0] should be the gold standard file.
expected_files[1] should be the initial file.
"""
# Debug logging to understand what we're actually receiving
logger.info(f"DEBUG: processed_file type: {type(processed_file)}, value: {processed_file}")
logger.info(f"DEBUG: expected_files type: {type(expected_files)}, value: {expected_files}")
logger.info(f"DEBUG: kwargs: {kwargs}")
if not processed_file or not isinstance(expected_files, list) or len(expected_files) < 2:
logger.error("Invalid arguments: processed_file and a list of 2 expected_files are required.")
return 0
gold_file = expected_files[0]
initial_file = expected_files[1]
if not gold_file or not initial_file:
logger.error("Gold file or initial file path is missing from expected_files list.")
return 0
# Helper function to get lines and IDs from a file
def get_lines_and_ids_from_file(file_path):
try:
doc = Document(file_path)
lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
train_ids = [line.split(',')[1].strip() for line in lines if len(line.split(',')) == 4]
return lines, train_ids
except Exception as e:
logger.error(f"Error opening or parsing file {file_path}: {e}")
return None, None
# Get data from all three files
processed_lines, processed_train_ids = get_lines_and_ids_from_file(processed_file)
if processed_lines is None: return 0
gold_lines, gold_train_ids = get_lines_and_ids_from_file(gold_file)
if gold_lines is None: return 0
initial_lines, _ = get_lines_and_ids_from_file(initial_file)
if initial_lines is None: return 0
initial_lines_set = set(initial_lines)
# 1. Subset Check: Ensure every processed line was in the initial file
if not set(processed_lines).issubset(initial_lines_set):
logger.error("Processed file contains lines not present in the initial file.")
logger.error(f"Extra lines: {set(processed_lines) - initial_lines_set}")
return 0
# 2. Uniqueness Check: Check for duplicates within the processed file
if len(processed_train_ids) != len(set(processed_train_ids)):
logger.error("Duplicate train_ids found in the processed file.")
return 0
# 3. Correctness Check: Compare the set of train_ids
if set(processed_train_ids) != set(gold_train_ids):
logger.error("Set of train_ids does not match between processed file and gold file.")
return 0
# 4. Line count check
if len(processed_lines) != len(gold_lines):
logger.error("Number of lines does not match between processed file and gold file.")
return 0
return 1

View File

@@ -3,6 +3,7 @@ import os
import subprocess import subprocess
from typing import Dict from typing import Dict
from xml.etree import ElementTree from xml.etree import ElementTree
from urllib.parse import urlparse
import acoustid import acoustid
import cv2 import cv2
@@ -26,15 +27,108 @@ def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float:
tree = ElementTree.fromstring(actual_status) tree = ElementTree.fromstring(actual_status)
status = tree.find('state').text status = tree.find('state').text
logger.info(f"VLC Status: {status}")
if status == 'playing': if status == 'playing':
if rule['type'] == 'file_name': if rule['type'] == 'file_name':
file_info = tree.find('information/category[@name="meta"]/info[@name="filename"]').text # Try multiple possible paths for file information in VLC XML
file_paths = [
'information/category[@name="meta"]/info[@name="filename"]',
'information/category[@name="meta"]/info[@name="title"]',
'information/category[@name="meta"]/info[@name="uri"]',
'information/category[@name="meta"]/info[@name="location"]',
'information/category[@name="meta"]/info[@name="name"]'
]
file_info = None
for path in file_paths:
element = tree.find(path)
if element is not None and element.text:
file_info = element.text
break
if file_info: if file_info:
return 1 if file_info.endswith(rule['file_name']) else 0 expected_filename = rule['file_name']
# Method 1: Direct filename match (most precise)
actual_basename = os.path.basename(file_info)
if actual_basename == expected_filename:
return 1
# Method 2: Endswith match (for backward compatibility)
if file_info.endswith(expected_filename):
return 1
# Method 3: For paths, check if expected filename is in the path
if expected_filename in file_info:
# Additional check to avoid false positives
# Make sure it's actually the filename, not just part of a path
if file_info.endswith('/' + expected_filename) or file_info.endswith('\\' + expected_filename):
return 1
logger.warning(f"File name mismatch - Expected: {expected_filename}, Found: {file_info}")
return 0
else:
logger.warning(f"Could not find file information in VLC status XML for rule: {rule}")
return 0
elif rule['type'] == 'url': elif rule['type'] == 'url':
file_info = tree.find('information/category[@name="meta"]/info[@name="url"]').text # Try multiple possible paths for URL information in VLC XML
url_paths = [
'information/category[@name="meta"]/info[@name="url"]',
'information/category[@name="meta"]/info[@name="URI"]',
'information/category[@name="meta"]/info[@name="location"]',
'information/category[@name="meta"]/info[@name="title"]', # Sometimes URL is in title for streams
'information/category[@name="meta"]/info[@name="filename"]', # Sometimes URL is in filename for streams
'information/category[@name="Stream 0"]/info[@name="Codec"]', # Try stream info
'information/category[@name="Stream 0"]/info[@name="Type"]',
'information/category[@name="Stream 0"]/info[@name="Language"]'
]
file_info = None
logger.debug(f"Looking for URL: {rule['url']}")
for path in url_paths:
element = tree.find(path)
if element is not None and element.text:
file_info = element.text
logger.debug(f"Found URL info at '{path}': {file_info}")
break
if file_info: if file_info:
return 1 if file_info.endswith(rule['url']) else 0 # For URL comparison, check if the rule URL is contained in the file_info
# This handles cases where VLC might show a longer or modified URL
expected_url = rule['url']
# Method 1: Direct URL match
if expected_url in file_info or file_info.endswith(expected_url):
return 1
# Method 2: For HLS streams, VLC often shows just the filename instead of full URL
# Check if the file_info matches the filename part of the expected URL
try:
expected_parsed = urlparse(expected_url)
expected_filename = os.path.basename(expected_parsed.path)
# If VLC shows just the filename (common for HLS streams)
if file_info == expected_filename:
logger.info(f"URL filename match - Expected URL: {expected_url}, VLC shows filename: {file_info}")
return 1
# Method 3: Check if both are URLs from the same domain and similar path
if '://' in file_info: # file_info is also a URL
actual_parsed = urlparse(file_info)
# Same domain and similar path structure
if (expected_parsed.netloc == actual_parsed.netloc and
expected_parsed.path in actual_parsed.path):
return 1
except Exception as e:
logger.debug(f"URL parsing error: {e}")
pass
logger.warning(f"URL mismatch - Expected: {expected_url}, Found: {file_info}")
return 0
else:
logger.warning(f"Could not find URL information in VLC status XML for rule: {rule}")
return 0
else: else:
logger.error(f"Unknown type: {rule['type']}") logger.error(f"Unknown type: {rule['type']}")
return 0 return 0
@@ -130,33 +224,67 @@ def compare_audios(audio_path_1, audio_path_2):
Compare two audio files and return a similarity score in the range [0, 1]. Compare two audio files and return a similarity score in the range [0, 1].
audio_path_1, audio_path_2: paths to the audio files to compare audio_path_1, audio_path_2: paths to the audio files to compare
""" """
# Example Usage:
# similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3') # similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3')
# print(f'Similarity Score: {similarity}') # print(f'Similarity Score: {similarity}')
# Convert to common format if necessary and load audio
if not audio_path_1 or not audio_path_2: if not audio_path_1 or not audio_path_2:
return 0 return 0
# Load the audio files and extract MFCC features y1, y2 = None, None
y1, sr1 = librosa.load(audio_path_1) try:
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1) y1, sr1 = librosa.load(audio_path_1)
except Exception:
logger.warning(f"Could not load audio from {os.path.basename(audio_path_1)}. It might be empty or corrupt.")
y2, sr2 = librosa.load(audio_path_2) try:
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2) y2, sr2 = librosa.load(audio_path_2)
except Exception:
logger.warning(f"Could not load audio from {os.path.basename(audio_path_2)}. It might be empty or corrupt.")
# Handle cases where one or both audio files are empty or corrupt.
is_y1_bad = (y1 is None) or (y1.shape[0] == 0)
is_y2_bad = (y2 is None) or (y2.shape[0] == 0)
if is_y1_bad and is_y2_bad:
logger.info("Both audio files are empty or corrupt. Considering them perfectly similar.")
return 1.0
if is_y1_bad or is_y2_bad:
logger.warning(f"One audio file is empty/corrupt, the other is not. Similarity is 0.")
return 0.0
try:
logger.info(f"Audio 1 ({os.path.basename(audio_path_1)}): sr={sr1}, len={len(y1)}")
logger.info(f"Audio 2 ({os.path.basename(audio_path_2)}): sr={sr2}, len={len(y2)}")
# Extract MFCC features
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
except Exception as e:
logger.error(f"Error during MFCC extraction: {e}")
return 0.0
# Normalize the MFCC features # Normalize the MFCC features
mfcc1 = librosa.util.normalize(mfcc1, axis=1) mfcc1 = librosa.util.normalize(mfcc1, axis=1)
mfcc2 = librosa.util.normalize(mfcc2, axis=1) mfcc2 = librosa.util.normalize(mfcc2, axis=1)
logger.info(f"MFCCs normalized.")
# Define a lambda function to compute cosine distance # Define a lambda function to compute cosine distance
dist_func = lambda x, y: cosine(x, y) dist_func = lambda x, y: cosine(x, y)
# Use the DTW algorithm to find the best alignment path # Use the DTW algorithm to find the best alignment path
distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func) distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func)
logger.info(f"DTW distance: {distance:.4f}, Path length: {len(path)}")
# Calculate the similarity score, here we use 1/(1+distance) to convert distance to a similarity score # Normalize the DTW distance by the length of the alignment path.
similarity = 1 / (1 + distance) if len(path) == 0:
normalized_distance = np.inf
else:
normalized_distance = distance / len(path)
logger.info(f"Normalized DTW distance: {normalized_distance:.4f}")
# Convert the normalized distance to a similarity score using an exponential decay function.
similarity = np.exp(-normalized_distance)
return similarity return similarity
@@ -204,32 +332,6 @@ def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=
return 1. return 1.
def are_audio_files_similar(mp3_file_path, mp4_file_path):
# Extract audio fingerprint from MP3 file
mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path)
# Extract the audio stream from the MP4 file
mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3'
try:
subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3",
mp4_audio_path], check=True)
except subprocess.CalledProcessError as e:
print(f"An error occurred during audio extraction from MP4: {e}")
return 0.
# Extract audio fingerprint from the extracted audio
mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path)
# Clean up temporary extracted audio file
os.remove(mp4_audio_path)
# Compare fingerprints (rudimentary comparison)
if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint:
return 1.
return 0.
def check_qt_bgcone(actual_config_path, rule): def check_qt_bgcone(actual_config_path, rule):
with open(actual_config_path, 'rb') as file: with open(actual_config_path, 'rb') as file:
config_file = file.read().decode('utf-8') config_file = file.read().decode('utf-8')

View File

@@ -58,16 +58,20 @@ def check_json_settings(actual: str, expected: str, **options) -> float:
if not actual: if not actual:
return 0. return 0.
with open(actual, 'r') as f: try:
data = json.load(f) with open(actual, 'r') as f:
data = json.load(f)
except Exception as e:
return 0.0
expect = expected['expected'] expect = expected['expected']
data_copy = copy.deepcopy(data)
data_copy.update(expect) # Check if all expected key-value pairs are in the actual data
if data == data_copy: for key, value in expect.items():
return 1.0 if key not in data or data[key] != value:
else: return 0.0
return 0.0
return 1.0
def compare_text_file(actual: str, expected: str, **options) -> float: def compare_text_file(actual: str, expected: str, **options) -> float:

View File

@@ -5,6 +5,10 @@ import psutil
import logging import logging
import dotenv import dotenv
import signal import signal
INSTANCE_TYPE = "t3.xlarge"
# Load environment variables from .env file # Load environment variables from .env file
dotenv.load_dotenv() dotenv.load_dotenv()
@@ -31,37 +35,17 @@ logger.setLevel(logging.INFO)
DEFAULT_REGION = "us-east-1" DEFAULT_REGION = "us-east-1"
# todo: Add doc for the configuration of image, security group and network interface # todo: Add doc for the configuration of image, security group and network interface
# todo: public the AMI images # todo: public the AMI images
# ami-05e7d7bd279ea4f14
IMAGE_ID_MAP = { IMAGE_ID_MAP = {
"us-east-1": "ami-03a22c6e501415fb1", "us-east-1": "ami-0cae20d2680c939d4",
"ap-east-1": "ami-0c092a5b8be4116f5", "ap-east-1": "ami-0c092a5b8be4116f5",
} }
INSTANCE_TYPE = "t3.medium"
def _allocate_vm(region=DEFAULT_REGION): def _allocate_vm(region=DEFAULT_REGION):
if region not in IMAGE_ID_MAP: if region not in IMAGE_ID_MAP:
raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}") raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}")
run_instances_params = {
"MaxCount": 1,
"MinCount": 1,
"ImageId": IMAGE_ID_MAP[region],
"InstanceType": INSTANCE_TYPE,
"EbsOptimized": True,
"NetworkInterfaces": [
{
"SubnetId": os.getenv('AWS_SUBNET_ID'),
"AssociatePublicIpAddress": True,
"DeviceIndex": 0,
"Groups": [
os.getenv('AWS_SECURITY_GROUP_ID')
]
}
]
}
ec2_client = boto3.client('ec2', region_name=region) ec2_client = boto3.client('ec2', region_name=region)
instance_id = None instance_id = None
original_sigint_handler = signal.getsignal(signal.SIGINT) original_sigint_handler = signal.getsignal(signal.SIGINT)
@@ -94,26 +78,76 @@ def _allocate_vm(region=DEFAULT_REGION):
signal.signal(signal.SIGINT, signal_handler) signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGTERM, signal_handler) signal.signal(signal.SIGTERM, signal_handler)
if not os.getenv('AWS_SECURITY_GROUP_ID'):
raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.")
if not os.getenv('AWS_SUBNET_ID'):
raise ValueError("AWS_SUBNET_ID is not set in the environment variables.")
run_instances_params = {
"MaxCount": 1,
"MinCount": 1,
"ImageId": IMAGE_ID_MAP[region],
"InstanceType": INSTANCE_TYPE,
"EbsOptimized": True,
"NetworkInterfaces": [
{
"SubnetId": os.getenv('AWS_SUBNET_ID'),
"AssociatePublicIpAddress": True,
"DeviceIndex": 0,
"Groups": [
os.getenv('AWS_SECURITY_GROUP_ID')
]
}
],
"BlockDeviceMappings": [
{
"DeviceName": "/dev/sda1",
"Ebs": {
# "VolumeInitializationRate": 300
"VolumeSize": 30, # Size in GB
"VolumeType": "gp3", # General Purpose SSD
"Throughput": 1000,
"Iops": 4000 # Adjust IOPS as needed
}
}
]
}
response = ec2_client.run_instances(**run_instances_params) response = ec2_client.run_instances(**run_instances_params)
instance_id = response['Instances'][0]['InstanceId'] instance_id = response['Instances'][0]['InstanceId']
waiter = ec2_client.get_waiter('instance_running')
logger.info(f"Waiting for instance {instance_id} to be running...") logger.info(f"Waiting for instance {instance_id} to be running...")
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id]) waiter.wait(InstanceIds=[instance_id])
logger.info(f"Instance {instance_id} is ready.") logger.info(f"Instance {instance_id} is ready.")
# 获取并显示VNC访问地址
try:
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 Instance ID: {instance_id}")
logger.info("="*80)
print(f"\n🌐 VNC访问地址: {vnc_url}")
print(f"📍 请在浏览器中打开上述地址进行远程桌面访问\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
except KeyboardInterrupt: except KeyboardInterrupt:
logger.warning("VM allocation interrupted by user (SIGINT).") logger.warning("VM allocation interrupted by user (SIGINT).")
raise if instance_id:
except SystemExit: logger.info(f"Terminating instance {instance_id} due to interruption.")
logger.warning("VM allocation terminated by parent process (SIGTERM).") ec2_client.terminate_instances(InstanceIds=[instance_id])
raise raise
except Exception as e: except Exception as e:
logger.error(f"Failed to allocate VM in region {region}: {str(e)}") logger.error(f"Failed to allocate VM: {e}", exc_info=True)
# try to clean up any resources that were created if instance_id:
try: logger.info(f"Terminating instance {instance_id} due to an error.")
if instance_id: ec2_client.terminate_instances(InstanceIds=[instance_id])
ec2_client.terminate_instances(InstanceIds=[instance_id])
logger.info(f"Terminated instance {instance_id} due to allocation failure.")
except Exception as cleanup_error:
logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}")
raise raise
finally: finally:
# Restore original signal handlers # Restore original signal handlers
@@ -153,6 +187,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None):
subnet_id=os.getenv('AWS_SUBNET_ID') subnet_id=os.getenv('AWS_SUBNET_ID')
) )
try:
ec2_client = boto3.client('ec2', region_name=region)
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 Instance ID: {instance_id}")
if current_proxy:
logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
if current_proxy:
print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}")
return instance_id return instance_id
@@ -213,4 +268,4 @@ class AWSVMManager(VMManager):
else: else:
logger.info("Allocating a new VM in region: {}".format(region)) logger.info("Allocating a new VM in region: {}".format(region))
new_vm_path = _allocate_vm(region) new_vm_path = _allocate_vm(region)
return new_vm_path return new_vm_path

View File

@@ -63,10 +63,24 @@ class AWSProvider(Provider):
for reservation in response['Reservations']: for reservation in response['Reservations']:
for instance in reservation['Instances']: for instance in reservation['Instances']:
private_ip_address = instance.get('PrivateIpAddress', '') private_ip_address = instance.get('PrivateIpAddress', '')
public_ip_address = instance.get('PublicIpAddress', '')
if public_ip_address:
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip_address}")
logger.info(f"🏠 Private IP: {private_ip_address}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
else:
logger.warning("No public IP address available for VNC access")
return private_ip_address return private_ip_address
return '' # Return an empty string if no IP address is found return '' # Return an empty string if no IP address is found
except ClientError as e: except ClientError as e:
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}") logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
raise raise
def save_state(self, path_to_vm: str, snapshot_name: str): def save_state(self, path_to_vm: str, snapshot_name: str):
@@ -74,7 +88,7 @@ class AWSProvider(Provider):
ec2_client = boto3.client('ec2', region_name=self.region) ec2_client = boto3.client('ec2', region_name=self.region)
try: try:
image_response = ec2_client.create_image(InstanceId=path_to_vm, ImageId=snapshot_name) image_response = ec2_client.create_image(InstanceId=path_to_vm, Name=snapshot_name)
image_id = image_response['ImageId'] image_id = image_response['ImageId']
logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.") logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.")
return image_id return image_id
@@ -83,7 +97,7 @@ class AWSProvider(Provider):
raise raise
def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str): def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str):
logger.info(f"Reverting AWS VM to snapshot: {snapshot_name}...") logger.info(f"Reverting AWS VM to snapshot AMI: {snapshot_name}...")
ec2_client = boto3.client('ec2', region_name=self.region) ec2_client = boto3.client('ec2', region_name=self.region)
try: try:
@@ -93,37 +107,65 @@ class AWSProvider(Provider):
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']] security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
subnet_id = instance['SubnetId'] subnet_id = instance['SubnetId']
instance_type = instance['InstanceType'] instance_type = instance['InstanceType']
instance_snapshot = instance_details['Reservations'][0]['Instances'][0]['ImageId']
# Step 2: Terminate the old instance # Step 2: Terminate the old instance
ec2_client.terminate_instances(InstanceIds=[path_to_vm]) ec2_client.terminate_instances(InstanceIds=[path_to_vm])
logger.info(f"Old instance {path_to_vm} has been terminated.") logger.info(f"Old instance {path_to_vm} has been terminated.")
# Step 3: Launch a new instance from the snapshot # Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
logger.info(f"Launching a new instance from snapshot {instance_snapshot}...") logger.info(f"Launching a new instance from AMI {snapshot_name}...")
run_instances_params = {
new_instance = ec2_client.run_instances( "MaxCount": 1,
MaxCount = 1, "MinCount": 1,
MinCount = 1, "ImageId": snapshot_name,
ImageId = instance_snapshot, "InstanceType": instance_type,
InstanceType = instance_type, "EbsOptimized": True,
EbsOptimized = True, "NetworkInterfaces": [
NetworkInterfaces = [
{ {
"SubnetId": subnet_id, "SubnetId": subnet_id,
"AssociatePublicIpAddress": True, "AssociatePublicIpAddress": True,
"DeviceIndex": 0, "DeviceIndex": 0,
"Groups": security_groups "Groups": security_groups
} }
],
"BlockDeviceMappings": [
{
"DeviceName": "/dev/sda1",
"Ebs": {
# "VolumeInitializationRate": 300
"VolumeSize": 30, # Size in GB
"VolumeType": "gp3", # General Purpose SSD
"Throughput": 1000,
"Iops": 4000 # Adjust IOPS as needed
}
}
] ]
) }
new_instance = ec2_client.run_instances(**run_instances_params)
new_instance_id = new_instance['Instances'][0]['InstanceId'] new_instance_id = new_instance['Instances'][0]['InstanceId']
logger.info(f"New instance {new_instance_id} launched from snapshot {snapshot_name}.") logger.info(f"New instance {new_instance_id} launched from AMI {snapshot_name}.")
logger.info(f"Waiting for instance {new_instance_id} to be running...") logger.info(f"Waiting for instance {new_instance_id} to be running...")
ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id]) ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id])
logger.info(f"Instance {new_instance_id} is ready.") logger.info(f"Instance {new_instance_id} is ready.")
try:
instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ New Instance VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 New Instance ID: {new_instance_id}")
logger.info("="*80)
print(f"\n🌐 New Instance VNC Web Access URL: {vnc_url}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for new instance {new_instance_id}: {e}")
return new_instance_id return new_instance_id

View File

@@ -163,16 +163,34 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
logger.info(f"Created new instance {instance_id} with proxy configuration") logger.info(f"Created new instance {instance_id} with proxy configuration")
# 等待实例运行
logger.info(f"Waiting for instance {instance_id} to be running...") logger.info(f"Waiting for instance {instance_id} to be running...")
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id]) ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
logger.info(f"Instance {instance_id} is ready.") logger.info(f"Instance {instance_id} is ready.")
try:
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
instance = instance_details['Reservations'][0]['Instances'][0]
public_ip = instance.get('PublicIpAddress', '')
if public_ip:
vnc_url = f"http://{public_ip}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip}")
logger.info(f"🆔 Instance ID: {instance_id}")
if self.current_proxy:
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
if self.current_proxy:
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
except Exception as e:
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
return instance_id return instance_id
except ClientError as e: except ClientError as e:
logger.error(f"Failed to create instance with proxy: {str(e)}") logger.error(f"Failed to create instance with proxy: {str(e)}")
# 如果当前代理失败,尝试轮换代理
if self.current_proxy: if self.current_proxy:
proxy_pool = get_global_proxy_pool() proxy_pool = get_global_proxy_pool()
proxy_pool.mark_proxy_failed(self.current_proxy) proxy_pool.mark_proxy_failed(self.current_proxy)
@@ -188,10 +206,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
for reservation in response['Reservations']: for reservation in response['Reservations']:
for instance in reservation['Instances']: for instance in reservation['Instances']:
private_ip_address = instance.get('PrivateIpAddress', '') private_ip_address = instance.get('PrivateIpAddress', '')
public_ip_address = instance.get('PublicIpAddress', '')
if public_ip_address:
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
logger.info("="*80)
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
logger.info(f"📡 Public IP: {public_ip_address}")
logger.info(f"🏠 Private IP: {private_ip_address}")
if self.current_proxy:
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
logger.info("="*80)
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
if self.current_proxy:
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
print(f"📍 Please open the above address in the browser for remote desktop access\n")
else:
logger.warning("No public IP address available for VNC access")
return private_ip_address return private_ip_address
return '' return ''
except ClientError as e: except ClientError as e:
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}") logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
raise raise
def save_state(self, path_to_vm: str, snapshot_name: str): def save_state(self, path_to_vm: str, snapshot_name: str):
@@ -212,24 +248,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
ec2_client = boto3.client('ec2', region_name=self.region) ec2_client = boto3.client('ec2', region_name=self.region)
try: try:
# 获取原实例详情 # Get original instance details for config.
instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm]) instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm])
instance = instance_details['Reservations'][0]['Instances'][0] instance = instance_details['Reservations'][0]['Instances'][0]
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']] security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
subnet_id = instance['SubnetId'] subnet_id = instance['SubnetId']
instance_type = instance['InstanceType'] instance_type = instance['InstanceType']
# 终止旧实例 # Terminate the old instance. This is a non-blocking call.
logger.info(f"Initiating termination for old instance {path_to_vm}...")
ec2_client.terminate_instances(InstanceIds=[path_to_vm]) ec2_client.terminate_instances(InstanceIds=[path_to_vm])
logger.info(f"Old instance {path_to_vm} has been terminated.") logger.info(f"Old instance {path_to_vm} termination initiated.")
# 轮换到新的代理 # Rotate to a new proxy
self._rotate_proxy() self._rotate_proxy()
# 创建新实例 # Create a new instance
new_instance_id = self.create_instance_with_proxy( new_instance_id = self.create_instance_with_proxy(
snapshot_name, instance_type, security_groups, subnet_id snapshot_name, instance_type, security_groups, subnet_id
) )
# Note: VNC address is displayed within create_instance_with_proxy
logger.info(f"Successfully launched new instance {new_instance_id} for revert.")
return new_instance_id return new_instance_id

View File

@@ -256,6 +256,8 @@ def _install_vm(vm_name, vms_dir, downloaded_file_name, os_type, original_vm_nam
# Try downloading the screenshot until successful # Try downloading the screenshot until successful
while not download_screenshot(vm_ip): while not download_screenshot(vm_ip):
# Try to get the IP again in case it has changed
vm_ip = get_vm_ip(vm_path)
logger.info("Check whether the virtual machine is ready...") logger.info("Check whether the virtual machine is ready...")
logger.info("Virtual machine is ready. Start to make a snapshot on the virtual machine. It would take a while...") logger.info("Virtual machine is ready. Start to make a snapshot on the virtual machine. It would take a while...")

View File

@@ -4,6 +4,7 @@ import platform
import shlex import shlex
import json import json
import subprocess, signal import subprocess, signal
import time
from pathlib import Path from pathlib import Path
from typing import Any, Optional, Sequence from typing import Any, Optional, Sequence
from typing import List, Dict, Tuple, Literal from typing import List, Dict, Tuple, Literal
@@ -65,6 +66,8 @@ app = Flask(__name__)
pyautogui.PAUSE = 0 pyautogui.PAUSE = 0
pyautogui.DARWIN_CATCH_UP_TIME = 0 pyautogui.DARWIN_CATCH_UP_TIME = 0
TIMEOUT = 1800 # seconds
logger = app.logger logger = app.logger
recording_process = None # fixme: this is a temporary solution for recording, need to be changed to support multiple-process recording_process = None # fixme: this is a temporary solution for recording, need to be changed to support multiple-process
recording_path = "/tmp/recording.mp4" recording_path = "/tmp/recording.mp4"
@@ -114,6 +117,113 @@ def execute_command():
}), 500 }), 500
@app.route('/setup/execute_with_verification', methods=['POST'])
@app.route('/execute_with_verification', methods=['POST'])
def execute_command_with_verification():
"""Execute command and verify the result based on provided verification criteria"""
data = request.json
shell = data.get('shell', False)
command = data.get('command', "" if shell else [])
verification = data.get('verification', {})
max_wait_time = data.get('max_wait_time', 10) # Maximum wait time in seconds
check_interval = data.get('check_interval', 1) # Check interval in seconds
if isinstance(command, str) and not shell:
command = shlex.split(command)
# Expand user directory
for i, arg in enumerate(command):
if arg.startswith("~/"):
command[i] = os.path.expanduser(arg)
# Execute the main command
try:
if platform_name == "Windows":
flags = subprocess.CREATE_NO_WINDOW
else:
flags = 0
result = subprocess.run(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=shell,
text=True,
timeout=120,
creationflags=flags,
)
# If no verification is needed, return immediately
if not verification:
return jsonify({
'status': 'success',
'output': result.stdout,
'error': result.stderr,
'returncode': result.returncode
})
# Wait and verify the result
import time
start_time = time.time()
while time.time() - start_time < max_wait_time:
verification_passed = True
# Check window existence if specified
if 'window_exists' in verification:
window_name = verification['window_exists']
try:
if platform_name == 'Linux':
wmctrl_result = subprocess.run(['wmctrl', '-l'],
capture_output=True, text=True, check=True)
if window_name.lower() not in wmctrl_result.stdout.lower():
verification_passed = False
elif platform_name in ['Windows', 'Darwin']:
import pygetwindow as gw
windows = gw.getWindowsWithTitle(window_name)
if not windows:
verification_passed = False
except Exception:
verification_passed = False
# Check command execution if specified
if 'command_success' in verification:
verify_cmd = verification['command_success']
try:
verify_result = subprocess.run(verify_cmd, shell=True,
capture_output=True, text=True, timeout=5)
if verify_result.returncode != 0:
verification_passed = False
except Exception:
verification_passed = False
if verification_passed:
return jsonify({
'status': 'success',
'output': result.stdout,
'error': result.stderr,
'returncode': result.returncode,
'verification': 'passed',
'wait_time': time.time() - start_time
})
time.sleep(check_interval)
# Verification failed
return jsonify({
'status': 'verification_failed',
'output': result.stdout,
'error': result.stderr,
'returncode': result.returncode,
'verification': 'failed',
'wait_time': max_wait_time
}), 500
except Exception as e:
return jsonify({
'status': 'error',
'message': str(e)
}), 500
def _get_machine_architecture() -> str: def _get_machine_architecture() -> str:
""" Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc. """ Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc.
""" """
@@ -202,8 +312,8 @@ def capture_screen_with_cursor():
pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty)) pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty))
img.paste(cursor, pos, cursor) img.paste(cursor, pos, cursor)
except: except Exception as e:
pass logger.warning(f"Failed to capture cursor on Windows, screenshot will not have a cursor. Error: {e}")
img.save(file_path) img.save(file_path)
elif user_platform == "Linux": elif user_platform == "Linux":
@@ -1025,11 +1135,21 @@ def get_file():
return jsonify({"error": "file_path is required"}), 400 return jsonify({"error": "file_path is required"}), 400
try: try:
# Check if the file exists and get its size
if not os.path.exists(file_path):
return jsonify({"error": "File not found"}), 404
file_size = os.path.getsize(file_path)
logger.info(f"Serving file: {file_path} ({file_size} bytes)")
# Check if the file exists and send it to the user # Check if the file exists and send it to the user
return send_file(file_path, as_attachment=True) return send_file(file_path, as_attachment=True)
except FileNotFoundError: except FileNotFoundError:
# If the file is not found, return a 404 error # If the file is not found, return a 404 error
return jsonify({"error": "File not found"}), 404 return jsonify({"error": "File not found"}), 404
except Exception as e:
logger.error(f"Error serving file {file_path}: {e}")
return jsonify({"error": f"Failed to serve file: {str(e)}"}), 500
@app.route("/setup/upload", methods=["POST"]) @app.route("/setup/upload", methods=["POST"])
@@ -1038,8 +1158,27 @@ def upload_file():
if 'file_path' in request.form and 'file_data' in request.files: if 'file_path' in request.form and 'file_data' in request.files:
file_path = os.path.expandvars(os.path.expanduser(request.form['file_path'])) file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))
file = request.files["file_data"] file = request.files["file_data"]
file.save(file_path)
return "File Uploaded" try:
# Ensure target directory exists
os.makedirs(os.path.dirname(file_path), exist_ok=True)
# Save file and get size for verification
file.save(file_path)
uploaded_size = os.path.getsize(file_path)
logger.info(f"File uploaded successfully: {file_path} ({uploaded_size} bytes)")
return f"File Uploaded: {uploaded_size} bytes"
except Exception as e:
logger.error(f"Error uploading file to {file_path}: {e}")
# Clean up partial file if it exists
if os.path.exists(file_path):
try:
os.remove(file_path)
except:
pass
return jsonify({"error": f"Failed to upload file: {str(e)}"}), 500
else: else:
return jsonify({"error": "file_path and file_data are required"}), 400 return jsonify({"error": "file_path and file_data are required"}), 400
@@ -1098,20 +1237,45 @@ def download_file():
max_retries = 3 max_retries = 3
error: Optional[Exception] = None error: Optional[Exception] = None
for i in range(max_retries): for i in range(max_retries):
try: try:
response = requests.get(url, stream=True) logger.info(f"Download attempt {i+1}/{max_retries} for {url}")
response = requests.get(url, stream=True, timeout=300)
response.raise_for_status() response.raise_for_status()
# Get expected file size if available
total_size = int(response.headers.get('content-length', 0))
if total_size > 0:
logger.info(f"Expected file size: {total_size / (1024*1024):.2f} MB")
downloaded_size = 0
with open(path, 'wb') as f: with open(path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192): for chunk in response.iter_content(chunk_size=8192):
if chunk: if chunk:
f.write(chunk) f.write(chunk)
return "File downloaded successfully" downloaded_size += len(chunk)
if total_size > 0 and downloaded_size % (1024*1024) == 0: # Log every MB
progress = (downloaded_size / total_size) * 100
logger.info(f"Download progress: {progress:.1f}%")
# Verify download completeness
actual_size = os.path.getsize(path)
if total_size > 0 and actual_size != total_size:
raise Exception(f"Download incomplete. Expected {total_size} bytes, got {actual_size} bytes")
logger.info(f"File downloaded successfully: {path} ({actual_size} bytes)")
return f"File downloaded successfully: {actual_size} bytes"
except requests.RequestException as e: except (requests.RequestException, Exception) as e:
error = e error = e
logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)") logger.error(f"Failed to download {url}: {e}. Retrying... ({max_retries - i - 1} attempts left)")
# Clean up partial download
if path.exists():
try:
path.unlink()
except:
pass
return f"Failed to download {url}. No retries left. Error: {error}", 500 return f"Failed to download {url}. No retries left. Error: {error}", 500
@@ -1124,18 +1288,88 @@ def open_file():
if not path: if not path:
return "Path not supplied!", 400 return "Path not supplied!", 400
path = Path(os.path.expandvars(os.path.expanduser(path))) path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
if not path.exists(): # Check if it's a file path that exists
return f"File not found: {path}", 404 is_file_path = path_obj.exists()
# If it's not a file path, treat it as an application name/command
if not is_file_path:
# Check if it's a valid command by trying to find it in PATH
import shutil
if not shutil.which(path):
return f"Application/file not found: {path}", 404
try: try:
if platform.system() == "Windows": if is_file_path:
os.startfile(path) # Handle file opening
if platform.system() == "Windows":
os.startfile(path_obj)
else:
open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
subprocess.Popen([open_cmd, str(path_obj)])
file_name = path_obj.name
file_name_without_ext, _ = os.path.splitext(file_name)
else: else:
open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open" # Handle application launching
subprocess.Popen([open_cmd, str(path)]) if platform.system() == "Windows":
return "File opened successfully" subprocess.Popen([path])
else:
subprocess.Popen([path])
file_name = path
file_name_without_ext = path
# Wait for the file/application to open
start_time = time.time()
window_found = False
while time.time() - start_time < TIMEOUT:
os_name = platform.system()
if os_name in ['Windows', 'Darwin']:
import pygetwindow as gw
# Check for window title containing file name or file name without extension
windows = gw.getWindowsWithTitle(file_name)
if not windows:
windows = gw.getWindowsWithTitle(file_name_without_ext)
if windows:
# To be more specific, we can try to activate it
windows[0].activate()
window_found = True
break
elif os_name == 'Linux':
try:
# Using wmctrl to list windows and check if any window title contains the filename
result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
window_list = result.stdout.strip().split('\n')
if not result.stdout.strip():
pass # No windows, just continue waiting
else:
for window in window_list:
if file_name in window or file_name_without_ext in window:
# a window is found, now activate it
window_id = window.split()[0]
subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
window_found = True
break
if window_found:
break
except (subprocess.CalledProcessError, FileNotFoundError):
# wmctrl might not be installed or the window manager isn't ready.
# We just log it once and let the main loop retry.
if 'wmctrl_failed_once' not in locals():
logger.warning("wmctrl command is not ready, will keep retrying...")
wmctrl_failed_once = True
pass # Let the outer loop retry
time.sleep(1)
if window_found:
return "File opened and window activated successfully"
else:
return f"Failed to find window for {file_name} within {timeout} seconds.", 500
except Exception as e: except Exception as e:
return f"Failed to open {path}. Error: {e}", 500 return f"Failed to open {path}. Error: {e}", 500
@@ -1258,37 +1492,78 @@ def close_window():
@app.route('/start_recording', methods=['POST']) @app.route('/start_recording', methods=['POST'])
def start_recording(): def start_recording():
global recording_process global recording_process
if recording_process: if recording_process and recording_process.poll() is None:
return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400 return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400
# Clean up previous recording if it exists
if os.path.exists(recording_path):
try:
os.remove(recording_path)
except OSError as e:
logger.error(f"Error removing old recording file: {e}")
return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500
d = display.Display() d = display.Display()
screen_width = d.screen().width_in_pixels screen_width = d.screen().width_in_pixels
screen_height = d.screen().height_in_pixels screen_height = d.screen().height_in_pixels
start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}" start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL, # Use stderr=PIPE to capture potential errors from ffmpeg
stderr=subprocess.DEVNULL) recording_process = subprocess.Popen(shlex.split(start_command),
stdout=subprocess.DEVNULL,
stderr=subprocess.PIPE,
text=True # To get stderr as string
)
return jsonify({'status': 'success', 'message': 'Started recording.'}) # Wait a couple of seconds to see if ffmpeg starts successfully
try:
# Wait for 2 seconds. If ffmpeg exits within this time, it's an error.
recording_process.wait(timeout=2)
# If wait() returns, it means the process has terminated.
error_output = recording_process.stderr.read()
return jsonify({
'status': 'error',
'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
}), 500
except subprocess.TimeoutExpired:
# This is the expected outcome: the process is still running after 2 seconds.
return jsonify({'status': 'success', 'message': 'Started recording successfully.'})
@app.route('/end_recording', methods=['POST']) @app.route('/end_recording', methods=['POST'])
def end_recording(): def end_recording():
global recording_process global recording_process
if not recording_process: if not recording_process or recording_process.poll() is not None:
recording_process = None # Clean up stale process object
return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400 return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
recording_process.send_signal(signal.SIGINT) error_output = ""
recording_process.wait() try:
recording_process = None # Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file.
recording_process.send_signal(signal.SIGINT)
# Wait for ffmpeg to terminate. communicate() gets output and waits.
_, error_output = recording_process.communicate(timeout=15)
except subprocess.TimeoutExpired:
logger.error("ffmpeg did not respond to SIGINT, killing the process.")
recording_process.kill()
# After killing, communicate to get any remaining output.
_, error_output = recording_process.communicate()
recording_process = None
return jsonify({
'status': 'error',
'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
}), 500
# return recording video file recording_process = None # Clear the process from global state
if os.path.exists(recording_path):
# Check if the recording file was created and is not empty.
if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
return send_file(recording_path, as_attachment=True) return send_file(recording_path, as_attachment=True)
else: else:
return abort(404, description="Recording failed") logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -27,17 +27,57 @@
"libreoffice_writer" "libreoffice_writer"
], ],
"evaluator": { "evaluator": {
"func": "compare_pdfs", "func": [
"expected": { "compare_pdfs",
"compare_pdfs",
"compare_pdfs",
"compare_pdfs"
],
"conj": "or",
"expected": [
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold_1.pdf"
},
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold_2.pdf"
},
{
"type": "cloud_file", "type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf", "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold.pdf" "dest": "Constitution_Template_With_Guidelines_Gold_3.pdf"
}, },
"result": { {
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_Gold_4.pdf"
}
],
"result": [
{
"type": "vm_file", "type": "vm_file",
"path": "/home/user/Desktop/View_Person_Organizational_Summary.pdf", "path": "/home/user/Desktop/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines.pdf" "dest": "Constitution_Template_With_Guidelines_1.pdf"
} },
{
"type": "vm_file",
"path": "/home/user/Documents/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_2.pdf"
},
{
"type": "vm_file",
"path": "/home/user/Downloads/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_3.pdf"
},
{
"type": "vm_file",
"path": "/home/user/View_Person_Organizational_Summary.pdf",
"dest": "Constitution_Template_With_Guidelines_4.pdf"
}
]
}, },
"proxy": false "proxy": false
} }

View File

@@ -38,7 +38,7 @@
"command": [ "command": [
"python", "python",
"-c", "-c",
"import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)" "import pyautogui; import time; time.sleep(15); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
] ]
} }
} }
@@ -68,12 +68,12 @@
"command": [ "command": [
"python", "python",
"-c", "-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter');" "import pyautogui; import time; time.sleep(1); pyautogui.hotkey('ctrl', 's'); time.sleep(3);"
] ]
} }
} }
], ],
"func": "compare_contains_image", "func": "compare_docx_images",
"result": { "result": {
"type": "vm_file", "type": "vm_file",
"path": "/home/user/Desktop/Viewing_Your_Class_Schedule_and_Textbooks.docx", "path": "/home/user/Desktop/Viewing_Your_Class_Schedule_and_Textbooks.docx",

View File

@@ -52,7 +52,7 @@
} }
} }
], ],
"func": "compare_docx_lines", "func": "compare_unique_train_records",
"result": { "result": {
"type": "vm_file", "type": "vm_file",
"path": "/home/user/Desktop/HK_train_record.docx", "path": "/home/user/Desktop/HK_train_record.docx",
@@ -60,8 +60,16 @@
}, },
"expected": { "expected": {
"type": "cloud_file", "type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx", "path": [
"dest": "HK_train_record_Gold.docx" "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record.docx"
],
"dest": [
"HK_train_record_Gold.docx",
"HK_train_record_Original.docx"
],
"multi": true,
"gives": [0, 1]
} }
}, },
"proxy": false "proxy": false

View File

@@ -52,20 +52,57 @@
} }
} }
], ],
"func": "compare_docx_files", "func": [
"expected": { "compare_docx_files",
"type": "cloud_file", "compare_docx_files",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold.docx", "compare_docx_files"
"dest": "CCCH9003_Tutorial_guidelines_Gold.docx" ],
}, "conj": "or",
"result": { "expected": [
"type": "vm_file", {
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx", "type": "cloud_file",
"dest": "CCCH9003_Tutorial_guidelines.docx" "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_1.docx",
}, "dest": "CCCH9003_Tutorial_guidelines_Gold_1.docx"
"options": { },
"ignore_blanks": false {
} "type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_2.docx",
"dest": "CCCH9003_Tutorial_guidelines_Gold_2.docx"
},
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_3.docx",
"dest": "CCCH9003_Tutorial_guidelines_Gold_3.docx"
}
],
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
"dest": "CCCH9003_Tutorial_guidelines.docx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
"dest": "CCCH9003_Tutorial_guidelines.docx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
"dest": "CCCH9003_Tutorial_guidelines.docx"
}
],
"options": [
{
"ignore_blanks": false
},
{
"ignore_blanks": false
},
{
"ignore_blanks": false
}
]
}, },
"proxy": false "proxy": false
} }

View File

@@ -47,22 +47,40 @@
"command": [ "command": [
"python", "python",
"-c", "-c",
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); " "import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(2); "
] ]
} }
} }
], ],
"func": "compare_docx_tables", "func": [
"expected": { "compare_docx_tables",
"type": "cloud_file", "compare_docx_tables"
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx", ],
"dest": "Graphemes_Sound_Letter_Patterns_Gold.docx" "conj": "or",
}, "expected": [
"result": { {
"type": "vm_file", "type": "cloud_file",
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx", "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
"dest": "Graphemes_Sound_Letter_Patterns.docx" "dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
} },
{
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold_2.docx",
"dest": "Graphemes_Sound_Letter_Patterns_Gold_2.docx"
}
],
"result": [
{
"type": "vm_file",
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
"dest": "Graphemes_Sound_Letter_Patterns.docx"
},
{
"type": "vm_file",
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
"dest": "Graphemes_Sound_Letter_Patterns.docx"
}
]
}, },
"proxy": false "proxy": false
} }

View File

@@ -50,7 +50,7 @@
"type": "rule", "type": "rule",
"rules": { "rules": {
"include": [ "include": [
"54\n" "54"
], ],
"exclude": [] "exclude": []
} }

View File

@@ -1,8 +1,8 @@
{ {
"id": "bba3381f-b5eb-4439-bd9e-80c22218d5a7", "id": "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
"snapshot": "base_setup", "snapshot": "base_setup",
"instruction": "Can you start streaming the video from this link for me? https://www.youtube.com/watch?v=pgBsyTKAwLw", "instruction": "Can you start streaming the video from this link for me? https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8",
"source": "https://www.quora.com/How-do-I-play-online-videos-using-the-VLC-media-player", "source": "https://developer.apple.com/streaming/examples/ - Apple HLS test streams for developers",
"config": [ "config": [
{ {
"type": "launch", "type": "launch",
@@ -32,7 +32,7 @@
"type": "rule", "type": "rule",
"rules": { "rules": {
"type": "url", "type": "url",
"url": "https://www.youtube.com/watch?v=pgBsyTKAwLw" "url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8"
} }
}, },
"result": { "result": {

View File

@@ -1,7 +1,7 @@
{ {
"id": "276cc624-87ea-4f08-ab93-f770e3790175", "id": "276cc624-87ea-4f08-ab93-f770e3790175",
"snapshot": "vscode", "snapshot": "vscode",
"instruction": "Please help me set the current user's line length to 50 characters in VS Code.", "instruction": "Please help me set the current user's line length for code wrapping to 50 characters in VS Code.",
"source": "https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code", "source": "https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code",
"config": [ "config": [
{ {

View File

@@ -1,7 +1,7 @@
{ {
"id": "7c4cc09e-7a92-40dd-8338-b2286535c4ed", "id": "7c4cc09e-7a92-40dd-8338-b2286535c4ed",
"snapshot": "vscode", "snapshot": "vscode",
"instruction": "Please help me change the display language of VS Code to \"Arabic\".", "instruction": "Please help me change the display language of VS Code to \"Arabic\" without using any extensions.",
"source": "", "source": "",
"config": [ "config": [
{ {