Merge remote-tracking branch 'upstream/main'
This commit is contained in:
@@ -36,6 +36,8 @@ FILE_PATH = os.path.dirname(os.path.abspath(__file__))
|
||||
|
||||
init_proxy_pool(PROXY_CONFIG_FILE) # initialize the global proxy pool
|
||||
|
||||
MAX_RETRIES = 20
|
||||
|
||||
class SetupController:
|
||||
def __init__(self, vm_ip: str, server_port: int = 5000, chromium_port: int = 9222, vlc_port: int = 8080, cache_dir: str = "cache"):
|
||||
self.vm_ip: str = vm_ip
|
||||
@@ -64,20 +66,20 @@ class SetupController:
|
||||
# make sure connection can be established
|
||||
logger.info(f"try to connect {self.http_server}")
|
||||
retry = 0
|
||||
while retry < 50:
|
||||
while retry < MAX_RETRIES:
|
||||
try:
|
||||
_ = requests.get(self.http_server + "/terminal")
|
||||
break
|
||||
except:
|
||||
time.sleep(5)
|
||||
retry += 1
|
||||
logger.info(f"retry: {retry}/50")
|
||||
logger.info(f"retry: {retry}/{MAX_RETRIES}")
|
||||
|
||||
if retry == 50:
|
||||
if retry == MAX_RETRIES:
|
||||
return False
|
||||
|
||||
|
||||
for cfg in config:
|
||||
for i, cfg in enumerate(config):
|
||||
config_type: str = cfg["type"]
|
||||
parameters: Dict[str, Any] = cfg["parameters"]
|
||||
|
||||
@@ -85,10 +87,17 @@ class SetupController:
|
||||
# protocol
|
||||
setup_function: str = "_{:}_setup".format(config_type)
|
||||
assert hasattr(self, setup_function), f'Setup controller cannot find init function {setup_function}'
|
||||
logger.info(f"call function {setup_function}")
|
||||
getattr(self, setup_function)(**parameters)
|
||||
|
||||
logger.info("SETUP: %s(%s)", setup_function, str(parameters))
|
||||
|
||||
try:
|
||||
logger.info(f"Executing setup step {i+1}/{len(config)}: {setup_function}")
|
||||
logger.debug(f"Setup parameters: {parameters}")
|
||||
getattr(self, setup_function)(**parameters)
|
||||
logger.info(f"SETUP COMPLETED: {setup_function}({str(parameters)})")
|
||||
except Exception as e:
|
||||
logger.error(f"SETUP FAILED at step {i+1}/{len(config)}: {setup_function}({str(parameters)})")
|
||||
logger.error(f"Error details: {e}")
|
||||
logger.error(f"Traceback: {traceback.format_exc()}")
|
||||
raise Exception(f"Setup step {i+1} failed: {setup_function} - {e}") from e
|
||||
|
||||
return True
|
||||
|
||||
@@ -111,25 +120,41 @@ class SetupController:
|
||||
raise Exception(f"Setup Download - Invalid URL ({url}) or path ({path}).")
|
||||
|
||||
if not os.path.exists(cache_path):
|
||||
logger.info(f"Cache file not found, downloading from {url} to {cache_path}")
|
||||
max_retries = 3
|
||||
downloaded = False
|
||||
e = None
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
logger.info(f"Download attempt {i+1}/{max_retries} for {url}")
|
||||
response = requests.get(url, stream=True, timeout=300) # Add 5 minute timeout
|
||||
response.raise_for_status()
|
||||
|
||||
# Get file size if available
|
||||
total_size = int(response.headers.get('content-length', 0))
|
||||
if total_size > 0:
|
||||
logger.info(f"File size: {total_size / (1024*1024):.2f} MB")
|
||||
|
||||
downloaded_size = 0
|
||||
with open(cache_path, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
logger.info("File downloaded successfully")
|
||||
downloaded_size += len(chunk)
|
||||
if total_size > 0 and downloaded_size % (1024*1024) == 0: # Log every MB
|
||||
progress = (downloaded_size / total_size) * 100
|
||||
logger.info(f"Download progress: {progress:.1f}%")
|
||||
|
||||
logger.info(f"File downloaded successfully to {cache_path} ({downloaded_size / (1024*1024):.2f} MB)")
|
||||
downloaded = True
|
||||
break
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(
|
||||
f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
|
||||
# Clean up partial download
|
||||
if os.path.exists(cache_path):
|
||||
os.remove(cache_path)
|
||||
if not downloaded:
|
||||
raise requests.RequestException(f"Failed to download {url}. No retries left.")
|
||||
|
||||
@@ -142,14 +167,18 @@ class SetupController:
|
||||
|
||||
# send request to server to upload file
|
||||
try:
|
||||
logger.info(f"Uploading {os.path.basename(path)} to VM at {path}")
|
||||
logger.debug("REQUEST ADDRESS: %s", self.http_server + "/setup" + "/upload")
|
||||
response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form)
|
||||
response = requests.post(self.http_server + "/setup" + "/upload", headers=headers, data=form, timeout=600) # 10 minute timeout for upload
|
||||
if response.status_code == 200:
|
||||
logger.info("Command executed successfully: %s", response.text)
|
||||
logger.info(f"File uploaded successfully: {path}")
|
||||
logger.debug("Upload response: %s", response.text)
|
||||
else:
|
||||
logger.error("Failed to upload file. Status code: %s", response.text)
|
||||
logger.error(f"Failed to upload file {path}. Status code: {response.status_code}, Response: {response.text}")
|
||||
raise requests.RequestException(f"Upload failed with status {response.status_code}")
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error("An error occurred while trying to send the request: %s", e)
|
||||
logger.error(f"An error occurred while trying to upload {path}: {e}")
|
||||
raise
|
||||
|
||||
def _upload_file_setup(self, files: List[Dict[str, str]]):
|
||||
"""
|
||||
@@ -219,13 +248,14 @@ class SetupController:
|
||||
|
||||
# send request to server to open file
|
||||
try:
|
||||
response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload)
|
||||
if response.status_code == 200:
|
||||
logger.info("Command executed successfully: %s", response.text)
|
||||
else:
|
||||
logger.error("Failed to open file. Status code: %s", response.text)
|
||||
# The server-side call is now blocking and can take time.
|
||||
# We set a timeout that is slightly longer than the server's timeout (1800s).
|
||||
response = requests.post(self.http_server + "/setup" + "/open_file", headers=headers, data=payload, timeout=1810)
|
||||
response.raise_for_status() # This will raise an exception for 4xx and 5xx status codes
|
||||
logger.info("Command executed successfully: %s", response.text)
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error("An error occurred while trying to send the request: %s", e)
|
||||
logger.error(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}")
|
||||
raise Exception(f"Failed to open file '{path}'. An error occurred while trying to send the request or the server responded with an error: {e}") from e
|
||||
|
||||
def _launch_setup(self, command: Union[str, List[str]], shell: bool = False):
|
||||
if not command:
|
||||
@@ -302,6 +332,57 @@ class SetupController:
|
||||
if not terminates:
|
||||
time.sleep(0.3)
|
||||
|
||||
def _execute_with_verification_setup(
|
||||
self,
|
||||
command: List[str],
|
||||
verification: Dict[str, Any] = None,
|
||||
max_wait_time: int = 10,
|
||||
check_interval: float = 1.0,
|
||||
shell: bool = False
|
||||
):
|
||||
"""Execute command with verification of results
|
||||
|
||||
Args:
|
||||
command: Command to execute
|
||||
verification: Dict with verification criteria:
|
||||
- window_exists: Check if window with this name exists
|
||||
- command_success: Execute this command and check if it succeeds
|
||||
max_wait_time: Maximum time to wait for verification
|
||||
check_interval: Time between verification checks
|
||||
shell: Whether to use shell
|
||||
"""
|
||||
if not command:
|
||||
raise Exception("Empty command to launch.")
|
||||
|
||||
verification = verification or {}
|
||||
|
||||
payload = json.dumps({
|
||||
"command": command,
|
||||
"shell": shell,
|
||||
"verification": verification,
|
||||
"max_wait_time": max_wait_time,
|
||||
"check_interval": check_interval
|
||||
})
|
||||
headers = {"Content-Type": "application/json"}
|
||||
|
||||
try:
|
||||
response = requests.post(self.http_server + "/setup" + "/execute_with_verification",
|
||||
headers=headers, data=payload, timeout=max_wait_time + 10)
|
||||
if response.status_code == 200:
|
||||
result = response.json()
|
||||
logger.info("Command executed and verified successfully: %s -> %s"
|
||||
, " ".join(command) if isinstance(command, list) else command
|
||||
, response.text
|
||||
)
|
||||
return result
|
||||
else:
|
||||
logger.error("Failed to execute with verification. Status code: %s", response.text)
|
||||
raise Exception(f"Command verification failed: {response.text}")
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error("An error occurred while trying to send the request: %s", e)
|
||||
traceback.print_exc()
|
||||
raise Exception(f"Request failed: {e}")
|
||||
|
||||
def _command_setup(self, command: List[str], **kwargs):
|
||||
self._execute_setup(command, **kwargs)
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ logger = logging.getLogger("desktopenv.env")
|
||||
Metric = Callable[[Any, Any], float]
|
||||
Getter = Callable[[gym.Env, Dict[str, Any]], Any]
|
||||
|
||||
MAX_RETRIES = 5
|
||||
MAX_RETRIES = 5 # Maximum retries for environment setup
|
||||
|
||||
class DesktopEnv(gym.Env):
|
||||
"""
|
||||
@@ -72,6 +72,16 @@ class DesktopEnv(gym.Env):
|
||||
|
||||
self.os_type = os_type
|
||||
|
||||
# Track whether environment has been used (step/setup) to optimize snapshot revert
|
||||
# docker, aws, gcp, azure are always unused as the emulator starts from a clean state
|
||||
# vmware, virtualbox are always used as the emulator starts from a dirty state
|
||||
if self.provider_name in {"docker", "aws", "gcp", "azure"}:
|
||||
self.is_environment_used = False
|
||||
elif self.provider_name in {"vmware", "virtualbox"}:
|
||||
self.is_environment_used = True
|
||||
else:
|
||||
raise ValueError(f"Invalid provider name: {self.provider_name}")
|
||||
|
||||
# Initialize environment variables
|
||||
if path_to_vm:
|
||||
self.path_to_vm = os.path.abspath(os.path.expandvars(os.path.expanduser(path_to_vm))) \
|
||||
@@ -190,11 +200,19 @@ class DesktopEnv(gym.Env):
|
||||
logger.info("Using regular AWS provider.")
|
||||
|
||||
|
||||
logger.info("Reverting to snapshot to {}...".format(self.snapshot_name))
|
||||
self._revert_to_snapshot()
|
||||
logger.info("Starting emulator...")
|
||||
self._start_emulator()
|
||||
logger.info("Emulator started.")
|
||||
# Only revert to snapshot if environment has been used (step/setup)
|
||||
# This optimization is especially important for cloud providers like AWS
|
||||
# where unnecessary snapshot operations are costly and time-consuming
|
||||
if self.is_environment_used:
|
||||
logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
|
||||
self._revert_to_snapshot()
|
||||
logger.info("Starting emulator...")
|
||||
self._start_emulator()
|
||||
logger.info("Emulator started.")
|
||||
# Reset the usage flag after reverting
|
||||
self.is_environment_used = False
|
||||
else:
|
||||
logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))
|
||||
|
||||
if task_config is not None:
|
||||
self._set_task_info(task_config)
|
||||
@@ -202,6 +220,9 @@ class DesktopEnv(gym.Env):
|
||||
logger.info("Setting up environment...")
|
||||
success = self.setup_controller.setup(self.config)
|
||||
if success:
|
||||
# Mark environment as used when setup is successfully executed
|
||||
if self.config: # Only mark as used if there were actual setup operations
|
||||
self.is_environment_used = True
|
||||
break
|
||||
else:
|
||||
logger.error(
|
||||
@@ -300,6 +321,9 @@ class DesktopEnv(gym.Env):
|
||||
def step(self, action, pause=2):
|
||||
self._step_no += 1
|
||||
self.action_history.append(action)
|
||||
|
||||
# Mark environment as used when step is called
|
||||
self.is_environment_used = True
|
||||
|
||||
reward = 0 # todo: Define reward calculation for each example
|
||||
done = False # todo: Define episode termination condition for each example
|
||||
@@ -336,7 +360,11 @@ class DesktopEnv(gym.Env):
|
||||
Evaluate whether the task is successfully completed.
|
||||
"""
|
||||
|
||||
self.setup_controller.setup(self.evaluator.get("postconfig", []))
|
||||
postconfig = self.evaluator.get("postconfig", [])
|
||||
self.setup_controller.setup(postconfig)
|
||||
# Mark environment as used if there were postconfig setup operations
|
||||
if postconfig:
|
||||
self.is_environment_used = True
|
||||
|
||||
if self.evaluator['func'] == "infeasible":
|
||||
if len(self.action_history) > 0 and self.action_history[-1] == "FAIL":
|
||||
|
||||
@@ -80,18 +80,16 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
|
||||
returned.
|
||||
only support for single file now:
|
||||
time_suffix(bool): optional. defaults to False. if True, append the current time in required format.
|
||||
time_format(str): optional. defaults to "%Y_%m_%d". format of the time suffix.
|
||||
time_format(str): optional. defaults to "%Y%m%d_%H%M%S". format of the time suffix.
|
||||
"""
|
||||
time_format = "%Y_%m_%d"
|
||||
time_format = "%Y%m%d_%H%M%S"
|
||||
if not config.get("multi", False):
|
||||
paths: List[str] = [config["path"]]
|
||||
dests: List[str] = [config["dest"]]
|
||||
if "time_suffix" in config.keys() and config["time_suffix"]:
|
||||
if "time_format" in config.keys():
|
||||
time_format = config["time_format"]
|
||||
# Insert time before . in file type suffix
|
||||
paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths]
|
||||
dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests]
|
||||
if config.get("time_suffix", False):
|
||||
time_format = config.get("time_format", time_format)
|
||||
# Insert time before file extension.
|
||||
dests = [f"{os.path.splitext(d)[0]}_{datetime.now().strftime(time_format)}{os.path.splitext(d)[1]}" for d in dests]
|
||||
else:
|
||||
paths: List[str] = config["path"]
|
||||
dests: List[str] = config["dest"]
|
||||
|
||||
@@ -53,7 +53,8 @@ from .docs import (
|
||||
compare_docx_files_and_ignore_new_lines,
|
||||
compare_docx_images,
|
||||
compare_image_text,
|
||||
compare_references
|
||||
compare_references,
|
||||
compare_unique_train_records
|
||||
)
|
||||
from .general import (
|
||||
check_csv,
|
||||
|
||||
@@ -167,8 +167,12 @@ def compare_docx_files(file1, file2, **options):
|
||||
if ignore_case:
|
||||
p1, p2 = p1.lower(), p2.lower()
|
||||
if p1 != p2:
|
||||
print(p1)
|
||||
print(p2)
|
||||
# show the difference
|
||||
print("=== First Paragraph ===")
|
||||
print(f"\033[92m{repr(p1)}\033[0m") # Green color for p1, repr() shows hidden chars
|
||||
print("=== Second Paragraph ===")
|
||||
print(f"\033[91m{repr(p2)}\033[0m") # Red color for p2, repr() shows hidden chars
|
||||
print("=" * 50) # Clear boundary
|
||||
return 0
|
||||
|
||||
return 1
|
||||
@@ -886,3 +890,72 @@ def compare_references(file1, file2, **options):
|
||||
return (result - reference_base_result) / (1 - reference_base_result)
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
def compare_unique_train_records(processed_file, expected_files, **kwargs):
|
||||
"""
|
||||
Compares the processed file with a list of expected files containing the
|
||||
gold standard and the initial document.
|
||||
expected_files[0] should be the gold standard file.
|
||||
expected_files[1] should be the initial file.
|
||||
"""
|
||||
# Debug logging to understand what we're actually receiving
|
||||
logger.info(f"DEBUG: processed_file type: {type(processed_file)}, value: {processed_file}")
|
||||
logger.info(f"DEBUG: expected_files type: {type(expected_files)}, value: {expected_files}")
|
||||
logger.info(f"DEBUG: kwargs: {kwargs}")
|
||||
|
||||
if not processed_file or not isinstance(expected_files, list) or len(expected_files) < 2:
|
||||
logger.error("Invalid arguments: processed_file and a list of 2 expected_files are required.")
|
||||
return 0
|
||||
|
||||
gold_file = expected_files[0]
|
||||
initial_file = expected_files[1]
|
||||
|
||||
if not gold_file or not initial_file:
|
||||
logger.error("Gold file or initial file path is missing from expected_files list.")
|
||||
return 0
|
||||
|
||||
# Helper function to get lines and IDs from a file
|
||||
def get_lines_and_ids_from_file(file_path):
|
||||
try:
|
||||
doc = Document(file_path)
|
||||
lines = [p.text.strip() for p in doc.paragraphs if p.text.strip()]
|
||||
train_ids = [line.split(',')[1].strip() for line in lines if len(line.split(',')) == 4]
|
||||
return lines, train_ids
|
||||
except Exception as e:
|
||||
logger.error(f"Error opening or parsing file {file_path}: {e}")
|
||||
return None, None
|
||||
|
||||
# Get data from all three files
|
||||
processed_lines, processed_train_ids = get_lines_and_ids_from_file(processed_file)
|
||||
if processed_lines is None: return 0
|
||||
|
||||
gold_lines, gold_train_ids = get_lines_and_ids_from_file(gold_file)
|
||||
if gold_lines is None: return 0
|
||||
|
||||
initial_lines, _ = get_lines_and_ids_from_file(initial_file)
|
||||
if initial_lines is None: return 0
|
||||
initial_lines_set = set(initial_lines)
|
||||
|
||||
# 1. Subset Check: Ensure every processed line was in the initial file
|
||||
if not set(processed_lines).issubset(initial_lines_set):
|
||||
logger.error("Processed file contains lines not present in the initial file.")
|
||||
logger.error(f"Extra lines: {set(processed_lines) - initial_lines_set}")
|
||||
return 0
|
||||
|
||||
# 2. Uniqueness Check: Check for duplicates within the processed file
|
||||
if len(processed_train_ids) != len(set(processed_train_ids)):
|
||||
logger.error("Duplicate train_ids found in the processed file.")
|
||||
return 0
|
||||
|
||||
# 3. Correctness Check: Compare the set of train_ids
|
||||
if set(processed_train_ids) != set(gold_train_ids):
|
||||
logger.error("Set of train_ids does not match between processed file and gold file.")
|
||||
return 0
|
||||
|
||||
# 4. Line count check
|
||||
if len(processed_lines) != len(gold_lines):
|
||||
logger.error("Number of lines does not match between processed file and gold file.")
|
||||
return 0
|
||||
|
||||
return 1
|
||||
|
||||
@@ -3,6 +3,7 @@ import os
|
||||
import subprocess
|
||||
from typing import Dict
|
||||
from xml.etree import ElementTree
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import acoustid
|
||||
import cv2
|
||||
@@ -26,15 +27,108 @@ def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float:
|
||||
|
||||
tree = ElementTree.fromstring(actual_status)
|
||||
status = tree.find('state').text
|
||||
logger.info(f"VLC Status: {status}")
|
||||
if status == 'playing':
|
||||
if rule['type'] == 'file_name':
|
||||
file_info = tree.find('information/category[@name="meta"]/info[@name="filename"]').text
|
||||
# Try multiple possible paths for file information in VLC XML
|
||||
file_paths = [
|
||||
'information/category[@name="meta"]/info[@name="filename"]',
|
||||
'information/category[@name="meta"]/info[@name="title"]',
|
||||
'information/category[@name="meta"]/info[@name="uri"]',
|
||||
'information/category[@name="meta"]/info[@name="location"]',
|
||||
'information/category[@name="meta"]/info[@name="name"]'
|
||||
]
|
||||
|
||||
file_info = None
|
||||
for path in file_paths:
|
||||
element = tree.find(path)
|
||||
if element is not None and element.text:
|
||||
file_info = element.text
|
||||
break
|
||||
|
||||
if file_info:
|
||||
return 1 if file_info.endswith(rule['file_name']) else 0
|
||||
expected_filename = rule['file_name']
|
||||
|
||||
# Method 1: Direct filename match (most precise)
|
||||
actual_basename = os.path.basename(file_info)
|
||||
if actual_basename == expected_filename:
|
||||
return 1
|
||||
|
||||
# Method 2: Endswith match (for backward compatibility)
|
||||
if file_info.endswith(expected_filename):
|
||||
return 1
|
||||
|
||||
# Method 3: For paths, check if expected filename is in the path
|
||||
if expected_filename in file_info:
|
||||
# Additional check to avoid false positives
|
||||
# Make sure it's actually the filename, not just part of a path
|
||||
if file_info.endswith('/' + expected_filename) or file_info.endswith('\\' + expected_filename):
|
||||
return 1
|
||||
|
||||
logger.warning(f"File name mismatch - Expected: {expected_filename}, Found: {file_info}")
|
||||
return 0
|
||||
else:
|
||||
logger.warning(f"Could not find file information in VLC status XML for rule: {rule}")
|
||||
return 0
|
||||
elif rule['type'] == 'url':
|
||||
file_info = tree.find('information/category[@name="meta"]/info[@name="url"]').text
|
||||
# Try multiple possible paths for URL information in VLC XML
|
||||
url_paths = [
|
||||
'information/category[@name="meta"]/info[@name="url"]',
|
||||
'information/category[@name="meta"]/info[@name="URI"]',
|
||||
'information/category[@name="meta"]/info[@name="location"]',
|
||||
'information/category[@name="meta"]/info[@name="title"]', # Sometimes URL is in title for streams
|
||||
'information/category[@name="meta"]/info[@name="filename"]', # Sometimes URL is in filename for streams
|
||||
'information/category[@name="Stream 0"]/info[@name="Codec"]', # Try stream info
|
||||
'information/category[@name="Stream 0"]/info[@name="Type"]',
|
||||
'information/category[@name="Stream 0"]/info[@name="Language"]'
|
||||
]
|
||||
|
||||
file_info = None
|
||||
logger.debug(f"Looking for URL: {rule['url']}")
|
||||
|
||||
for path in url_paths:
|
||||
element = tree.find(path)
|
||||
if element is not None and element.text:
|
||||
file_info = element.text
|
||||
logger.debug(f"Found URL info at '{path}': {file_info}")
|
||||
break
|
||||
|
||||
if file_info:
|
||||
return 1 if file_info.endswith(rule['url']) else 0
|
||||
# For URL comparison, check if the rule URL is contained in the file_info
|
||||
# This handles cases where VLC might show a longer or modified URL
|
||||
expected_url = rule['url']
|
||||
|
||||
# Method 1: Direct URL match
|
||||
if expected_url in file_info or file_info.endswith(expected_url):
|
||||
return 1
|
||||
|
||||
# Method 2: For HLS streams, VLC often shows just the filename instead of full URL
|
||||
# Check if the file_info matches the filename part of the expected URL
|
||||
try:
|
||||
expected_parsed = urlparse(expected_url)
|
||||
expected_filename = os.path.basename(expected_parsed.path)
|
||||
|
||||
# If VLC shows just the filename (common for HLS streams)
|
||||
if file_info == expected_filename:
|
||||
logger.info(f"URL filename match - Expected URL: {expected_url}, VLC shows filename: {file_info}")
|
||||
return 1
|
||||
|
||||
# Method 3: Check if both are URLs from the same domain and similar path
|
||||
if '://' in file_info: # file_info is also a URL
|
||||
actual_parsed = urlparse(file_info)
|
||||
# Same domain and similar path structure
|
||||
if (expected_parsed.netloc == actual_parsed.netloc and
|
||||
expected_parsed.path in actual_parsed.path):
|
||||
return 1
|
||||
except Exception as e:
|
||||
logger.debug(f"URL parsing error: {e}")
|
||||
pass
|
||||
|
||||
logger.warning(f"URL mismatch - Expected: {expected_url}, Found: {file_info}")
|
||||
return 0
|
||||
else:
|
||||
logger.warning(f"Could not find URL information in VLC status XML for rule: {rule}")
|
||||
return 0
|
||||
else:
|
||||
logger.error(f"Unknown type: {rule['type']}")
|
||||
return 0
|
||||
@@ -130,33 +224,67 @@ def compare_audios(audio_path_1, audio_path_2):
|
||||
Compare two audio files and return a similarity score in the range [0, 1].
|
||||
audio_path_1, audio_path_2: paths to the audio files to compare
|
||||
"""
|
||||
# Example Usage:
|
||||
# similarity = compare_audios_simple('path_to_audio1.mp3', 'path_to_audio2.mp3')
|
||||
# print(f'Similarity Score: {similarity}')
|
||||
|
||||
# Convert to common format if necessary and load audio
|
||||
if not audio_path_1 or not audio_path_2:
|
||||
return 0
|
||||
|
||||
# Load the audio files and extract MFCC features
|
||||
y1, sr1 = librosa.load(audio_path_1)
|
||||
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
|
||||
y1, y2 = None, None
|
||||
try:
|
||||
y1, sr1 = librosa.load(audio_path_1)
|
||||
except Exception:
|
||||
logger.warning(f"Could not load audio from {os.path.basename(audio_path_1)}. It might be empty or corrupt.")
|
||||
|
||||
y2, sr2 = librosa.load(audio_path_2)
|
||||
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
|
||||
try:
|
||||
y2, sr2 = librosa.load(audio_path_2)
|
||||
except Exception:
|
||||
logger.warning(f"Could not load audio from {os.path.basename(audio_path_2)}. It might be empty or corrupt.")
|
||||
|
||||
# Handle cases where one or both audio files are empty or corrupt.
|
||||
is_y1_bad = (y1 is None) or (y1.shape[0] == 0)
|
||||
is_y2_bad = (y2 is None) or (y2.shape[0] == 0)
|
||||
|
||||
if is_y1_bad and is_y2_bad:
|
||||
logger.info("Both audio files are empty or corrupt. Considering them perfectly similar.")
|
||||
return 1.0
|
||||
|
||||
if is_y1_bad or is_y2_bad:
|
||||
logger.warning(f"One audio file is empty/corrupt, the other is not. Similarity is 0.")
|
||||
return 0.0
|
||||
|
||||
try:
|
||||
logger.info(f"Audio 1 ({os.path.basename(audio_path_1)}): sr={sr1}, len={len(y1)}")
|
||||
logger.info(f"Audio 2 ({os.path.basename(audio_path_2)}): sr={sr2}, len={len(y2)}")
|
||||
|
||||
# Extract MFCC features
|
||||
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
|
||||
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
|
||||
except Exception as e:
|
||||
logger.error(f"Error during MFCC extraction: {e}")
|
||||
return 0.0
|
||||
|
||||
# Normalize the MFCC features
|
||||
mfcc1 = librosa.util.normalize(mfcc1, axis=1)
|
||||
mfcc2 = librosa.util.normalize(mfcc2, axis=1)
|
||||
logger.info(f"MFCCs normalized.")
|
||||
|
||||
# Define a lambda function to compute cosine distance
|
||||
dist_func = lambda x, y: cosine(x, y)
|
||||
|
||||
# Use the DTW algorithm to find the best alignment path
|
||||
distance, path = fastdtw(mfcc1.T, mfcc2.T, dist=dist_func)
|
||||
logger.info(f"DTW distance: {distance:.4f}, Path length: {len(path)}")
|
||||
|
||||
# Calculate the similarity score, here we use 1/(1+distance) to convert distance to a similarity score
|
||||
similarity = 1 / (1 + distance)
|
||||
# Normalize the DTW distance by the length of the alignment path.
|
||||
if len(path) == 0:
|
||||
normalized_distance = np.inf
|
||||
else:
|
||||
normalized_distance = distance / len(path)
|
||||
logger.info(f"Normalized DTW distance: {normalized_distance:.4f}")
|
||||
|
||||
# Convert the normalized distance to a similarity score using an exponential decay function.
|
||||
similarity = np.exp(-normalized_distance)
|
||||
|
||||
return similarity
|
||||
|
||||
@@ -204,32 +332,6 @@ def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=
|
||||
return 1.
|
||||
|
||||
|
||||
def are_audio_files_similar(mp3_file_path, mp4_file_path):
|
||||
# Extract audio fingerprint from MP3 file
|
||||
mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path)
|
||||
|
||||
# Extract the audio stream from the MP4 file
|
||||
mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3'
|
||||
try:
|
||||
subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3",
|
||||
mp4_audio_path], check=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"An error occurred during audio extraction from MP4: {e}")
|
||||
return 0.
|
||||
|
||||
# Extract audio fingerprint from the extracted audio
|
||||
mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path)
|
||||
|
||||
# Clean up temporary extracted audio file
|
||||
os.remove(mp4_audio_path)
|
||||
|
||||
# Compare fingerprints (rudimentary comparison)
|
||||
if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint:
|
||||
return 1.
|
||||
|
||||
return 0.
|
||||
|
||||
|
||||
def check_qt_bgcone(actual_config_path, rule):
|
||||
with open(actual_config_path, 'rb') as file:
|
||||
config_file = file.read().decode('utf-8')
|
||||
|
||||
@@ -58,16 +58,20 @@ def check_json_settings(actual: str, expected: str, **options) -> float:
|
||||
if not actual:
|
||||
return 0.
|
||||
|
||||
with open(actual, 'r') as f:
|
||||
data = json.load(f)
|
||||
try:
|
||||
with open(actual, 'r') as f:
|
||||
data = json.load(f)
|
||||
except Exception as e:
|
||||
return 0.0
|
||||
|
||||
expect = expected['expected']
|
||||
data_copy = copy.deepcopy(data)
|
||||
data_copy.update(expect)
|
||||
if data == data_copy:
|
||||
return 1.0
|
||||
else:
|
||||
return 0.0
|
||||
|
||||
# Check if all expected key-value pairs are in the actual data
|
||||
for key, value in expect.items():
|
||||
if key not in data or data[key] != value:
|
||||
return 0.0
|
||||
|
||||
return 1.0
|
||||
|
||||
|
||||
def compare_text_file(actual: str, expected: str, **options) -> float:
|
||||
|
||||
@@ -5,6 +5,10 @@ import psutil
|
||||
import logging
|
||||
import dotenv
|
||||
import signal
|
||||
|
||||
|
||||
INSTANCE_TYPE = "t3.xlarge"
|
||||
|
||||
# Load environment variables from .env file
|
||||
dotenv.load_dotenv()
|
||||
|
||||
@@ -31,37 +35,17 @@ logger.setLevel(logging.INFO)
|
||||
DEFAULT_REGION = "us-east-1"
|
||||
# todo: Add doc for the configuration of image, security group and network interface
|
||||
# todo: public the AMI images
|
||||
# ami-05e7d7bd279ea4f14
|
||||
IMAGE_ID_MAP = {
|
||||
"us-east-1": "ami-03a22c6e501415fb1",
|
||||
"us-east-1": "ami-0cae20d2680c939d4",
|
||||
"ap-east-1": "ami-0c092a5b8be4116f5",
|
||||
}
|
||||
|
||||
INSTANCE_TYPE = "t3.medium"
|
||||
|
||||
def _allocate_vm(region=DEFAULT_REGION):
|
||||
|
||||
if region not in IMAGE_ID_MAP:
|
||||
raise ValueError(f"Region {region} is not supported. Supported regions are: {list(IMAGE_ID_MAP.keys())}")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": IMAGE_ID_MAP[region],
|
||||
"InstanceType": INSTANCE_TYPE,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": os.getenv('AWS_SUBNET_ID'),
|
||||
"AssociatePublicIpAddress": True,
|
||||
"DeviceIndex": 0,
|
||||
"Groups": [
|
||||
os.getenv('AWS_SECURITY_GROUP_ID')
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
ec2_client = boto3.client('ec2', region_name=region)
|
||||
instance_id = None
|
||||
original_sigint_handler = signal.getsignal(signal.SIGINT)
|
||||
@@ -94,26 +78,76 @@ def _allocate_vm(region=DEFAULT_REGION):
|
||||
signal.signal(signal.SIGINT, signal_handler)
|
||||
signal.signal(signal.SIGTERM, signal_handler)
|
||||
|
||||
if not os.getenv('AWS_SECURITY_GROUP_ID'):
|
||||
raise ValueError("AWS_SECURITY_GROUP_ID is not set in the environment variables.")
|
||||
if not os.getenv('AWS_SUBNET_ID'):
|
||||
raise ValueError("AWS_SUBNET_ID is not set in the environment variables.")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": IMAGE_ID_MAP[region],
|
||||
"InstanceType": INSTANCE_TYPE,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": os.getenv('AWS_SUBNET_ID'),
|
||||
"AssociatePublicIpAddress": True,
|
||||
"DeviceIndex": 0,
|
||||
"Groups": [
|
||||
os.getenv('AWS_SECURITY_GROUP_ID')
|
||||
]
|
||||
}
|
||||
],
|
||||
"BlockDeviceMappings": [
|
||||
{
|
||||
"DeviceName": "/dev/sda1",
|
||||
"Ebs": {
|
||||
# "VolumeInitializationRate": 300
|
||||
"VolumeSize": 30, # Size in GB
|
||||
"VolumeType": "gp3", # General Purpose SSD
|
||||
"Throughput": 1000,
|
||||
"Iops": 4000 # Adjust IOPS as needed
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
response = ec2_client.run_instances(**run_instances_params)
|
||||
instance_id = response['Instances'][0]['InstanceId']
|
||||
|
||||
waiter = ec2_client.get_waiter('instance_running')
|
||||
logger.info(f"Waiting for instance {instance_id} to be running...")
|
||||
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
|
||||
waiter.wait(InstanceIds=[instance_id])
|
||||
logger.info(f"Instance {instance_id} is ready.")
|
||||
|
||||
# 获取并显示VNC访问地址
|
||||
try:
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC访问地址: {vnc_url}")
|
||||
print(f"📍 请在浏览器中打开上述地址进行远程桌面访问\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
|
||||
except KeyboardInterrupt:
|
||||
logger.warning("VM allocation interrupted by user (SIGINT).")
|
||||
raise
|
||||
except SystemExit:
|
||||
logger.warning("VM allocation terminated by parent process (SIGTERM).")
|
||||
if instance_id:
|
||||
logger.info(f"Terminating instance {instance_id} due to interruption.")
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to allocate VM in region {region}: {str(e)}")
|
||||
# try to clean up any resources that were created
|
||||
try:
|
||||
if instance_id:
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
logger.info(f"Terminated instance {instance_id} due to allocation failure.")
|
||||
except Exception as cleanup_error:
|
||||
logger.error(f"May fail to clean up instance {instance_id}: {str(cleanup_error)}")
|
||||
logger.error(f"Failed to allocate VM: {e}", exc_info=True)
|
||||
if instance_id:
|
||||
logger.info(f"Terminating instance {instance_id} due to an error.")
|
||||
ec2_client.terminate_instances(InstanceIds=[instance_id])
|
||||
raise
|
||||
finally:
|
||||
# Restore original signal handlers
|
||||
@@ -153,6 +187,27 @@ def _allocate_vm_with_proxy(region=DEFAULT_REGION, proxy_config_file=None):
|
||||
subnet_id=os.getenv('AWS_SUBNET_ID')
|
||||
)
|
||||
|
||||
try:
|
||||
ec2_client = boto3.client('ec2', region_name=region)
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
if current_proxy:
|
||||
logger.info(f"🌐 Proxy: {current_proxy.host}:{current_proxy.port}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
if current_proxy:
|
||||
print(f"🔄 Current Proxy: {current_proxy.host}:{current_proxy.port}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for proxy instance {instance_id}: {e}")
|
||||
|
||||
return instance_id
|
||||
|
||||
|
||||
@@ -213,4 +268,4 @@ class AWSVMManager(VMManager):
|
||||
else:
|
||||
logger.info("Allocating a new VM in region: {}".format(region))
|
||||
new_vm_path = _allocate_vm(region)
|
||||
return new_vm_path
|
||||
return new_vm_path
|
||||
@@ -63,10 +63,24 @@ class AWSProvider(Provider):
|
||||
for reservation in response['Reservations']:
|
||||
for instance in reservation['Instances']:
|
||||
private_ip_address = instance.get('PrivateIpAddress', '')
|
||||
public_ip_address = instance.get('PublicIpAddress', '')
|
||||
|
||||
if public_ip_address:
|
||||
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip_address}")
|
||||
logger.info(f"🏠 Private IP: {private_ip_address}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
else:
|
||||
logger.warning("No public IP address available for VNC access")
|
||||
|
||||
return private_ip_address
|
||||
return '' # Return an empty string if no IP address is found
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
|
||||
logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
|
||||
raise
|
||||
|
||||
def save_state(self, path_to_vm: str, snapshot_name: str):
|
||||
@@ -74,7 +88,7 @@ class AWSProvider(Provider):
|
||||
ec2_client = boto3.client('ec2', region_name=self.region)
|
||||
|
||||
try:
|
||||
image_response = ec2_client.create_image(InstanceId=path_to_vm, ImageId=snapshot_name)
|
||||
image_response = ec2_client.create_image(InstanceId=path_to_vm, Name=snapshot_name)
|
||||
image_id = image_response['ImageId']
|
||||
logger.info(f"AMI {image_id} created successfully from instance {path_to_vm}.")
|
||||
return image_id
|
||||
@@ -83,7 +97,7 @@ class AWSProvider(Provider):
|
||||
raise
|
||||
|
||||
def revert_to_snapshot(self, path_to_vm: str, snapshot_name: str):
|
||||
logger.info(f"Reverting AWS VM to snapshot: {snapshot_name}...")
|
||||
logger.info(f"Reverting AWS VM to snapshot AMI: {snapshot_name}...")
|
||||
ec2_client = boto3.client('ec2', region_name=self.region)
|
||||
|
||||
try:
|
||||
@@ -93,37 +107,65 @@ class AWSProvider(Provider):
|
||||
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
|
||||
subnet_id = instance['SubnetId']
|
||||
instance_type = instance['InstanceType']
|
||||
instance_snapshot = instance_details['Reservations'][0]['Instances'][0]['ImageId']
|
||||
|
||||
|
||||
# Step 2: Terminate the old instance
|
||||
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
|
||||
logger.info(f"Old instance {path_to_vm} has been terminated.")
|
||||
|
||||
# Step 3: Launch a new instance from the snapshot
|
||||
logger.info(f"Launching a new instance from snapshot {instance_snapshot}...")
|
||||
|
||||
|
||||
new_instance = ec2_client.run_instances(
|
||||
MaxCount = 1,
|
||||
MinCount = 1,
|
||||
ImageId = instance_snapshot,
|
||||
InstanceType = instance_type,
|
||||
EbsOptimized = True,
|
||||
NetworkInterfaces = [
|
||||
# Step 3: Launch a new instance from the snapshot(AMI) with performance optimization
|
||||
logger.info(f"Launching a new instance from AMI {snapshot_name}...")
|
||||
|
||||
run_instances_params = {
|
||||
"MaxCount": 1,
|
||||
"MinCount": 1,
|
||||
"ImageId": snapshot_name,
|
||||
"InstanceType": instance_type,
|
||||
"EbsOptimized": True,
|
||||
"NetworkInterfaces": [
|
||||
{
|
||||
"SubnetId": subnet_id,
|
||||
"AssociatePublicIpAddress": True,
|
||||
"DeviceIndex": 0,
|
||||
"Groups": security_groups
|
||||
}
|
||||
],
|
||||
"BlockDeviceMappings": [
|
||||
{
|
||||
"DeviceName": "/dev/sda1",
|
||||
"Ebs": {
|
||||
# "VolumeInitializationRate": 300
|
||||
"VolumeSize": 30, # Size in GB
|
||||
"VolumeType": "gp3", # General Purpose SSD
|
||||
"Throughput": 1000,
|
||||
"Iops": 4000 # Adjust IOPS as needed
|
||||
}
|
||||
}
|
||||
]
|
||||
)
|
||||
}
|
||||
|
||||
new_instance = ec2_client.run_instances(**run_instances_params)
|
||||
new_instance_id = new_instance['Instances'][0]['InstanceId']
|
||||
logger.info(f"New instance {new_instance_id} launched from snapshot {snapshot_name}.")
|
||||
logger.info(f"New instance {new_instance_id} launched from AMI {snapshot_name}.")
|
||||
logger.info(f"Waiting for instance {new_instance_id} to be running...")
|
||||
ec2_client.get_waiter('instance_running').wait(InstanceIds=[new_instance_id])
|
||||
|
||||
logger.info(f"Instance {new_instance_id} is ready.")
|
||||
|
||||
try:
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[new_instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ New Instance VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 New Instance ID: {new_instance_id}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 New Instance VNC Web Access URL: {vnc_url}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for new instance {new_instance_id}: {e}")
|
||||
|
||||
return new_instance_id
|
||||
|
||||
|
||||
@@ -163,16 +163,34 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
|
||||
|
||||
logger.info(f"Created new instance {instance_id} with proxy configuration")
|
||||
|
||||
# 等待实例运行
|
||||
logger.info(f"Waiting for instance {instance_id} to be running...")
|
||||
ec2_client.get_waiter('instance_running').wait(InstanceIds=[instance_id])
|
||||
logger.info(f"Instance {instance_id} is ready.")
|
||||
|
||||
try:
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[instance_id])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
public_ip = instance.get('PublicIpAddress', '')
|
||||
if public_ip:
|
||||
vnc_url = f"http://{public_ip}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip}")
|
||||
logger.info(f"🆔 Instance ID: {instance_id}")
|
||||
if self.current_proxy:
|
||||
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
if self.current_proxy:
|
||||
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to get VNC address for instance {instance_id}: {e}")
|
||||
|
||||
return instance_id
|
||||
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to create instance with proxy: {str(e)}")
|
||||
# 如果当前代理失败,尝试轮换代理
|
||||
if self.current_proxy:
|
||||
proxy_pool = get_global_proxy_pool()
|
||||
proxy_pool.mark_proxy_failed(self.current_proxy)
|
||||
@@ -188,10 +206,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
|
||||
for reservation in response['Reservations']:
|
||||
for instance in reservation['Instances']:
|
||||
private_ip_address = instance.get('PrivateIpAddress', '')
|
||||
public_ip_address = instance.get('PublicIpAddress', '')
|
||||
|
||||
if public_ip_address:
|
||||
vnc_url = f"http://{public_ip_address}:5910/vnc.html"
|
||||
logger.info("="*80)
|
||||
logger.info(f"🖥️ VNC Web Access URL: {vnc_url}")
|
||||
logger.info(f"📡 Public IP: {public_ip_address}")
|
||||
logger.info(f"🏠 Private IP: {private_ip_address}")
|
||||
if self.current_proxy:
|
||||
logger.info(f"🌐 Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
logger.info("="*80)
|
||||
print(f"\n🌐 VNC Web Access URL: {vnc_url}")
|
||||
if self.current_proxy:
|
||||
print(f"🔄 Current Proxy: {self.current_proxy.host}:{self.current_proxy.port}")
|
||||
print(f"📍 Please open the above address in the browser for remote desktop access\n")
|
||||
else:
|
||||
logger.warning("No public IP address available for VNC access")
|
||||
|
||||
return private_ip_address
|
||||
return ''
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to retrieve private IP address for the instance {path_to_vm}: {str(e)}")
|
||||
logger.error(f"Failed to retrieve IP address for the instance {path_to_vm}: {str(e)}")
|
||||
raise
|
||||
|
||||
def save_state(self, path_to_vm: str, snapshot_name: str):
|
||||
@@ -212,24 +248,28 @@ echo "$(date): Configured proxy {self.current_proxy.host}:{self.current_proxy.po
|
||||
ec2_client = boto3.client('ec2', region_name=self.region)
|
||||
|
||||
try:
|
||||
# 获取原实例详情
|
||||
# Get original instance details for config.
|
||||
instance_details = ec2_client.describe_instances(InstanceIds=[path_to_vm])
|
||||
instance = instance_details['Reservations'][0]['Instances'][0]
|
||||
security_groups = [sg['GroupId'] for sg in instance['SecurityGroups']]
|
||||
subnet_id = instance['SubnetId']
|
||||
instance_type = instance['InstanceType']
|
||||
|
||||
# 终止旧实例
|
||||
# Terminate the old instance. This is a non-blocking call.
|
||||
logger.info(f"Initiating termination for old instance {path_to_vm}...")
|
||||
ec2_client.terminate_instances(InstanceIds=[path_to_vm])
|
||||
logger.info(f"Old instance {path_to_vm} has been terminated.")
|
||||
logger.info(f"Old instance {path_to_vm} termination initiated.")
|
||||
|
||||
# 轮换到新的代理
|
||||
# Rotate to a new proxy
|
||||
self._rotate_proxy()
|
||||
|
||||
# 创建新实例
|
||||
# Create a new instance
|
||||
new_instance_id = self.create_instance_with_proxy(
|
||||
snapshot_name, instance_type, security_groups, subnet_id
|
||||
)
|
||||
|
||||
# Note: VNC address is displayed within create_instance_with_proxy
|
||||
logger.info(f"Successfully launched new instance {new_instance_id} for revert.")
|
||||
|
||||
return new_instance_id
|
||||
|
||||
|
||||
@@ -256,6 +256,8 @@ def _install_vm(vm_name, vms_dir, downloaded_file_name, os_type, original_vm_nam
|
||||
|
||||
# Try downloading the screenshot until successful
|
||||
while not download_screenshot(vm_ip):
|
||||
# Try to get the IP again in case it has changed
|
||||
vm_ip = get_vm_ip(vm_path)
|
||||
logger.info("Check whether the virtual machine is ready...")
|
||||
|
||||
logger.info("Virtual machine is ready. Start to make a snapshot on the virtual machine. It would take a while...")
|
||||
|
||||
@@ -4,6 +4,7 @@ import platform
|
||||
import shlex
|
||||
import json
|
||||
import subprocess, signal
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional, Sequence
|
||||
from typing import List, Dict, Tuple, Literal
|
||||
@@ -65,6 +66,8 @@ app = Flask(__name__)
|
||||
pyautogui.PAUSE = 0
|
||||
pyautogui.DARWIN_CATCH_UP_TIME = 0
|
||||
|
||||
TIMEOUT = 1800 # seconds
|
||||
|
||||
logger = app.logger
|
||||
recording_process = None # fixme: this is a temporary solution for recording, need to be changed to support multiple-process
|
||||
recording_path = "/tmp/recording.mp4"
|
||||
@@ -114,6 +117,113 @@ def execute_command():
|
||||
}), 500
|
||||
|
||||
|
||||
@app.route('/setup/execute_with_verification', methods=['POST'])
|
||||
@app.route('/execute_with_verification', methods=['POST'])
|
||||
def execute_command_with_verification():
|
||||
"""Execute command and verify the result based on provided verification criteria"""
|
||||
data = request.json
|
||||
shell = data.get('shell', False)
|
||||
command = data.get('command', "" if shell else [])
|
||||
verification = data.get('verification', {})
|
||||
max_wait_time = data.get('max_wait_time', 10) # Maximum wait time in seconds
|
||||
check_interval = data.get('check_interval', 1) # Check interval in seconds
|
||||
|
||||
if isinstance(command, str) and not shell:
|
||||
command = shlex.split(command)
|
||||
|
||||
# Expand user directory
|
||||
for i, arg in enumerate(command):
|
||||
if arg.startswith("~/"):
|
||||
command[i] = os.path.expanduser(arg)
|
||||
|
||||
# Execute the main command
|
||||
try:
|
||||
if platform_name == "Windows":
|
||||
flags = subprocess.CREATE_NO_WINDOW
|
||||
else:
|
||||
flags = 0
|
||||
result = subprocess.run(
|
||||
command,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
shell=shell,
|
||||
text=True,
|
||||
timeout=120,
|
||||
creationflags=flags,
|
||||
)
|
||||
|
||||
# If no verification is needed, return immediately
|
||||
if not verification:
|
||||
return jsonify({
|
||||
'status': 'success',
|
||||
'output': result.stdout,
|
||||
'error': result.stderr,
|
||||
'returncode': result.returncode
|
||||
})
|
||||
|
||||
# Wait and verify the result
|
||||
import time
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < max_wait_time:
|
||||
verification_passed = True
|
||||
|
||||
# Check window existence if specified
|
||||
if 'window_exists' in verification:
|
||||
window_name = verification['window_exists']
|
||||
try:
|
||||
if platform_name == 'Linux':
|
||||
wmctrl_result = subprocess.run(['wmctrl', '-l'],
|
||||
capture_output=True, text=True, check=True)
|
||||
if window_name.lower() not in wmctrl_result.stdout.lower():
|
||||
verification_passed = False
|
||||
elif platform_name in ['Windows', 'Darwin']:
|
||||
import pygetwindow as gw
|
||||
windows = gw.getWindowsWithTitle(window_name)
|
||||
if not windows:
|
||||
verification_passed = False
|
||||
except Exception:
|
||||
verification_passed = False
|
||||
|
||||
# Check command execution if specified
|
||||
if 'command_success' in verification:
|
||||
verify_cmd = verification['command_success']
|
||||
try:
|
||||
verify_result = subprocess.run(verify_cmd, shell=True,
|
||||
capture_output=True, text=True, timeout=5)
|
||||
if verify_result.returncode != 0:
|
||||
verification_passed = False
|
||||
except Exception:
|
||||
verification_passed = False
|
||||
|
||||
if verification_passed:
|
||||
return jsonify({
|
||||
'status': 'success',
|
||||
'output': result.stdout,
|
||||
'error': result.stderr,
|
||||
'returncode': result.returncode,
|
||||
'verification': 'passed',
|
||||
'wait_time': time.time() - start_time
|
||||
})
|
||||
|
||||
time.sleep(check_interval)
|
||||
|
||||
# Verification failed
|
||||
return jsonify({
|
||||
'status': 'verification_failed',
|
||||
'output': result.stdout,
|
||||
'error': result.stderr,
|
||||
'returncode': result.returncode,
|
||||
'verification': 'failed',
|
||||
'wait_time': max_wait_time
|
||||
}), 500
|
||||
|
||||
except Exception as e:
|
||||
return jsonify({
|
||||
'status': 'error',
|
||||
'message': str(e)
|
||||
}), 500
|
||||
|
||||
|
||||
def _get_machine_architecture() -> str:
|
||||
""" Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc.
|
||||
"""
|
||||
@@ -202,8 +312,8 @@ def capture_screen_with_cursor():
|
||||
pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty))
|
||||
|
||||
img.paste(cursor, pos, cursor)
|
||||
except:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to capture cursor on Windows, screenshot will not have a cursor. Error: {e}")
|
||||
|
||||
img.save(file_path)
|
||||
elif user_platform == "Linux":
|
||||
@@ -1025,11 +1135,21 @@ def get_file():
|
||||
return jsonify({"error": "file_path is required"}), 400
|
||||
|
||||
try:
|
||||
# Check if the file exists and get its size
|
||||
if not os.path.exists(file_path):
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
|
||||
file_size = os.path.getsize(file_path)
|
||||
logger.info(f"Serving file: {file_path} ({file_size} bytes)")
|
||||
|
||||
# Check if the file exists and send it to the user
|
||||
return send_file(file_path, as_attachment=True)
|
||||
except FileNotFoundError:
|
||||
# If the file is not found, return a 404 error
|
||||
return jsonify({"error": "File not found"}), 404
|
||||
except Exception as e:
|
||||
logger.error(f"Error serving file {file_path}: {e}")
|
||||
return jsonify({"error": f"Failed to serve file: {str(e)}"}), 500
|
||||
|
||||
|
||||
@app.route("/setup/upload", methods=["POST"])
|
||||
@@ -1038,8 +1158,27 @@ def upload_file():
|
||||
if 'file_path' in request.form and 'file_data' in request.files:
|
||||
file_path = os.path.expandvars(os.path.expanduser(request.form['file_path']))
|
||||
file = request.files["file_data"]
|
||||
file.save(file_path)
|
||||
return "File Uploaded"
|
||||
|
||||
try:
|
||||
# Ensure target directory exists
|
||||
os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
||||
|
||||
# Save file and get size for verification
|
||||
file.save(file_path)
|
||||
uploaded_size = os.path.getsize(file_path)
|
||||
|
||||
logger.info(f"File uploaded successfully: {file_path} ({uploaded_size} bytes)")
|
||||
return f"File Uploaded: {uploaded_size} bytes"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error uploading file to {file_path}: {e}")
|
||||
# Clean up partial file if it exists
|
||||
if os.path.exists(file_path):
|
||||
try:
|
||||
os.remove(file_path)
|
||||
except:
|
||||
pass
|
||||
return jsonify({"error": f"Failed to upload file: {str(e)}"}), 500
|
||||
else:
|
||||
return jsonify({"error": "file_path and file_data are required"}), 400
|
||||
|
||||
@@ -1098,20 +1237,45 @@ def download_file():
|
||||
|
||||
max_retries = 3
|
||||
error: Optional[Exception] = None
|
||||
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
logger.info(f"Download attempt {i+1}/{max_retries} for {url}")
|
||||
response = requests.get(url, stream=True, timeout=300)
|
||||
response.raise_for_status()
|
||||
|
||||
# Get expected file size if available
|
||||
total_size = int(response.headers.get('content-length', 0))
|
||||
if total_size > 0:
|
||||
logger.info(f"Expected file size: {total_size / (1024*1024):.2f} MB")
|
||||
|
||||
downloaded_size = 0
|
||||
with open(path, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
return "File downloaded successfully"
|
||||
downloaded_size += len(chunk)
|
||||
if total_size > 0 and downloaded_size % (1024*1024) == 0: # Log every MB
|
||||
progress = (downloaded_size / total_size) * 100
|
||||
logger.info(f"Download progress: {progress:.1f}%")
|
||||
|
||||
# Verify download completeness
|
||||
actual_size = os.path.getsize(path)
|
||||
if total_size > 0 and actual_size != total_size:
|
||||
raise Exception(f"Download incomplete. Expected {total_size} bytes, got {actual_size} bytes")
|
||||
|
||||
logger.info(f"File downloaded successfully: {path} ({actual_size} bytes)")
|
||||
return f"File downloaded successfully: {actual_size} bytes"
|
||||
|
||||
except requests.RequestException as e:
|
||||
except (requests.RequestException, Exception) as e:
|
||||
error = e
|
||||
logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
|
||||
logger.error(f"Failed to download {url}: {e}. Retrying... ({max_retries - i - 1} attempts left)")
|
||||
# Clean up partial download
|
||||
if path.exists():
|
||||
try:
|
||||
path.unlink()
|
||||
except:
|
||||
pass
|
||||
|
||||
return f"Failed to download {url}. No retries left. Error: {error}", 500
|
||||
|
||||
@@ -1124,18 +1288,88 @@ def open_file():
|
||||
if not path:
|
||||
return "Path not supplied!", 400
|
||||
|
||||
path = Path(os.path.expandvars(os.path.expanduser(path)))
|
||||
path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
|
||||
|
||||
if not path.exists():
|
||||
return f"File not found: {path}", 404
|
||||
# Check if it's a file path that exists
|
||||
is_file_path = path_obj.exists()
|
||||
|
||||
# If it's not a file path, treat it as an application name/command
|
||||
if not is_file_path:
|
||||
# Check if it's a valid command by trying to find it in PATH
|
||||
import shutil
|
||||
if not shutil.which(path):
|
||||
return f"Application/file not found: {path}", 404
|
||||
|
||||
try:
|
||||
if platform.system() == "Windows":
|
||||
os.startfile(path)
|
||||
if is_file_path:
|
||||
# Handle file opening
|
||||
if platform.system() == "Windows":
|
||||
os.startfile(path_obj)
|
||||
else:
|
||||
open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
|
||||
subprocess.Popen([open_cmd, str(path_obj)])
|
||||
file_name = path_obj.name
|
||||
file_name_without_ext, _ = os.path.splitext(file_name)
|
||||
else:
|
||||
open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
|
||||
subprocess.Popen([open_cmd, str(path)])
|
||||
return "File opened successfully"
|
||||
# Handle application launching
|
||||
if platform.system() == "Windows":
|
||||
subprocess.Popen([path])
|
||||
else:
|
||||
subprocess.Popen([path])
|
||||
file_name = path
|
||||
file_name_without_ext = path
|
||||
|
||||
# Wait for the file/application to open
|
||||
|
||||
start_time = time.time()
|
||||
window_found = False
|
||||
|
||||
while time.time() - start_time < TIMEOUT:
|
||||
os_name = platform.system()
|
||||
if os_name in ['Windows', 'Darwin']:
|
||||
import pygetwindow as gw
|
||||
# Check for window title containing file name or file name without extension
|
||||
windows = gw.getWindowsWithTitle(file_name)
|
||||
if not windows:
|
||||
windows = gw.getWindowsWithTitle(file_name_without_ext)
|
||||
|
||||
if windows:
|
||||
# To be more specific, we can try to activate it
|
||||
windows[0].activate()
|
||||
window_found = True
|
||||
break
|
||||
elif os_name == 'Linux':
|
||||
try:
|
||||
# Using wmctrl to list windows and check if any window title contains the filename
|
||||
result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
|
||||
window_list = result.stdout.strip().split('\n')
|
||||
if not result.stdout.strip():
|
||||
pass # No windows, just continue waiting
|
||||
else:
|
||||
for window in window_list:
|
||||
if file_name in window or file_name_without_ext in window:
|
||||
# a window is found, now activate it
|
||||
window_id = window.split()[0]
|
||||
subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
|
||||
window_found = True
|
||||
break
|
||||
if window_found:
|
||||
break
|
||||
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||
# wmctrl might not be installed or the window manager isn't ready.
|
||||
# We just log it once and let the main loop retry.
|
||||
if 'wmctrl_failed_once' not in locals():
|
||||
logger.warning("wmctrl command is not ready, will keep retrying...")
|
||||
wmctrl_failed_once = True
|
||||
pass # Let the outer loop retry
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
if window_found:
|
||||
return "File opened and window activated successfully"
|
||||
else:
|
||||
return f"Failed to find window for {file_name} within {timeout} seconds.", 500
|
||||
|
||||
except Exception as e:
|
||||
return f"Failed to open {path}. Error: {e}", 500
|
||||
|
||||
@@ -1258,37 +1492,78 @@ def close_window():
|
||||
@app.route('/start_recording', methods=['POST'])
|
||||
def start_recording():
|
||||
global recording_process
|
||||
if recording_process:
|
||||
if recording_process and recording_process.poll() is None:
|
||||
return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400
|
||||
|
||||
# Clean up previous recording if it exists
|
||||
if os.path.exists(recording_path):
|
||||
try:
|
||||
os.remove(recording_path)
|
||||
except OSError as e:
|
||||
logger.error(f"Error removing old recording file: {e}")
|
||||
return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500
|
||||
|
||||
d = display.Display()
|
||||
screen_width = d.screen().width_in_pixels
|
||||
screen_height = d.screen().height_in_pixels
|
||||
|
||||
start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
|
||||
|
||||
recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.DEVNULL)
|
||||
# Use stderr=PIPE to capture potential errors from ffmpeg
|
||||
recording_process = subprocess.Popen(shlex.split(start_command),
|
||||
stdout=subprocess.DEVNULL,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True # To get stderr as string
|
||||
)
|
||||
|
||||
return jsonify({'status': 'success', 'message': 'Started recording.'})
|
||||
# Wait a couple of seconds to see if ffmpeg starts successfully
|
||||
try:
|
||||
# Wait for 2 seconds. If ffmpeg exits within this time, it's an error.
|
||||
recording_process.wait(timeout=2)
|
||||
# If wait() returns, it means the process has terminated.
|
||||
error_output = recording_process.stderr.read()
|
||||
return jsonify({
|
||||
'status': 'error',
|
||||
'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
|
||||
}), 500
|
||||
except subprocess.TimeoutExpired:
|
||||
# This is the expected outcome: the process is still running after 2 seconds.
|
||||
return jsonify({'status': 'success', 'message': 'Started recording successfully.'})
|
||||
|
||||
|
||||
@app.route('/end_recording', methods=['POST'])
|
||||
def end_recording():
|
||||
global recording_process
|
||||
|
||||
if not recording_process:
|
||||
if not recording_process or recording_process.poll() is not None:
|
||||
recording_process = None # Clean up stale process object
|
||||
return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
|
||||
|
||||
recording_process.send_signal(signal.SIGINT)
|
||||
recording_process.wait()
|
||||
recording_process = None
|
||||
error_output = ""
|
||||
try:
|
||||
# Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file.
|
||||
recording_process.send_signal(signal.SIGINT)
|
||||
# Wait for ffmpeg to terminate. communicate() gets output and waits.
|
||||
_, error_output = recording_process.communicate(timeout=15)
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.error("ffmpeg did not respond to SIGINT, killing the process.")
|
||||
recording_process.kill()
|
||||
# After killing, communicate to get any remaining output.
|
||||
_, error_output = recording_process.communicate()
|
||||
recording_process = None
|
||||
return jsonify({
|
||||
'status': 'error',
|
||||
'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
|
||||
}), 500
|
||||
|
||||
# return recording video file
|
||||
if os.path.exists(recording_path):
|
||||
recording_process = None # Clear the process from global state
|
||||
|
||||
# Check if the recording file was created and is not empty.
|
||||
if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
|
||||
return send_file(recording_path, as_attachment=True)
|
||||
else:
|
||||
return abort(404, description="Recording failed")
|
||||
logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
|
||||
return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -27,17 +27,57 @@
|
||||
"libreoffice_writer"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "compare_pdfs",
|
||||
"expected": {
|
||||
"func": [
|
||||
"compare_pdfs",
|
||||
"compare_pdfs",
|
||||
"compare_pdfs",
|
||||
"compare_pdfs"
|
||||
],
|
||||
"conj": "or",
|
||||
"expected": [
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines_Gold_1.pdf"
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines_Gold_2.pdf"
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines_Gold.pdf"
|
||||
"dest": "Constitution_Template_With_Guidelines_Gold_3.pdf"
|
||||
},
|
||||
"result": {
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/4bcb1253-a636-4df4-8cb0-a35c04dfef31/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines_Gold_4.pdf"
|
||||
}
|
||||
],
|
||||
"result": [
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines.pdf"
|
||||
}
|
||||
"dest": "Constitution_Template_With_Guidelines_1.pdf"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Documents/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines_2.pdf"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Downloads/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines_3.pdf"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/View_Person_Organizational_Summary.pdf",
|
||||
"dest": "Constitution_Template_With_Guidelines_4.pdf"
|
||||
}
|
||||
]
|
||||
},
|
||||
"proxy": false
|
||||
}
|
||||
@@ -38,7 +38,7 @@
|
||||
"command": [
|
||||
"python",
|
||||
"-c",
|
||||
"import pyautogui; import time; time.sleep(5); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
|
||||
"import pyautogui; import time; time.sleep(15); pyautogui.press(\"down\", presses=8, interval=0.01); time.sleep(1); pyautogui.scroll(-2)"
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -68,12 +68,12 @@
|
||||
"command": [
|
||||
"python",
|
||||
"-c",
|
||||
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); pyautogui.press('down'); time.sleep(0.5); pyautogui.press('enter');"
|
||||
"import pyautogui; import time; time.sleep(1); pyautogui.hotkey('ctrl', 's'); time.sleep(3);"
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": "compare_contains_image",
|
||||
"func": "compare_docx_images",
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/Viewing_Your_Class_Schedule_and_Textbooks.docx",
|
||||
|
||||
@@ -52,7 +52,7 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": "compare_docx_lines",
|
||||
"func": "compare_unique_train_records",
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/HK_train_record.docx",
|
||||
@@ -60,8 +60,16 @@
|
||||
},
|
||||
"expected": {
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
|
||||
"dest": "HK_train_record_Gold.docx"
|
||||
"path": [
|
||||
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record_Gold.docx",
|
||||
"https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/6f81754e-285d-4ce0-b59e-af7edb02d108/HK_train_record.docx"
|
||||
],
|
||||
"dest": [
|
||||
"HK_train_record_Gold.docx",
|
||||
"HK_train_record_Original.docx"
|
||||
],
|
||||
"multi": true,
|
||||
"gives": [0, 1]
|
||||
}
|
||||
},
|
||||
"proxy": false
|
||||
|
||||
@@ -52,20 +52,57 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": "compare_docx_files",
|
||||
"expected": {
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines_Gold.docx"
|
||||
},
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines.docx"
|
||||
},
|
||||
"options": {
|
||||
"ignore_blanks": false
|
||||
}
|
||||
"func": [
|
||||
"compare_docx_files",
|
||||
"compare_docx_files",
|
||||
"compare_docx_files"
|
||||
],
|
||||
"conj": "or",
|
||||
"expected": [
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_1.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines_Gold_1.docx"
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_2.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines_Gold_2.docx"
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/88fe4b2d-3040-4c70-9a70-546a47764b48/CCCH9003_Tutorial_guidelines_Gold_3.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines_Gold_3.docx"
|
||||
}
|
||||
],
|
||||
"result": [
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines.docx"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines.docx"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/CCCH9003_Tutorial_guidelines.docx",
|
||||
"dest": "CCCH9003_Tutorial_guidelines.docx"
|
||||
}
|
||||
],
|
||||
"options": [
|
||||
{
|
||||
"ignore_blanks": false
|
||||
},
|
||||
{
|
||||
"ignore_blanks": false
|
||||
},
|
||||
{
|
||||
"ignore_blanks": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"proxy": false
|
||||
}
|
||||
@@ -47,22 +47,40 @@
|
||||
"command": [
|
||||
"python",
|
||||
"-c",
|
||||
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(0.5); "
|
||||
"import pyautogui; import time; pyautogui.hotkey('ctrl', 's'); time.sleep(2); "
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"func": "compare_docx_tables",
|
||||
"expected": {
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
|
||||
"dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
|
||||
},
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
|
||||
"dest": "Graphemes_Sound_Letter_Patterns.docx"
|
||||
}
|
||||
"func": [
|
||||
"compare_docx_tables",
|
||||
"compare_docx_tables"
|
||||
],
|
||||
"conj": "or",
|
||||
"expected": [
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold.docx",
|
||||
"dest": "Graphemes_Sound_Letter_Patterns_Gold.docx"
|
||||
},
|
||||
{
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/libreoffice_writer/936321ce-5236-426a-9a20-e0e3c5dc536f/Graphemes_Sound_Letter_Patterns_Gold_2.docx",
|
||||
"dest": "Graphemes_Sound_Letter_Patterns_Gold_2.docx"
|
||||
}
|
||||
],
|
||||
"result": [
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
|
||||
"dest": "Graphemes_Sound_Letter_Patterns.docx"
|
||||
},
|
||||
{
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/Graphemes_Sound_Letter_Patterns.docx",
|
||||
"dest": "Graphemes_Sound_Letter_Patterns.docx"
|
||||
}
|
||||
]
|
||||
},
|
||||
"proxy": false
|
||||
}
|
||||
@@ -50,7 +50,7 @@
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"include": [
|
||||
"54\n"
|
||||
"54"
|
||||
],
|
||||
"exclude": []
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"id": "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
|
||||
"snapshot": "base_setup",
|
||||
"instruction": "Can you start streaming the video from this link for me? https://www.youtube.com/watch?v=pgBsyTKAwLw",
|
||||
"source": "https://www.quora.com/How-do-I-play-online-videos-using-the-VLC-media-player",
|
||||
"instruction": "Can you start streaming the video from this link for me? https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8",
|
||||
"source": "https://developer.apple.com/streaming/examples/ - Apple HLS test streams for developers",
|
||||
"config": [
|
||||
{
|
||||
"type": "launch",
|
||||
@@ -32,7 +32,7 @@
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"type": "url",
|
||||
"url": "https://www.youtube.com/watch?v=pgBsyTKAwLw"
|
||||
"url": "https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8"
|
||||
}
|
||||
},
|
||||
"result": {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "276cc624-87ea-4f08-ab93-f770e3790175",
|
||||
"snapshot": "vscode",
|
||||
"instruction": "Please help me set the current user's line length to 50 characters in VS Code.",
|
||||
"instruction": "Please help me set the current user's line length for code wrapping to 50 characters in VS Code.",
|
||||
"source": "https://www.quora.com/unanswered/How-do-you-set-the-line-length-in-Visual-Studio-Code",
|
||||
"config": [
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "7c4cc09e-7a92-40dd-8338-b2286535c4ed",
|
||||
"snapshot": "vscode",
|
||||
"instruction": "Please help me change the display language of VS Code to \"Arabic\".",
|
||||
"instruction": "Please help me change the display language of VS Code to \"Arabic\" without using any extensions.",
|
||||
"source": "",
|
||||
"config": [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user