Robust Evaluation, Blocking File Open, Grader Sensitivity, and LibreOffice Writer Fixes (#217)

* Refactor evaluator structure in LibreOffice Writer example JSON to support multiple expected and result files, enhancing evaluation flexibility.

* Update instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities.

* Update instance type to t3.large and add VNC access URL logging for allocated VMs, enhancing remote access capabilities.

* Update time format in get_vm_file function to include hours, minutes, and seconds for more precise file naming with time suffix.

* Add more delay for 936321ce-5236-426a-9a20-e0e3c5dc536f; support one more potential solution.

* Enhance SetupController with a configurable retry limit and improved error handling for file-opening requests. Introduce a new function to compare unique training records, and update logging for better debugging. Adjust JSON examples for evaluation to support multiple expected and result files.

* Clean up debug code

---------

Co-authored-by: yuanmengqi <yuanmengqi@mail.ustc.edu.cn>
This commit is contained in:
Tianbao Xie
2025-06-16 21:37:19 +08:00
committed by GitHub
parent 347238e17e
commit 4e11eafd1d
13 changed files with 523 additions and 135 deletions

View File

@@ -4,6 +4,7 @@ import platform
import shlex
import json
import subprocess, signal
import time
from pathlib import Path
from typing import Any, Optional, Sequence
from typing import List, Dict, Tuple, Literal
@@ -65,6 +66,8 @@ app = Flask(__name__)
# Zero out PyAutoGUI's pause/catch-up settings.
# NOTE(review): the exact effect of these two knobs is defined by PyAutoGUI — confirm against its docs.
pyautogui.PAUSE = 0
pyautogui.DARWIN_CATCH_UP_TIME = 0
# Deadline, in seconds, that open_file uses while polling for the opened file's window.
TIMEOUT = 1800 # seconds
# Module-wide logger; reuses the Flask application's logger.
logger = app.logger
# Handle of the ffmpeg child process; assigned by start_recording and reset to None by end_recording.
recording_process = None # fixme: this is a temporary solution for recording, need to be changed to support multiple-process
# File that ffmpeg records into and that end_recording sends back to the caller.
recording_path = "/tmp/recording.mp4"
@@ -202,8 +205,8 @@ def capture_screen_with_cursor():
pos = (round(pos_win[0]*ratio - hotspotx), round(pos_win[1]*ratio - hotspoty))
img.paste(cursor, pos, cursor)
except:
pass
except Exception as e:
logger.warning(f"Failed to capture cursor on Windows, screenshot will not have a cursor. Error: {e}")
img.save(file_path)
elif user_platform == "Linux":
@@ -1124,18 +1127,72 @@ def open_file():
if not path:
return "Path not supplied!", 400
path = Path(os.path.expandvars(os.path.expanduser(path)))
path_obj = Path(os.path.expandvars(os.path.expanduser(path)))
if not path.exists():
return f"File not found: {path}", 404
if not path_obj.exists():
return f"File not found: {path_obj}", 404
try:
if platform.system() == "Windows":
os.startfile(path)
os.startfile(path_obj)
else:
open_cmd: str = "open" if platform.system() == "Darwin" else "xdg-open"
subprocess.Popen([open_cmd, str(path)])
return "File opened successfully"
subprocess.Popen([open_cmd, str(path_obj)])
# Wait for the file to open
file_name = path_obj.name
# Some apps don't include the extension in the title
file_name_without_ext, _ = os.path.splitext(file_name)
start_time = time.time()
window_found = False
while time.time() - start_time < TIMEOUT:
os_name = platform.system()
if os_name in ['Windows', 'Darwin']:
import pygetwindow as gw
# Check for window title containing file name or file name without extension
windows = gw.getWindowsWithTitle(file_name)
if not windows:
windows = gw.getWindowsWithTitle(file_name_without_ext)
if windows:
# To be more specific, we can try to activate it
windows[0].activate()
window_found = True
break
elif os_name == 'Linux':
try:
# Using wmctrl to list windows and check if any window title contains the filename
result = subprocess.run(['wmctrl', '-l'], capture_output=True, text=True, check=True)
window_list = result.stdout.strip().split('\n')
if not result.stdout.strip():
pass # No windows, just continue waiting
else:
for window in window_list:
if file_name in window or file_name_without_ext in window:
# a window is found, now activate it
window_id = window.split()[0]
subprocess.run(['wmctrl', '-i', '-a', window_id], check=True)
window_found = True
break
if window_found:
break
except (subprocess.CalledProcessError, FileNotFoundError):
# wmctrl might not be installed or the window manager isn't ready.
# We just log it once and let the main loop retry.
if 'wmctrl_failed_once' not in locals():
logger.warning("wmctrl command is not ready, will keep retrying...")
wmctrl_failed_once = True
pass # Let the outer loop retry
time.sleep(1)
if window_found:
return "File opened and window activated successfully"
else:
return f"Failed to find window for {file_name} within {timeout} seconds.", 500
except Exception as e:
return f"Failed to open {path}. Error: {e}", 500
@@ -1258,37 +1315,78 @@ def close_window():
@app.route('/start_recording', methods=['POST'])
def start_recording():
    """Start an ffmpeg screen recording of X display ``:0.0``.

    Returns a JSON response:
      * 400 if a recording process is still running;
      * 500 if a leftover recording file cannot be removed, or if ffmpeg
        exits within the 2-second start-up grace period;
      * success if ffmpeg is still alive after that grace period.
    """
    global recording_process

    # poll() is None only while the child is alive; a finished process left
    # over from an earlier recording must not block a new one.
    if recording_process and recording_process.poll() is None:
        return jsonify({'status': 'error', 'message': 'Recording is already in progress.'}), 400

    # Clean up previous recording if it exists
    if os.path.exists(recording_path):
        try:
            os.remove(recording_path)
        except OSError as e:
            logger.error(f"Error removing old recording file: {e}")
            return jsonify({'status': 'error', 'message': f'Failed to remove old recording file: {e}'}), 500

    d = display.Display()
    try:
        screen_width = d.screen().width_in_pixels
        screen_height = d.screen().height_in_pixels
    finally:
        # Release the X connection instead of leaking one per request.
        # NOTE(review): assumes `display` is python-xlib's module — confirm.
        d.close()

    # stderr is piped so start-up errors can be reported, but nothing reads
    # the pipe while the recording runs. `-nostats -loglevel error` keeps
    # ffmpeg from writing periodic progress lines that would eventually fill
    # the pipe buffer and stall the encoder.
    start_command = (
        f"ffmpeg -y -nostats -loglevel error -f x11grab -draw_mouse 1 "
        f"-s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
    )

    recording_process = subprocess.Popen(
        shlex.split(start_command),
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,  # To get stderr as string
    )

    # Give ffmpeg two seconds: if it is still running afterwards the
    # recording started; if it already exited, start-up failed.
    try:
        recording_process.wait(timeout=2)
    except subprocess.TimeoutExpired:
        # This is the expected outcome: the process is still running after 2 seconds.
        return jsonify({'status': 'success', 'message': 'Started recording successfully.'})

    # The process has terminated; collect its stderr (this also closes the
    # pipe) and drop the dead handle from the global state.
    _, error_output = recording_process.communicate()
    recording_process = None
    return jsonify({
        'status': 'error',
        'message': f'Failed to start recording. ffmpeg terminated unexpectedly. Error: {error_output}'
    }), 500
@app.route('/end_recording', methods=['POST'])
def end_recording():
    """Stop the running ffmpeg recording and return the recorded video.

    Returns:
      * 400 (JSON) if no recording process is currently running;
      * 500 (JSON) if ffmpeg ignored SIGINT for 15 seconds and was killed;
      * the recording file as an attachment on success;
      * aborts with 500 if the output file is missing or empty.
    """
    global recording_process

    if not recording_process or recording_process.poll() is not None:
        recording_process = None  # Clean up stale process object
        return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400

    error_output = ""
    try:
        # Send SIGINT for a graceful shutdown, allowing ffmpeg to finalize the file.
        recording_process.send_signal(signal.SIGINT)
        # communicate() both drains stderr and waits, so the wait is bounded
        # by the timeout and cannot block on a full stderr pipe.
        _, error_output = recording_process.communicate(timeout=15)
    except subprocess.TimeoutExpired:
        logger.error("ffmpeg did not respond to SIGINT, killing the process.")
        recording_process.kill()
        # After killing, communicate to get any remaining output.
        _, error_output = recording_process.communicate()
        recording_process = None
        return jsonify({
            'status': 'error',
            'message': f'Recording process was unresponsive and had to be killed. Stderr: {error_output}'
        }), 500

    recording_process = None  # Clear the process from global state

    # Check if the recording file was created and is not empty.
    if os.path.exists(recording_path) and os.path.getsize(recording_path) > 0:
        return send_file(recording_path, as_attachment=True)

    logger.error(f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
    return abort(500, description=f"Recording failed. The output file is missing or empty. ffmpeg stderr: {error_output}")
if __name__ == '__main__':