VLC updates and some infra bug fixes

This commit is contained in:
Timothyxxx
2024-01-09 23:14:06 +08:00
parent 457a0498f2
commit abcafce750
10 changed files with 291 additions and 121 deletions

View File

@@ -1,11 +1,15 @@
import json
import logging
import random
from typing import Any, Dict
import requests
from desktop_env.envs.actions import KEYBOARD_KEYS
import logging
logger = logging.getLogger("desktopenv.pycontroller")
class PythonController:
def __init__(self, http_server: str, pkgs_prefix: str = "python -c \"import pyautogui; {command}\""):
self.http_server = http_server
@@ -62,6 +66,10 @@ class PythonController:
action_type = action["action_type"]
parameters = action["parameters"] if "parameters" in action else {}
move_mode = random.choice(
["pyautogui.easeInQuad", "pyautogui.easeOutQuad", "pyautogui.easeInOutQuad", "pyautogui.easeInBounce",
"pyautogui.easeInElastic"])
duration = random.uniform(0.5, 1)
if action_type == "MOVE_TO":
if parameters == {} or None:
@@ -69,7 +77,7 @@ class PythonController:
elif "x" in parameters and "y" in parameters:
x = parameters["x"]
y = parameters["y"]
self.execute_python_command(f"pyautogui.moveTo({x}, {y})")
self.execute_python_command(f"pyautogui.moveTo({x}, {y}, {duration}, {move_mode})")
else:
raise Exception(f"Unknown parameters: {parameters}")
@@ -211,6 +219,27 @@ class PythonController:
else:
raise Exception(f"Unknown action type: {action_type}")
def get_vm_screen_size(self):
    """
    Ask the VM-side HTTP server for the screen dimensions.

    Sends a POST to ``<http_server>/screen_size``. On a 200 answer the
    decoded JSON body is returned; on any other status the code is logged
    and None is returned.
    """
    endpoint = self.http_server + "/screen_size"
    reply = requests.post(endpoint)
    if reply.status_code != 200:
        logger.error("Failed to get screen size. Status code: %d", reply.status_code)
        return None
    return reply.json()
def get_vm_window_size(self, app_class_name: str):
    """
    Ask the VM-side HTTP server for the size of an application window.

    Sends a POST to ``<http_server>/window_size`` with ``app_class_name`` as
    form data. On a 200 answer the decoded JSON body is returned; on any
    other status the code is logged and None is returned.
    """
    endpoint = self.http_server + "/window_size"
    form_fields = {"app_class_name": app_class_name}
    reply = requests.post(endpoint, data=form_fields)
    if reply.status_code != 200:
        logger.error("Failed to get window size. Status code: %d", reply.status_code)
        return None
    return reply.json()
def get_vlc_status(self, host='localhost', port=8080, password='password'):
url = f'http://{host}:{port}/requests/status.xml'
@@ -218,8 +247,7 @@ class PythonController:
response = requests.get(url, auth=('', password))
if response.status_code == 200:
print("File downloaded successfully")
return response.content
else:
print("Failed to get vlc status. Status code:", response.status_code)
logger.error("Failed to get vlc status. Status code: %d", response.status_code)
return None

View File

@@ -199,10 +199,14 @@ class SetupController:
except requests.exceptions.RequestException as e:
logger.error("An error occurred while trying to send the request: %s", e)
def _launch_setup(self, command: List[str]):
def _launch_setup(self, command: Union[str, List[str]]):
if not command:
raise Exception("Empty command to launch.")
if isinstance(command, str) and len(command.split()) > 1:
logger.warning("Command should be a list of strings. Now it is a string. Will split it by space.")
command = command.split()
payload = json.dumps({"command": command})
headers = {"Content-Type": "application/json"}

View File

@@ -1,6 +1,6 @@
import os
from typing import Dict
import os
import requests
@@ -37,11 +37,16 @@ def get_vm_file(env, config: Dict[str, str]) -> str:
_path = os.path.join(env.cache_dir, config["dest"])
file = env.controller.get_file(config["path"])
if file is None:
raise FileNotFoundError("File not found on VM: {:}".format(config["path"]))
with open(_path, "wb") as f:
f.write(file)
return _path
def get_cache_file(env, config: Dict[str, str]) -> str:
"""
Config:

View File

@@ -1,66 +1,13 @@
import logging
from typing import TypeVar
import platform
import subprocess
import ctypes
import os
import logging
logger = logging.getLogger("desktopenv.getters.misc")
R = TypeVar("Rule")
def get_rule(env, config: R) -> R:
    """
    Hand back the ``"rules"`` entry of ``config`` untouched.

    ``env`` is accepted but not used by this getter.
    """
    rules = config["rules"]
    return rules
def get_desktop_path():
    """
    Return the path of the current user's Desktop directory.

    The path is ``<home>/Desktop``, where ``<home>`` is resolved with
    ``os.path.expanduser("~")`` instead of being assembled from a login name.
    This avoids three problems of the hand-built variant:

    * ``os.path.join("C:", "Users", ...)`` produces the drive-relative path
      ``C:Users\\...`` on Windows;
    * ``os.getlogin()`` raises when the process has no controlling terminal;
    * home directories do not always live under ``/home`` or ``/Users``.

    Raises:
        Exception: if ``platform.system()`` is not Windows, Darwin or Linux.
    """
    if platform.system() not in ("Windows", "Darwin", "Linux"):
        raise Exception("Unsupported operating system")
    return os.path.join(os.path.expanduser("~"), "Desktop")
def get_wallpaper():
    """
    Return the path of the current desktop wallpaper for the host OS.

    Dispatches on ``platform.system()`` to a per-OS helper.

    Returns:
        The wallpaper path as a string, None when the macOS or Linux lookup
        reported an error, or the literal string "Unsupported OS" for any
        other platform (kept for backward compatibility with callers).
    """

    def get_wallpaper_windows():
        # Ask user32 for the wallpaper path via SPI_GETDESKWALLPAPER.
        SPI_GETDESKWALLPAPER = 0x73
        MAX_PATH = 260
        buffer = ctypes.create_unicode_buffer(MAX_PATH)
        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
        return buffer.value

    def get_wallpaper_macos():
        script = """
        tell application "System Events" to tell every desktop to get picture
        """
        # stderr must be piped explicitly; otherwise communicate() always
        # yields None for it and the error branch below can never trigger.
        process = subprocess.Popen(['osascript', '-e', script],
                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, error = process.communicate()
        if error:
            logger.error("Error: %s", error)
            return None
        return output.strip().decode('utf-8')

    def get_wallpaper_linux():
        try:
            output = subprocess.check_output(["gsettings", "get", "org.gnome.desktop.background", "picture-uri"])
            # gsettings prints a quoted file:// URI; strip both decorations.
            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
        except Exception as e:
            # Deliberately best-effort: any failure is logged and reported as None.
            logger.error("Error: %s", e)
            return None

    os_name = platform.system()
    if os_name == 'Windows':
        return get_wallpaper_windows()
    if os_name == 'Darwin':
        return get_wallpaper_macos()
    if os_name == 'Linux':
        return get_wallpaper_linux()
    return "Unsupported OS"

View File

@@ -33,7 +33,7 @@ def get_vlc_config(env, config: Dict[str, str]):
# fixme: depends on how we config and install the vlc in virtual machine, need to be aligned and double-checked
if os_type == "Linux":
config_path = \
env.controller.execute_python_command("import os; print(os.path.expanduser('~/snap/vlc/common/vlcrc'))")[
env.controller.execute_python_command("import os; print(os.path.expanduser('~/.config/vlc/vlcrc'))")[
'output'].strip()
elif os_type == "Darwin":
config_path = env.controller.execute_python_command(

View File

@@ -7,8 +7,6 @@ from xml.etree import ElementTree
import acoustid
import cv2
import imagehash
import pyautogui
import pygetwindow as gw # todo: change to the library that supports Linux
from PIL import Image
logger = logging.getLogger("desktopenv.metrics.vlc")
@@ -72,30 +70,11 @@ def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> f
return False
def are_audio_files_similar(mp3_file_path, mp4_file_path):
# Extract audio fingerprint from MP3 file
mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path)
# Extract the audio stream from the MP4 file
mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3'
try:
subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3",
mp4_audio_path], check=True)
except subprocess.CalledProcessError as e:
print(f"An error occurred during audio extraction from MP4: {e}")
return False
# Extract audio fingerprint from the extracted audio
mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path)
# Clean up temporary extracted audio file
os.remove(mp4_audio_path)
# Compare fingerprints (rudimentary comparison)
if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint:
def is_vlc_fullscreen(actual_window_size, screen_size):
if actual_window_size['width'] == screen_size['width'] and actual_window_size['height'] == screen_size['height']:
return True
return False
else:
return False
def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):
@@ -137,28 +116,27 @@ def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=
return True
def is_vlc_fullscreen():
"""
Checks if the VLC window is in full-screen mode.
def are_audio_files_similar(mp3_file_path, mp4_file_path):
# Extract audio fingerprint from MP3 file
mp3_fingerprint, mp3_duration = acoustid.fingerprint_file(mp3_file_path)
When VLC is in full-screen mode, its window size matches the screen size with no borders.
"""
# Extract the audio stream from the MP4 file
mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3'
try:
# Get the VLC window; adjust the title as per your VLC window's title
vlc_window = gw.getWindowsWithTitle('VLC media player')[0] # Adjust title if needed
if not vlc_window:
return False
# Get screen size
screen_width, screen_height = pyautogui.size()
# Check if VLC window size matches the screen size
return (vlc_window.width == screen_width and vlc_window.height == screen_height)
except IndexError:
# VLC window not found
logger.error("VLC window not found.")
return False
except Exception as e:
logger.error(f"An error occurred: {e}")
subprocess.run(["ffmpeg", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k", "-f", "mp3",
mp4_audio_path], check=True)
except subprocess.CalledProcessError as e:
print(f"An error occurred during audio extraction from MP4: {e}")
return False
# Extract audio fingerprint from the extracted audio
mp4_fingerprint, mp4_duration = acoustid.fingerprint_file(mp4_audio_path)
# Clean up temporary extracted audio file
os.remove(mp4_audio_path)
# Compare fingerprints (rudimentary comparison)
if mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint:
return True
return False

View File

@@ -1,13 +1,17 @@
import ctypes
import os
import platform
import subprocess
from pathlib import Path
from typing import List
import Xlib
import pyautogui
import requests
from PIL import Image
from flask import Flask, request, jsonify, send_file
from Xlib import display, X
from flask import Flask, request, jsonify, send_file, abort
from werkzeug.utils import secure_filename
from pyxcursor import Xcursor
@@ -99,6 +103,164 @@ def capture_screen_with_cursor():
return send_file(file_path, mimetype='image/png')
@app.route('/screen_size', methods=['POST'])
def get_screen_size():
    """
    Report the pixel dimensions of the X screen.

    Returns:
        A JSON object ``{"width": ..., "height": ...}`` taken from the
        default screen of a freshly opened X display connection.
    """
    d = display.Display()
    try:
        screen = d.screen()
        size = {
            "width": screen.width_in_pixels,
            "height": screen.height_in_pixels
        }
    finally:
        # A new connection is opened per request; close it so repeated calls
        # do not accumulate open X connections.
        d.close()
    return jsonify(size)
@app.route('/window_size', methods=['POST'])
def get_window_size():
    """
    Report the geometry of the first window whose WM_CLASS matches a name.

    Form parameters:
        app_class_name: class name to look for; compared case-insensitively
            against every entry of each window's WM_CLASS.

    Returns:
        200 with ``{"width": ..., "height": ...}`` for the first match,
        400 with an error object when ``app_class_name`` is missing,
        404 with an error object when no window matches. A Flask view must
        not return None, so the no-match case is an explicit 404.
    """
    app_class_name = request.form.get('app_class_name')
    if app_class_name is None:
        return jsonify({"error": "app_class_name is required"}), 400
    wanted = app_class_name.lower()

    d = display.Display()
    try:
        root = d.screen().root
        client_list = root.get_full_property(d.intern_atom('_NET_CLIENT_LIST'), X.AnyPropertyType)
        # The property is absent when the window manager does not publish it.
        window_ids = client_list.value if client_list is not None else []

        for window_id in window_ids:
            try:
                window = d.create_resource_object('window', window_id)
                wm_class = window.get_wm_class()
                if wm_class is None:
                    continue
                if wanted in (name.lower() for name in wm_class):
                    geom = window.get_geometry()
                    return jsonify(
                        {
                            "width": geom.width,
                            "height": geom.height
                        }
                    )
            except Xlib.error.XError:  # Ignore windows that give an error
                continue
    finally:
        # Release the per-request X connection on every exit path.
        d.close()

    return jsonify({"error": "No window found for app_class_name: {}".format(app_class_name)}), 404
@app.route('/desktop_path', methods=['POST'])
def get_desktop_path():
    """
    Report the current user's Desktop directory.

    The candidate path is ``<home>/Desktop`` on Windows, macOS and Linux
    alike. Responds with ``{"desktop_path": ...}`` when the OS is one of
    those three and the directory exists, otherwise with a 404 error object.
    """
    # Home directory resolved through pathlib, independent of the platform
    candidate = os.path.join(str(Path.home()), "Desktop")

    # Every supported OS uses the same layout, so only membership matters
    supported = platform.system() in ("Windows", "Darwin", "Linux")

    if not supported or not os.path.exists(candidate):
        return jsonify(error="Unsupported operating system or desktop path not found"), 404
    return jsonify(desktop_path=candidate)
@app.route('/wallpaper', methods=['POST'])
def get_wallpaper():
    """
    Serve the current desktop wallpaper image file.

    The wallpaper path is looked up with a per-OS helper selected by
    ``platform.system()`` and the file is then streamed with ``send_file``.

    Responses:
        200 with the file contents on success,
        400 when the OS is not Windows, Darwin or Linux,
        404 when no wallpaper path could be determined,
        500 when the file could not be served.
    """

    def get_wallpaper_windows():
        # Ask user32 for the wallpaper path via SPI_GETDESKWALLPAPER.
        SPI_GETDESKWALLPAPER = 0x73
        MAX_PATH = 260
        buffer = ctypes.create_unicode_buffer(MAX_PATH)
        ctypes.windll.user32.SystemParametersInfoW(SPI_GETDESKWALLPAPER, MAX_PATH, buffer, 0)
        return buffer.value

    def get_wallpaper_macos():
        script = """
        tell application "System Events" to tell every desktop to get picture
        """
        process = subprocess.Popen(['osascript', '-e', script], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, error = process.communicate()
        if error:
            app.logger.error("Error: %s", error.decode('utf-8'))
            return None
        return output.strip().decode('utf-8')

    def get_wallpaper_linux():
        try:
            output = subprocess.check_output(
                ["gsettings", "get", "org.gnome.desktop.background", "picture-uri"],
                stderr=subprocess.PIPE
            )
            # gsettings prints a quoted file:// URI; strip both decorations.
            return output.decode('utf-8').strip().replace('file://', '').replace("'", "")
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing gsettings binary, which would otherwise
            # escape as an unhandled exception instead of a clean 404.
            app.logger.error("Error: %s", e)
            return None

    os_name = platform.system()
    wallpaper_path = None
    if os_name == 'Windows':
        wallpaper_path = get_wallpaper_windows()
    elif os_name == 'Darwin':
        wallpaper_path = get_wallpaper_macos()
    elif os_name == 'Linux':
        wallpaper_path = get_wallpaper_linux()
    else:
        app.logger.error(f"Unsupported OS: {os_name}")
        abort(400, description="Unsupported OS")

    if not wallpaper_path:
        abort(404, description="Wallpaper file not found")

    try:
        # No filename keyword is passed: its name differs between Flask
        # releases (attachment_filename vs download_name), and send_file
        # already derives the name and mimetype from the path itself.
        return send_file(wallpaper_path)
    except Exception as e:
        app.logger.error(f"An error occurred while serving the wallpaper file: {e}")
        abort(500, description="Unable to serve the wallpaper file")
@app.route('/list_directory', methods=['POST'])
def get_directory_tree():
    """
    Return a recursive listing of a directory as nested JSON.

    JSON body:
        path: directory to start from.

    Responses:
        200 with ``{"directory_tree": ...}``,
        400 when the body is not a JSON object, lacks ``path``, or ``path``
        is not a directory.
    """

    def _list_dir_contents(directory):
        """
        List the contents of a directory recursively, building a tree structure.

        :param directory: The path of the directory to inspect.
        :return: A nested dictionary with the contents of the directory, or
                 ``{'error': message}`` if the directory cannot be listed.
        """
        tree = {'type': 'directory', 'name': os.path.basename(directory), 'children': []}
        try:
            # List all files and directories in the current directory
            for entry in os.listdir(directory):
                full_path = os.path.join(directory, entry)
                # If entry is a directory, recurse into it
                if os.path.isdir(full_path):
                    tree['children'].append(_list_dir_contents(full_path))
                else:
                    tree['children'].append({'type': 'file', 'name': entry})
        except OSError as e:
            # If the directory cannot be accessed, return the exception message
            tree = {'error': str(e)}
        return tree

    # silent=True yields None for a missing or malformed JSON body; together
    # with the dict check this turns every bad body into a 400 instead of a
    # TypeError on the membership test below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'path' not in data:
        return jsonify(error="Missing 'path' parameter"), 400

    start_path = data['path']
    # Ensure the provided path is a directory
    if not os.path.isdir(start_path):
        return jsonify(error="The provided path is not a directory"), 400

    # Generate the directory tree starting from the provided path
    directory_tree = _list_dir_contents(start_path)
    return jsonify(directory_tree=directory_tree)
@app.route('/file', methods=['POST'])
def get_file():
# Retrieve filename from the POST request

View File

@@ -1,12 +1,28 @@
{
"id": "8d9fd4e2-6fdb-46b0-b9b9-02f06495c62f",
"snapshot": "base_setup",
"instruction": "Make the video window whole-screen",
"instruction": "Can you make the video fill up the whole screen? It's a bit too small to see right now, and I'd like to see it better.",
"source": "https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s",
"config": [],
"config": [
{
"type": "launch",
"parameters": {
"command": "vlc"
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vlc"
],
"evaluator": "evaluation_dir"
"evaluator": {
"func": "is_vlc_fullscreen",
"expected": {
"type": "vm_window_size",
"app_class_name": "vlc"
},
"result": {
"type": "vm_screen_size"
}
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6",
"snapshot": "base_setup",
"instruction": "This video is upside down, help me rotate it",
"instruction": "Hey, could you turn this video the right way up for me? And once it's flipped around, could you save it for me with the name '1984_Apple_Macintosh_Commercial.mp4' on the main screen where all my files are?",
"source": "https://www.dedoimedo.com/computers/vlc-rotate-videos.html",
"config": [
{
@@ -10,21 +10,33 @@
"files": [
{
"url": "https://drive.usercontent.google.com/download?id=1CLBjjsjGmHlbDg1lDcxfdE0F0C7-A5gZ&export=download&authuser=0&confirm=t&uuid=dde635fc-e223-4cd3-8065-899396e68d0a&at=APZUnTWQHdWYLLxlofuOIuhE2qiS:1704722380621",
"path": "flipped_1984_Apple_Macintosh_Commercial.mp4"
"path": "Desktop/flipped_1984_Apple_Macintosh_Commercial.mp4"
}
]
}
},
{
"type": "launch",
"parameters": {
"command": "vlc"
}
"parameters": {
"command": ["vlc", "Desktop/flipped_1984_Apple_Macintosh_Commercial.mp4"]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vlc"
],
"evaluator": "evaluation_dir"
"evaluator": {
"func": "compare_videos",
"expected": {
"type": "cloud_file",
"path": "https://drive.usercontent.google.com/download?id=10P9nKW7VTaHGS15tj4C0GVxUIYA0Nh42&export=download&authuser=0&confirm=t&uuid=1b812f06-b624-4aed-8d91-734c54421da3&at=APZUnTUCB-58SwcccQ_WN1T4KdJy:1704808326066",
"dest": "1984_Apple_Macintosh_Commercial_gold.mp4"
},
"result": {
"type": "vm_file",
"path": "1984_Apple_Macintosh_Commercial.mp4",
"dest": "1984_Apple_Macintosh_Commercial.mp4"
}
}
}

View File

@@ -3,7 +3,25 @@
"snapshot": "base_setup",
"instruction": "Set this frame of the current video as my wallpaper",
"source": "https://www.youtube.com/watch?v=XHprwDJ0-fU&t=436s",
"config": [],
"config": [
{
"type": "download",
"parameters": {
"files": [
{
"url": "",
"path": ""
}
]
}
},
{
"type": "launch",
"parameters": {
"command": ["vlc", "/path/to/your/video.mp4", "--start-time=0", "--run-time=10", "vlc://quit", "&&", "vlc", "/path/to/your/video.mp4", "--start-time=10"]
}
}
],
"trajectory": "trajectories/",
"related_apps": [
"vlc"