VLC updates and some infra bug fixes

2024-01-09 09:30:11 +08:00
parent 2b09b7ce41
commit fa84b20ea5
13 changed files with 278 additions and 71 deletions
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ todo
 ### For users of the environment
 todo

-## Road map (Proposed)
+## Road map of infra (Proposed)

 - [x] Explore VMware, and whether it can be connected to and controlled through a mouse package
 - [x] Explore Windows and MacOS, whether it can be installed
@@ -20,3 +20,7 @@ todo
 - [x] Set up a pipeline and build agents implementation (zero-shot) for the task
 - [x] Start to decide which tasks inside the DesktopEnv to focus on, and start to wrap up the environment for public release
 - [x] Start to annotate the examples for ~~training~~ and testing
+- [ ] Error handling during file passing and file opening, etc.
+- [ ] Add accessibility tree from the OS into the observation space
+- [ ] Add pre-process and post-process action support for benchmarking setup and evaluation
+- [ ] Multiprocess support, which can make reinforcement learning more efficient
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -201,7 +201,7 @@ class SetupController:

    def _launch_setup(self, command: List[str]):
        if not command:
-            raise Exception("Empty comman to launch.")
+            raise Exception("Empty command to launch.")

        payload = json.dumps({"command": command})
        headers = {"Content-Type": "application/json"}
--- a/desktop_env/envs/desktop_env.py
+++ b/desktop_env/envs/desktop_env.py
@@ -223,7 +223,7 @@ class DesktopEnv(gym.Env):
        Evaluate whether the task is successfully completed.
        """

-        self.setup_controller.setup(self.evaluator["postconfig"])
+        self.setup_controller.setup(self.evaluator["postconfig"]) if "postconfig" in self.evaluator else None

        result_state = self.result_getter(self, self.evaluator["result"])
        expected_state = self.expected_getter(self, self.evaluator["expected"]) if "expected" in self.evaluator \
--- a/desktop_env/evaluators/getters/vlc.py
+++ b/desktop_env/evaluators/getters/vlc.py
@@ -1,6 +1,9 @@
+import logging
 import os
 from typing import Dict

+logger = logging.getLogger("desktopenv.getters.vlc")
+

 def get_vlc_playing_info(env, config: Dict[str, str]):
    """
@@ -13,7 +16,33 @@ def get_vlc_playing_info(env, config: Dict[str, str]):
    password = 'password'

    content = env.controller.get_vlc_status(host, port, password)
-    print("content: ", content)
+    with open(_path, "wb") as f:
+        f.write(content)
+
+    return _path
+
+
+def get_vlc_config(env, config: Dict[str, str]):
+    """
+    Reads the VLC configuration file to check setting.
+    """
+    _path = os.path.join(env.cache_dir, config["dest"])
+
+    os_type = env.controller.execute_python_command("import platform; print(platform.system())")['output'].strip()
+
+    # fixme: depends on how we config and install the vlc in virtual machine, need to be aligned and double-checked
+    if os_type == "Linux":
+        config_path = \
+        env.controller.execute_python_command("import os; print(os.path.expanduser('~/snap/vlc/common/vlcrc'))")[
+            'output'].strip()
+    elif os_type == "Darwin":
+        config_path = env.controller.execute_python_command(
+            "import os; print(os.path.expanduser('~/Library/Preferences/org.videolan.vlc/vlcrc'))")['output'].strip()
+    elif os_type == "Windows":
+        config_path = env.controller.execute_python_command(
+            "import os; print(os.path.expanduser('~\\AppData\\Roaming\\vlc\\vlcrc'))")['output'].strip()
+
+    content = env.controller.get_file(config_path)
    with open(_path, "wb") as f:
        f.write(content)

--- a/desktop_env/evaluators/metrics/README.md
+++ b/desktop_env/evaluators/metrics/README.md
@@ -130,6 +130,12 @@ To enable and use the HTTP interface in VLC Media Player for remote control and
 - You will be prompted for a password. Enter the password you set in the Lua HTTP settings.
 - Once logged in, you will have access to VLC's HTTP interface for remote control.

+#### Packages
+```bash
+
+pip install opencv-python-headless Pillow imagehash
+```
+
 #### Troubleshooting

 - If you cannot access the HTTP interface, check if your firewall or security software is blocking the connection.
--- a/desktop_env/evaluators/metrics/vlc.py
+++ b/desktop_env/evaluators/metrics/vlc.py
@@ -1,64 +1,142 @@
-import os
-import platform
-from xml.etree import ElementTree
-import pygetwindow as gw
-import pyautogui
-from typing import Dict
-
 import logging
+import os
+import subprocess
+from typing import Dict
+from xml.etree import ElementTree
+
+import acoustid
+import cv2
+import imagehash
+import pyautogui
+import pygetwindow as gw  # todo: change to the library that supports Linux
+from PIL import Image
+
 logger = logging.getLogger("desktopenv.metrics.vlc")

-def get_vlc_config(setting_name):
-    """
-    Reads the VLC configuration file to check for a specific setting.

-    # Example usage
-    setting_name = 'recordings_folder='
-    setting = read_vlc_config(setting_name)
-    """
-    # Common paths for VLC config file on different operating systems
-    paths = {
-        'Windows': os.path.expanduser('~\\AppData\\Roaming\\vlc\\vlcrc'),
-        'Darwin': os.path.expanduser('~/Library/Preferences/org.videolan.vlc/vlcrc'),
-        'Linux': os.path.expanduser('~/.config/vlc/vlcrc')
-    }
-
-    os_type = platform.system()
-    config_path = paths.get(os_type)
-
-    if not config_path or not os.path.exists(config_path):
-        logger.warning("VLC config file not found for this operating system.")
-        return None
-
-    try:
-        with open(config_path, 'r', encoding="utf-8") as file:
-            for line in file:
-                if line.startswith(setting_name):
-                    return line.strip()
-    except IOError as e:
-        logger.error(f"Error reading config file: {e}")
-
-    return None
-
-
-def is_vlc_playing(actual: str, rule: Dict[str, str]) -> float:
+def is_vlc_playing(actual_status_path: str, rule: Dict[str, str]) -> float:
    """
    Checks if VLC is currently playing a file.
    """
-    with open(actual, 'rb') as file:
+    with open(actual_status_path, 'rb') as file:
        actual_status = file.read().decode('utf-8')

    tree = ElementTree.fromstring(actual_status)
    status = tree.find('state').text
    if status == 'playing':
-        file_info = tree.find('information/category[@name="meta"]/info[@name="filename"]').text
-        print("file_info: ", file_info)
-        if file_info:
-            return 1 if file_info.endswith(rule['expected']) else 0
+        if rule['type'] == 'file_name':
+            file_info = tree.find('information/category[@name="meta"]/info[@name="filename"]').text
+            if file_info:
+                return 1 if file_info.endswith(rule['file_name']) else 0
+        elif rule['type'] == 'url':
+            file_info = tree.find('information/category[@name="meta"]/info[@name="url"]').text
+            if file_info:
+                return 1 if file_info.endswith(rule['url']) else 0
+        else:
+            logger.error(f"Unknown type: {rule['type']}")
+            return 0
    else:
        return 0


+def is_vlc_recordings_folder(actual_config_path: str, rule: Dict[str, str]) -> float:
+    """
+    Checks if VLC's recording folder is set to the expected value.
+
+    Args:
+        actual_config_path: Local path of the VLC configuration file (vlcrc) to inspect.
+        rule: Must contain 'recording_file_path', the folder the setting is expected to hold.
+
+    Returns:
+        1.0 if the first active (non-comment) line mentioning the setting holds exactly the
+        expected path; 0.0 if it holds another value, if the setting is absent, or if the
+        configuration file cannot be read.
+    """
+    expected_recording_file_path = rule['recording_file_path']
+
+    # Reading happens inside the try so that a missing or unreadable file is reported as a
+    # failed check instead of escaping as an exception.
+    try:
+        with open(actual_config_path, 'rb') as file:
+            config_file = file.read().decode('utf-8')
+    except FileNotFoundError:
+        logger.error("VLC configuration file not found.")
+        return 0.0
+    except Exception as e:
+        logger.error(f"An error occurred: {e}")
+        return 0.0
+
+    # Iterate over lines; iterating the decoded string directly would yield single characters.
+    for line in config_file.splitlines():
+        # Skip comments and empty lines
+        if line.startswith('#') or not line.strip():
+            continue
+        # Check if the line contains the recording path setting
+        # NOTE(review): VLC's vlcrc appears to name this option 'input-record-path' -- confirm
+        # that 'recorded_files_path' is the key actually written by the installed VLC.
+        if 'recorded_files_path' in line:
+            # Split on the first '=' only, so that a path containing '=' is kept intact
+            _, _, current_path = line.partition('=')
+            return 1.0 if current_path.strip() == expected_recording_file_path else 0.0
+
+    # The configuration key was not found in any active line of the file
+    return 0.0
+
+
+def are_audio_files_similar(mp3_file_path, mp4_file_path):
+    """
+    Checks whether an MP3 file carries the same audio as the audio track of an MP4 file.
+
+    The MP4's audio stream is transcoded with ffmpeg into a temporary MP3 placed next to the
+    MP4, both files are fingerprinted with acoustid, and the fingerprints are compared.
+
+    Args:
+        mp3_file_path: Path of the MP3 file under test.
+        mp4_file_path: Path of the MP4 file whose audio track is the reference.
+
+    Returns:
+        True if the MP3 is at least as long as the extracted audio and both fingerprints are
+        identical; False otherwise, including when ffmpeg fails to extract the audio.
+    """
+    # acoustid.fingerprint_file returns (duration, fingerprint), in that order
+    mp3_duration, mp3_fingerprint = acoustid.fingerprint_file(mp3_file_path)
+
+    # Extract the audio stream from the MP4 file
+    mp4_audio_path = os.path.splitext(mp4_file_path)[0] + '_extracted.mp3'
+    try:
+        # "-y" overwrites a stale extraction left by an earlier run instead of prompting/failing
+        subprocess.run(["ffmpeg", "-y", "-i", mp4_file_path, "-vn", "-ar", "44100", "-ac", "2", "-ab", "192k",
+                        "-f", "mp3", mp4_audio_path], check=True)
+
+        # Extract audio fingerprint from the extracted audio
+        mp4_duration, mp4_fingerprint = acoustid.fingerprint_file(mp4_audio_path)
+    except subprocess.CalledProcessError as e:
+        logger.error(f"An error occurred during audio extraction from MP4: {e}")
+        return False
+    finally:
+        # Clean up the temporary extracted audio file on every path, including failures
+        if os.path.exists(mp4_audio_path):
+            os.remove(mp4_audio_path)
+
+    # Compare fingerprints (rudimentary comparison: exact equality only)
+    return mp3_duration >= mp4_duration and mp3_fingerprint == mp4_fingerprint
+
+
+def compare_videos(video_path1, video_path2, max_frames_to_check=100, threshold=5):
+    """
+    Checks whether two videos show the same content by comparing perceptual frame hashes.
+
+    Frames are read pairwise from both videos. A pair counts as a mismatch when the difference
+    between the two perceptual hashes exceeds `threshold`.
+
+    Args:
+        video_path1: Path of the first video.
+        video_path2: Path of the second video.
+        max_frames_to_check: Maximum number of frame pairs to compare.
+        threshold: Used both as the largest hash difference still treated as a matching frame
+            and as the number of mismatching frame pairs that is still tolerated.
+
+    Returns:
+        False if either video cannot be opened, if more than `threshold` frame pairs mismatch,
+        or if one video ends before the other within the checked range; True otherwise.
+    """
+    # Open both video files
+    cap1 = cv2.VideoCapture(video_path1)
+    cap2 = cv2.VideoCapture(video_path2)
+
+    try:
+        # Two unreadable files would both fail the first read and be reported as identical,
+        # so refuse to compare unless both captures actually opened.
+        if not cap1.isOpened() or not cap2.isOpened():
+            logger.error("Unable to open one or both videos for comparison.")
+            return False
+
+        frames_checked = 0
+        mismatch_count = 0
+
+        while frames_checked < max_frames_to_check:
+            # Read frames from both videos
+            ret1, frame1 = cap1.read()
+            ret2, frame2 = cap2.read()
+
+            # If a video ends, then check if both ended to confirm they are of the same length
+            if not ret1 or not ret2:
+                return ret1 == ret2
+
+            # Convert frames to PIL Images (OpenCV delivers BGR, PIL expects RGB)
+            frame1 = Image.fromarray(cv2.cvtColor(frame1, cv2.COLOR_BGR2RGB))
+            frame2 = Image.fromarray(cv2.cvtColor(frame2, cv2.COLOR_BGR2RGB))
+
+            # Compute the perceptual hash for each frame
+            hash1 = imagehash.phash(frame1)
+            hash2 = imagehash.phash(frame2)
+
+            # Increment the frames checked
+            frames_checked += 1
+
+            # Compute the difference in the hashes
+            if hash1 - hash2 > threshold:
+                mismatch_count += 1
+                # Too many significantly different frames: the videos are not the same
+                if mismatch_count > threshold:
+                    return False
+
+        # If we reach here, the content appears to be the same
+        return True
+    finally:
+        # Release the capture handles on every exit path
+        cap1.release()
+        cap2.release()
+
+
 def is_vlc_fullscreen():
    """
    Checks if the VLC window is in full-screen mode.
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -1,15 +1,15 @@
 import os
-from pathlib import Path
 import platform
 import subprocess
-import requests
-from .pyxcursor import Xcursor
-# import Xlib.display
+from pathlib import Path
+from typing import List
+
 import pyautogui
-# from PIL import ImageGrab, Image
+import requests
 from PIL import Image
 from flask import Flask, request, jsonify, send_file
-from typing import List
+
+from pyxcursor import Xcursor

 app = Flask(__name__)

@@ -18,6 +18,7 @@ pyautogui.DARWIN_CATCH_UP_TIME = 0

 logger = app.logger

+
@app.route('/setup/execute', methods=['POST'])
@app.route('/execute', methods=['POST'])
 def execute_command():
@@ -40,6 +41,7 @@ def execute_command():
            'message': str(e)
        }), 500

+
@app.route('/setup/launch', methods=["POST"])
 def launch_app():
    data = request.json
@@ -49,11 +51,7 @@ def launch_app():
        subprocess.Popen(command)
        return "{:} launched successfully".format(" ".join(command))
    except Exception as e:
-        return jsonify( { "status": "error"
-                        , "message": str(e)
-                        }
-                      )\
-             , 500
+        return jsonify({"status": "error", "message": str(e)}), 500


@app.route('/screenshot', methods=['GET'])
@@ -116,6 +114,7 @@ def get_file():
        # If the file is not found, return a 404 error
        return jsonify({"error": "File not found"}), 404

+
@app.route("/setup/upload", methods=["POST"])
 def upload_file():
    # Retrieve filename from the POST request
@@ -127,6 +126,7 @@ def upload_file():
    else:
        return jsonify({"error": "file_path and file_data are required"}), 400

+
@app.route('/platform', methods=['GET'])
 def get_platform():
    return platform.system()
--- a/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
+++ b/evaluation_examples/examples/vlc/59f21cfb-0120-4326-b255-a5b827b38967.json
@@ -31,7 +31,8 @@
    "expected": {
      "type": "rule",
      "rules": {
-        "file_path": "Desktop/Rick Astley - Never Gonna Give You Up (Official Music Video).mp4"
+        "type": "file_name",
+        "file_name": "Rick Astley - Never Gonna Give You Up (Official Music Video).mp4"
      }
    },
    "result": {
--- a/evaluation_examples/examples/vlc/8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89.json
+++ b/evaluation_examples/examples/vlc/8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89.json
@@ -1,12 +1,31 @@
 {
  "id": "8ba5ae7a-5ae5-4eab-9fcc-5dd4fe3abf89",
  "snapshot": "base_setup",
-  "instruction": "Help me modify the folder used to store my recordings to LOCAL_PATH.",
+  "instruction": "Help me modify the folder used to store my recordings to Desktop",
  "source": "https://docs.videolan.me/vlc-user/desktop/3.0/en/basic/recording/playing.html#choose-your-recordings-folder",
-  "config": [],
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": "vlc"
+      }
+    }
+  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "vlc"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "func": "is_vlc_recordings_folder",
+    "expected": {
+      "type": "rule",
+      "rules": {
+        "recording_file_path": "/home/user/Desktop"
+      }
+    },
+    "result": {
+      "type": "vlc_config",
+      "dest": "vlcrc"
+    }
+  }
 }
--- a/evaluation_examples/examples/vlc/8f080098-ddb1-424c-b438-4e96e5e4786e.json
+++ b/evaluation_examples/examples/vlc/8f080098-ddb1-424c-b438-4e96e5e4786e.json
@@ -3,10 +3,40 @@
  "snapshot": "base_setup",
  "instruction": "Could you help me extract MP3 Audio to AUDIO_PATH from Video at VIDEO_PATH using VLC Media Player?",
  "source": "https://medium.com/@jetscribe_ai/how-to-extract-mp3-audio-from-videos-using-vlc-media-player-beeef644ebfb",
-  "config": [],
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://drive.usercontent.google.com/download?id=19jBiz8sb0M7KHHATO9qeTPr17aWm4me-&export=download&authuser=0&confirm=t&uuid=7a2261f4-3905-433f-b53f-a52dd0845651&at=APZUnTU1nmXSa1ObrA5NHYt8t1-p:1704710908141",
+            "path": "Baby Justin Bieber.mp4"
+          }
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": "vlc"
+      }
+    }
+  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "vlc"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "func": "is_vlc_recordings_folder",
+    "expected": {
+      "type": "rule",
+      "rules": {
+        "recording_file_path": "/home/user/Desktop"
+      }
+    },
+    "result": {
+      "type": "vlc_config",
+      "dest": "vlcrc"
+    }
+  }
 }
--- a/evaluation_examples/examples/vlc/aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6.json
+++ b/evaluation_examples/examples/vlc/aa4b5023-aef6-4ed9-bdc9-705f59ab9ad6.json
@@ -3,7 +3,25 @@
  "snapshot": "base_setup",
  "instruction": "This video is upside down, help me rotate it",
  "source": "https://www.dedoimedo.com/computers/vlc-rotate-videos.html",
-  "config": [],
+  "config": [
+    {
+      "type": "download",
+      "parameters": {
+        "files": [
+          {
+            "url": "https://drive.usercontent.google.com/download?id=1CLBjjsjGmHlbDg1lDcxfdE0F0C7-A5gZ&export=download&authuser=0&confirm=t&uuid=dde635fc-e223-4cd3-8065-899396e68d0a&at=APZUnTWQHdWYLLxlofuOIuhE2qiS:1704722380621",
+            "path": "flipped_1984_Apple_Macintosh_Commercial.mp4"
+          }
+        ]
+      }
+    },
+    {
+      "type": "launch",
+      "parameters": {
+        "command": "vlc"
+      }
+    }
+  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "vlc"
--- a/evaluation_examples/examples/vlc/bba3381f-b5eb-4439-bd9e-80c22218d5a7.json
+++ b/evaluation_examples/examples/vlc/bba3381f-b5eb-4439-bd9e-80c22218d5a7.json
@@ -1,12 +1,32 @@
 {
  "id": "bba3381f-b5eb-4439-bd9e-80c22218d5a7",
  "snapshot": "base_setup",
-  "instruction": "Could you help me play the online video at URL?(network stream)",
+  "instruction": "Help me play the online video at https://www.youtube.com/watch?v=pgBsyTKAwLw",
  "source": "https://www.quora.com/How-do-I-play-online-videos-using-the-VLC-media-player",
-  "config": [],
+  "config": [
+    {
+      "type": "launch",
+      "parameters": {
+        "command": "vlc"
+      }
+    }
+  ],
  "trajectory": "trajectories/",
  "related_apps": [
    "vlc"
  ],
-  "evaluator": "evaluation_dir"
+  "evaluator": {
+    "func": "is_vlc_playing",
+    "expected": {
+      "type": "rule",
+      "rules": {
+        "type": "url",
+        "url": "https://www.youtube.com/watch?v=pgBsyTKAwLw"
+      }
+    },
+    "result": {
+      "type": "vlc_playing_info",
+      "dest": "status.xml"
+    }
+  }
 }
--- a/mm_agents/gui_som/data_preparation/scrape_crawler.py
+++ b/mm_agents/gui_som/data_preparation/scrape_crawler.py
@@ -17,7 +17,7 @@ with open('get_tag_elem_dict.js', 'r') as f:
 def scrape_data(website_url, action_depth=10):
    # if file exists, skip
    if os.path.exists(os.path.join('collected_data', website_url.split("//")[1])):
-        print("Data already exists, skipping...")
+        # print("Data already exists, skipping...")
        return

    def click_random_link(page):
@@ -100,6 +100,7 @@ def scrape_data(website_url, action_depth=10):
 def run_one(url):
    try:
        scrape_data("https://" + url, action_depth=5)
+        scrape_data("http://" + url, action_depth=5)
    except Exception as e:
        print("Error scraping data:", e)
        print("Start next one...")
@@ -107,6 +108,7 @@ def run_one(url):

 def main():
    urls = read_csv("majestic_million.csv")[:20000]
+    random.shuffle(urls)

    # Number of processes
    num_processes = 50  # Adjust based on your system's capability, on my i9-13900k, 50 processes can be used