Merge remote-tracking branch 'origin/main'

# Conflicts:
#	mm_agents/gpt_4v_agent.py
This commit is contained in:
Timothyxxx
2024-02-02 14:37:23 +08:00
25 changed files with 1141 additions and 46 deletions

View File

@@ -7,6 +7,7 @@ import uuid
import tempfile
from typing import Any, Union, Optional
from typing import Dict, List
import os
import requests
from pydrive.auth import GoogleAuth
@@ -114,6 +115,7 @@ class SetupController:
if not os.path.exists(cache_path):
max_retries = 3
downloaded = False
e = None
for i in range(max_retries):
try:
response = requests.get(url, stream=True)
@@ -128,7 +130,7 @@ class SetupController:
break
except requests.RequestException as e:
logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
logger.error(f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
if not downloaded:
raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
@@ -344,39 +346,49 @@ class SetupController:
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
remote_debugging_url = f"http://{host}:{port}"
with sync_playwright() as p:
logger.info("Connect to Chrome @: %s", remote_debugging_url)
logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ))
for attempt in range(15):
if attempt>0:
time.sleep(5)
browser = None
for attempt in range(15):
with sync_playwright() as p:
try:
browser = p.chromium.connect_over_cdp(remote_debugging_url)
break
#break
except Exception as e:
if attempt < 14:
logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}")
time.sleep(1)
#time.sleep(10)
continue
else:
logger.error(f"Failed to connect after multiple attempts: {e}")
raise e
if not browser:
return
if not browser:
return
for i, url in enumerate(urls_to_open):
# Use the first context (which should be the only one if using default profile)
if i == 0:
context = browser.contexts[0]
logger.info("Opening %s...", urls_to_open)
for i, url in enumerate(urls_to_open):
# Use the first context (which should be the only one if using default profile)
if i == 0:
context = browser.contexts[0]
page = context.new_page() # Create a new page (tab) within the existing context
page.goto(url, timeout=60000)
logger.info(f"Opened tab {i + 1}: {url}")
page = context.new_page() # Create a new page (tab) within the existing context
try:
page.goto(url, timeout=60000)
except:
logger.warning("Opening %s exceeds time limit", url) # only for human test
logger.info(f"Opened tab {i + 1}: {url}")
if i == 0:
# clear the default tab
default_page = context.pages[0]
default_page.close()
if i == 0:
# clear the default tab
default_page = context.pages[0]
default_page.close()
# Do not close the context or browser; they will remain open after script ends
return browser, context
# Do not close the context or browser; they will remain open after script ends
return browser, context
def _chrome_close_tabs_setup(self, urls_to_close: List[str]):
time.sleep(5) # Wait for Chrome to finish launching
@@ -552,4 +564,4 @@ class SetupController:
else:
raise NotImplementedError
return browser, context
return browser, context

View File

@@ -191,7 +191,7 @@ To enable and use the HTTP interface in VLC Media Player for remote control and
#### 4. Configure Lua HTTP
- Expand the `Main interfaces` node and select `Lua`.
- Under `Lua HTTP`, set a password in the `Lua HTTP` section. This password will be required to access the HTTP interface.
- Under `Lua HTTP`, set the password to `password`. This password will be required to access the HTTP interface.
#### 5. Save and Restart VLC
@@ -217,4 +217,4 @@ pip install opencv-python-headless Pillow imagehash
- If the port is in use by another application, you may change the port number in VLC's settings.
## GIMP
Click the "Keep" button in the image-loading pop-up.
Click the "Keep" button in the image-loading pop-up.

View File

@@ -43,6 +43,7 @@ from .docs import (
compare_highlighted_text,
is_first_line_centered,
check_file_exists,
check_tabstops,
compare_contains_image
)
from .general import (

View File

@@ -6,11 +6,13 @@ import zipfile
from typing import List, Dict, Any
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
from docx.shared import RGBColor
from odf.opendocument import load
from odf.text import P
from odf.text import Span
from skimage.color import deltaE_ciede2000
from skimage.color import rgb2lab
logger = logging.getLogger("desktopenv.metric.docs")
@@ -141,7 +143,7 @@ def compare_docx_tables(docx_file1, docx_file2):
# Compare each cell
for i in range(len(table1.rows)):
for j in range(len(table1.columns)):
if table1.cell(i, j).text != table2.cell(i, j).text:
if table1.cell(i, j).text.strip() != table2.cell(i, j).text.strip():
return 0
return 1
@@ -234,6 +236,40 @@ def check_file_exists(directory, filename):
return 1 if os.path.isfile(file_path) else 0
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
    """Score how closely the paragraph tab stops of two .docx files agree.

    Only paragraphs whose text is not blank are considered; they are paired
    up in document order.

    Args:
        docx_file1: First document, passed straight to ``Document``. The
            optional word-count check is applied to this document's text.
        docx_file2: Second document, passed straight to ``Document``. Its
            first section supplies the page width and margins used to
            normalise tab-stop position differences.
        **kwargs:
            word_number_split_by_tabstop: When present and not ``None``,
                every non-blank paragraph of ``docx_file1`` must contain
                exactly this many whitespace-separated words in the
                tab-separated segment selected by ``index``.
            index: Which tab-separated segment to count words in
                (default ``0``).

    Returns:
        ``0.0`` if the documents have a different number of non-blank
        paragraphs, if the word-count check fails (including when ``index``
        does not address an existing segment), or if any paragraph pair
        differs in the number or alignment of its tab stops.
        ``1.0`` if neither document has a non-blank paragraph.
        Otherwise ``1 - mean(per-paragraph penalty)``, where a paragraph's
        penalty is the sum of absolute tab-stop position differences divided
        by the usable page width. The result is not clamped, so it can drop
        below zero when positions differ by more than the page width.
    """
    doc1: Document = Document(docx_file1)
    doc2: Document = Document(docx_file2)
    paragraphs1 = [p for p in doc1.paragraphs if p.text.strip()]
    paragraphs2 = [p for p in doc2.paragraphs if p.text.strip()]
    if len(paragraphs1) != len(paragraphs2):
        return 0.0
    if not paragraphs1:
        # No paragraphs means no tab stops can differ; returning here also
        # avoids dividing by zero when averaging the penalty below.
        return 1.0

    expected_words = kwargs.get('word_number_split_by_tabstop')
    if expected_words is not None:
        index = kwargs.get('index', 0)
        for paragraph in paragraphs1:
            segments = paragraph.text.split('\t')
            # str.split always yields at least one segment, so the only way
            # the lookup can fail is an index outside the segment list.
            try:
                segment = segments[index]
            except IndexError:
                return 0.0
            # split() with no argument splits on whitespace runs and drops
            # empty tokens, which is exactly the word count we need.
            if len(segment.split()) != expected_words:
                return 0.0

    section = doc2.sections[0]
    paragraph_width = section.page_width - section.left_margin - section.right_margin

    def _is_ignorable(tab_stop) -> bool:
        # CLEAR tab stops and the left-aligned stop at position 0 are
        # excluded from the comparison.
        return tab_stop.alignment == WD_TAB_ALIGNMENT.CLEAR or (
            tab_stop.alignment == WD_TAB_ALIGNMENT.LEFT and tab_stop.position == 0
        )

    penalty = 0.0
    for p1, p2 in zip(paragraphs1, paragraphs2):
        tabs1 = [ts for ts in p1.paragraph_format.tab_stops if not _is_ignorable(ts)]
        tabs2 = [ts for ts in p2.paragraph_format.tab_stops if not _is_ignorable(ts)]
        if len(tabs1) != len(tabs2):
            return 0.0
        difference = 0.0
        for t1, t2 in zip(tabs1, tabs2):
            if t1.alignment != t2.alignment:
                return 0.0
            difference += abs(t1.position - t2.position)
        # Normalise by the usable page width so the penalty is a fraction.
        penalty += difference / paragraph_width
    return 1 - (penalty / len(paragraphs1))
def compare_contains_image(docx_file1, docx_file2):
doc1 = Document(docx_file1)
doc2 = Document(docx_file2)
@@ -258,10 +294,18 @@ def compare_contains_image(docx_file1, docx_file2):
# print(find_default_font("Ani", config_path))
def evaluate_colored_words_in_tables(file_path1, file_path2):
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
if not compare_docx_files(file_path1, file_path2):
return 0
document = Document(file_path1)
threshold = kwargs.get('threshold', 3.5)
def _calculate_color_difference(rgb1, rgb2):
srgb1 = [rgb1[0] / 255.0, rgb1[1] / 255.0, rgb1[2] / 255.0]
srgb2 = [rgb2[0] / 255.0, rgb2[1] / 255.0, rgb2[2] / 255.0]
lab1, lab2 = rgb2lab(srgb1), rgb2lab(srgb2)
delta_e = deltaE_ciede2000(lab1, lab2)
return delta_e
for table in document.tables:
# Iterate through rows and cells in the table
@@ -273,9 +317,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
if word:
first_letter = word[0].lower()
if first_letter in 'aeiou' and run.font.color.rgb != RGBColor(255, 0, 0):
if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold:
return 0 # Vowel-colored words should be red
elif first_letter not in 'aeiou' and run.font.color.rgb != RGBColor(0, 0, 255):
elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold:
return 0 # Non-vowel-colored words should be blue
return 1 # All words in tables are correctly colored

View File

@@ -2,7 +2,7 @@ import ctypes
import os
import platform
import shlex
import subprocess
import subprocess, signal
from pathlib import Path
from typing import Any, Optional
from typing import List, Dict, Tuple
@@ -997,7 +997,7 @@ def start_recording():
start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
return jsonify({'status': 'success', 'message': 'Started recording.'})
@@ -1009,10 +1009,8 @@ def end_recording():
if not recording_process:
return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
recording_process.terminate()
recording_process.send_signal(signal.SIGINT)
recording_process.wait()
# return_code = recording_process.returncode
output, error = recording_process.communicate()
recording_process = None
# return recording video file