Merge remote-tracking branch 'origin/main'
# Conflicts: # mm_agents/gpt_4v_agent.py
This commit is contained in:
@@ -7,6 +7,7 @@ import uuid
|
||||
import tempfile
|
||||
from typing import Any, Union, Optional
|
||||
from typing import Dict, List
|
||||
import os
|
||||
|
||||
import requests
|
||||
from pydrive.auth import GoogleAuth
|
||||
@@ -114,6 +115,7 @@ class SetupController:
|
||||
if not os.path.exists(cache_path):
|
||||
max_retries = 3
|
||||
downloaded = False
|
||||
e = None
|
||||
for i in range(max_retries):
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
@@ -128,7 +130,7 @@ class SetupController:
|
||||
break
|
||||
|
||||
except requests.RequestException as e:
|
||||
logger.error(f"Failed to download {url}. Retrying... ({max_retries - i - 1} attempts left)")
|
||||
logger.error(f"Failed to download {url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
|
||||
if not downloaded:
|
||||
raise requests.RequestException(f"Failed to download {url}. No retries left. Error: {e}")
|
||||
|
||||
@@ -344,39 +346,49 @@ class SetupController:
|
||||
port = 9222 # fixme: this port is hard-coded, need to be changed from config file
|
||||
|
||||
remote_debugging_url = f"http://{host}:{port}"
|
||||
with sync_playwright() as p:
|
||||
logger.info("Connect to Chrome @: %s", remote_debugging_url)
|
||||
logger.debug("PLAYWRIGHT ENV: %s", repr(os.environ))
|
||||
for attempt in range(15):
|
||||
if attempt>0:
|
||||
time.sleep(5)
|
||||
|
||||
browser = None
|
||||
for attempt in range(15):
|
||||
with sync_playwright() as p:
|
||||
try:
|
||||
browser = p.chromium.connect_over_cdp(remote_debugging_url)
|
||||
break
|
||||
#break
|
||||
except Exception as e:
|
||||
if attempt < 14:
|
||||
logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}")
|
||||
time.sleep(1)
|
||||
#time.sleep(10)
|
||||
continue
|
||||
else:
|
||||
logger.error(f"Failed to connect after multiple attempts: {e}")
|
||||
raise e
|
||||
|
||||
if not browser:
|
||||
return
|
||||
if not browser:
|
||||
return
|
||||
|
||||
for i, url in enumerate(urls_to_open):
|
||||
# Use the first context (which should be the only one if using default profile)
|
||||
if i == 0:
|
||||
context = browser.contexts[0]
|
||||
logger.info("Opening %s...", urls_to_open)
|
||||
for i, url in enumerate(urls_to_open):
|
||||
# Use the first context (which should be the only one if using default profile)
|
||||
if i == 0:
|
||||
context = browser.contexts[0]
|
||||
|
||||
page = context.new_page() # Create a new page (tab) within the existing context
|
||||
page.goto(url, timeout=60000)
|
||||
logger.info(f"Opened tab {i + 1}: {url}")
|
||||
page = context.new_page() # Create a new page (tab) within the existing context
|
||||
try:
|
||||
page.goto(url, timeout=60000)
|
||||
except:
|
||||
logger.warning("Opening %s exceeds time limit", url) # only for human test
|
||||
logger.info(f"Opened tab {i + 1}: {url}")
|
||||
|
||||
if i == 0:
|
||||
# clear the default tab
|
||||
default_page = context.pages[0]
|
||||
default_page.close()
|
||||
if i == 0:
|
||||
# clear the default tab
|
||||
default_page = context.pages[0]
|
||||
default_page.close()
|
||||
|
||||
# Do not close the context or browser; they will remain open after script ends
|
||||
return browser, context
|
||||
# Do not close the context or browser; they will remain open after script ends
|
||||
return browser, context
|
||||
|
||||
def _chrome_close_tabs_setup(self, urls_to_close: List[str]):
|
||||
time.sleep(5) # Wait for Chrome to finish launching
|
||||
@@ -552,4 +564,4 @@ class SetupController:
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return browser, context
|
||||
return browser, context
|
||||
|
||||
@@ -191,7 +191,7 @@ To enable and use the HTTP interface in VLC Media Player for remote control and
|
||||
#### 4. Configure Lua HTTP
|
||||
|
||||
- Expand the `Main interfaces` node and select `Lua`.
|
||||
- Under `Lua HTTP`, set a password in the `Lua HTTP` section. This password will be required to access the HTTP interface.
|
||||
- Under `Lua HTTP`, set the password to `password`. This password will be required to access the HTTP interface.
|
||||
|
||||
#### 5. Save and Restart VLC
|
||||
|
||||
@@ -217,4 +217,4 @@ pip install opencv-python-headless Pillow imagehash
|
||||
- If the port is in use by another application, you may change the port number in VLC's settings.
|
||||
|
||||
## GIMP
|
||||
Click on the "Keep" of the image loading pop-up.
|
||||
Click on the "Keep" of the image loading pop-up.
|
||||
|
||||
@@ -43,6 +43,7 @@ from .docs import (
|
||||
compare_highlighted_text,
|
||||
is_first_line_centered,
|
||||
check_file_exists,
|
||||
check_tabstops,
|
||||
compare_contains_image
|
||||
)
|
||||
from .general import (
|
||||
|
||||
@@ -6,11 +6,13 @@ import zipfile
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from docx import Document
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT, WD_TAB_ALIGNMENT
|
||||
from docx.shared import RGBColor
|
||||
from odf.opendocument import load
|
||||
from odf.text import P
|
||||
from odf.text import Span
|
||||
from skimage.color import deltaE_ciede2000
|
||||
from skimage.color import rgb2lab
|
||||
|
||||
logger = logging.getLogger("desktopenv.metric.docs")
|
||||
|
||||
@@ -141,7 +143,7 @@ def compare_docx_tables(docx_file1, docx_file2):
|
||||
# Compare each cell
|
||||
for i in range(len(table1.rows)):
|
||||
for j in range(len(table1.columns)):
|
||||
if table1.cell(i, j).text != table2.cell(i, j).text:
|
||||
if table1.cell(i, j).text.strip() != table2.cell(i, j).text.strip():
|
||||
return 0
|
||||
|
||||
return 1
|
||||
@@ -234,6 +236,40 @@ def check_file_exists(directory, filename):
|
||||
return 1 if os.path.isfile(file_path) else 0
|
||||
|
||||
|
||||
def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
    """Score how closely the tab stops of two .docx files agree.

    Only paragraphs whose text is not blank are considered, and they are
    compared pairwise in document order.

    Args:
        docx_file1: Document to evaluate; passed straight to ``Document``.
        docx_file2: Reference document; passed straight to ``Document``.
        **kwargs: Optional settings.
            word_number_split_by_tabstop: when given, every non-blank
                paragraph of ``docx_file1`` must contain exactly this many
                whitespace-separated words in the tab-separated segment
                selected by ``index``.
            index: which tab-separated segment to count words in
                (default 0; negative values index from the end).

    Returns:
        A float in [0, 1]. 0.0 is returned when the documents have a
        different number of non-blank paragraphs, when there are no
        non-blank paragraphs at all, when the word-count check fails, or
        when two paired paragraphs differ in the number or alignment of
        their tab stops. Otherwise the result is 1 minus the mean, over
        paragraphs, of the summed tab-position differences divided by the
        text width of the first section of ``docx_file2``.
    """
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
    para1 = [p for p in doc1.paragraphs if p.text.strip()]
    para2 = [p for p in doc2.paragraphs if p.text.strip()]
    if len(para1) != len(para2):
        return 0.0
    if not para1:
        # Nothing to evaluate; also avoids dividing by zero in the final mean.
        return 0.0

    number = kwargs.get('word_number_split_by_tabstop')
    if number is not None:
        index = kwargs.get('index', 0)
        for p1 in para1:
            splits = p1.text.split('\t')
            # str.split never returns an empty list, so validate the
            # requested segment index instead of testing for emptiness.
            if not -len(splits) <= index < len(splits):
                return 0.0
            # str.split() with no argument splits on whitespace runs and
            # drops empty pieces.
            words = splits[index].split()
            if len(words) != number:
                return 0.0

    section = doc2.sections[0]
    paragraph_width = section.page_width - section.left_margin - section.right_margin

    def _is_ignored(tab_stop):
        # Skip CLEAR tab stops and the default left-aligned stop at position 0.
        return tab_stop.alignment == WD_TAB_ALIGNMENT.CLEAR or (
            tab_stop.alignment == WD_TAB_ALIGNMENT.LEFT and tab_stop.position == 0
        )

    minus = 0.0
    for p1, p2 in zip(para1, para2):
        tabs1 = [tst for tst in p1.paragraph_format.tab_stops if not _is_ignored(tst)]
        tabs2 = [tst for tst in p2.paragraph_format.tab_stops if not _is_ignored(tst)]
        if len(tabs1) != len(tabs2):
            return 0.0
        difference = 0.0
        for t1, t2 in zip(tabs1, tabs2):
            if t1.alignment != t2.alignment:
                return 0.0
            difference += abs(t1.position - t2.position)
        minus += difference / paragraph_width

    score = 1 - (minus / len(para1))
    # Several badly misplaced tab stops can push the penalty above 1;
    # clamp so the metric never goes negative.
    return max(score, 0.0)
|
||||
|
||||
|
||||
def compare_contains_image(docx_file1, docx_file2):
|
||||
doc1 = Document(docx_file1)
|
||||
doc2 = Document(docx_file2)
|
||||
@@ -258,10 +294,18 @@ def compare_contains_image(docx_file1, docx_file2):
|
||||
# print(find_default_font("Ani", config_path))
|
||||
|
||||
|
||||
def evaluate_colored_words_in_tables(file_path1, file_path2):
|
||||
def evaluate_colored_words_in_tables(file_path1, file_path2, **kwargs):
|
||||
if not compare_docx_files(file_path1, file_path2):
|
||||
return 0
|
||||
document = Document(file_path1)
|
||||
threshold = kwargs.get('threshold', 3.5)
|
||||
|
||||
def _calculate_color_difference(rgb1, rgb2):
|
||||
srgb1 = [rgb1[0] / 255.0, rgb1[1] / 255.0, rgb1[2] / 255.0]
|
||||
srgb2 = [rgb2[0] / 255.0, rgb2[1] / 255.0, rgb2[2] / 255.0]
|
||||
lab1, lab2 = rgb2lab(srgb1), rgb2lab(srgb2)
|
||||
delta_e = deltaE_ciede2000(lab1, lab2)
|
||||
return delta_e
|
||||
|
||||
for table in document.tables:
|
||||
# Iterate through rows and cells in the table
|
||||
@@ -273,9 +317,9 @@ def evaluate_colored_words_in_tables(file_path1, file_path2):
|
||||
if word:
|
||||
first_letter = word[0].lower()
|
||||
|
||||
if first_letter in 'aeiou' and run.font.color.rgb != RGBColor(255, 0, 0):
|
||||
if first_letter in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(255, 0, 0)) > threshold:
|
||||
return 0 # Vowel-colored words should be red
|
||||
elif first_letter not in 'aeiou' and run.font.color.rgb != RGBColor(0, 0, 255):
|
||||
elif first_letter not in 'aeiou' and _calculate_color_difference(run.font.color.rgb, RGBColor(0, 0, 255)) > threshold:
|
||||
return 0 # Non-vowel-colored words should be blue
|
||||
|
||||
return 1 # All words in tables are correctly colored
|
||||
|
||||
@@ -2,7 +2,7 @@ import ctypes
|
||||
import os
|
||||
import platform
|
||||
import shlex
|
||||
import subprocess
|
||||
import subprocess, signal
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from typing import List, Dict, Tuple
|
||||
@@ -997,7 +997,7 @@ def start_recording():
|
||||
|
||||
start_command = f"ffmpeg -y -f x11grab -draw_mouse 1 -s {screen_width}x{screen_height} -i :0.0 -c:v libx264 -r 30 {recording_path}"
|
||||
|
||||
recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
recording_process = subprocess.Popen(shlex.split(start_command), stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
|
||||
return jsonify({'status': 'success', 'message': 'Started recording.'})
|
||||
|
||||
@@ -1009,10 +1009,8 @@ def end_recording():
|
||||
if not recording_process:
|
||||
return jsonify({'status': 'error', 'message': 'No recording in progress to stop.'}), 400
|
||||
|
||||
recording_process.terminate()
|
||||
recording_process.send_signal(signal.SIGINT)
|
||||
recording_process.wait()
|
||||
# return_code = recording_process.returncode
|
||||
output, error = recording_process.communicate()
|
||||
recording_process = None
|
||||
|
||||
# return recording video file
|
||||
|
||||
Reference in New Issue
Block a user