Merge branch 'main' of https://github.com/xlang-ai/DesktopEnv

2024-03-08 20:37:40 +08:00
parent 4070b41fbd 365c7798f1
commit 3761de4a05
23 changed files with 1160 additions and 89 deletions
--- a/desktop_env/evaluators/getters/__init__.py
+++ b/desktop_env/evaluators/getters/__init__.py
@@ -29,7 +29,7 @@ from .chrome import (
 from .file import get_cloud_file, get_vm_file, get_cache_file, get_content_from_vm_file
 from .general import get_vm_command_line, get_vm_terminal_output, get_vm_command_error
 from .gimp import get_gimp_config_file
-from .impress import get_audio_in_slide
+from .impress import get_audio_in_slide, get_background_image_in_slide
 from .info import get_vm_screen_size, get_vm_window_size, get_vm_wallpaper, get_list_directory
 from .misc import get_rule, get_accessibility_tree, get_rule_relativeTime
 from .replay import get_replay
--- a/desktop_env/evaluators/getters/chrome.py
+++ b/desktop_env/evaluators/getters/chrome.py
@@ -589,6 +589,10 @@ def get_active_url_from_accessTree(env, config):
    if len(elements) == 0:
        print("no elements found")
        return None
+    elif elements[-1].text is None:
+        print("no text found")
+        return None
+
    active_tab_url = config["goto_prefix"] + elements[0].text if "goto_prefix" in config.keys() else "https://" + \
                                                                                                     elements[0].text
    print("active tab url now: {}".format(active_tab_url))
--- a/desktop_env/evaluators/getters/impress.py
+++ b/desktop_env/evaluators/getters/impress.py
@@ -7,6 +7,67 @@ from typing import Dict
 from desktop_env.evaluators.getters.file import get_vm_file


+def get_background_image_in_slide(env, config: Dict[str, str]):
+    """Fetch the background image used by one slide of a .pptx file on the VM.
+
+    Args:
+        env: Environment handle, passed through unchanged to ``get_vm_file``.
+        config: Must contain
+            ``ppt_file_path``: path of the presentation on the VM,
+            ``slide_index``: zero-based slide index (converted with ``int``),
+            ``dest``: destination handed to ``get_vm_file`` when the image is
+            referenced through a ``file://`` target.
+
+    Returns:
+        The host path of the image extracted from the package when the
+        relationship target is package-relative (starts with ``..``); the
+        result of ``get_vm_file`` when the target is a ``file://`` URL; or
+        ``None`` when the slide does not exist, has no background picture, or
+        no matching image relationship is found.
+
+    Raises:
+        AssertionError: If the matching relationship target is neither
+            package-relative nor a ``file://`` URL.
+    """
+    ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]
+    image_file_path = None
+
+    ppt_file_localhost_path = get_vm_file(env, {"path": ppt_file_path, "dest": os.path.split(ppt_file_path)[-1]})
+    if not ppt_file_localhost_path:
+        # NOTE(review): assumes get_vm_file reports a failed download with a
+        # falsy value -- confirm against its implementation.
+        return None
+
+    bg_tag = "{http://schemas.openxmlformats.org/presentationml/2006/main}bgPr"
+    image_tag = "{http://schemas.openxmlformats.org/drawingml/2006/main}blip"
+    attr_tag = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
+
+    with zipfile.ZipFile(ppt_file_localhost_path, 'r') as myzip:
+        members = set(myzip.namelist())
+
+        # First, check whether the slide declares a background picture.
+        slide_xml_file = 'ppt/slides/slide{}.xml'.format(slide_index + 1)
+        if slide_xml_file not in members:
+            return None
+        with myzip.open(slide_xml_file) as f:
+            slide_root = ET.parse(f).getroot()
+        # Relationship id of the first embedded blip found under a <p:bgPr>;
+        # blips without an r:embed attribute are skipped rather than raising.
+        image_id = next(
+            (blip.attrib[attr_tag]
+             for bg in slide_root.iter(bg_tag)
+             for blip in bg.iter(image_tag)
+             if attr_tag in blip.attrib),
+            None,
+        )
+        if image_id is None:
+            return None
+
+        # Next, resolve that relationship id through the slide's .rels part.
+        slide_rels_file = 'ppt/slides/_rels/slide{}.xml.rels'.format(slide_index + 1)
+        if slide_rels_file not in members:
+            return None
+        with myzip.open(slide_rels_file) as f:
+            rels_root = ET.parse(f).getroot()
+        namespaces = {'r': 'http://schemas.openxmlformats.org/package/2006/relationships'}
+        for rel in rels_root.findall('r:Relationship', namespaces):
+            if 'image' not in rel.attrib.get('Type', '') or rel.attrib.get('Id') != image_id:
+                continue
+            target = rel.attrib['Target']
+            if target.startswith('..'):
+                # Package-relative target: resolve it against the slides
+                # directory and use forward slashes, as ZIP member names do.
+                member = os.path.normpath(os.path.join('ppt/slides', target)).replace('\\', '/')
+                tmpdirname = os.path.dirname(ppt_file_localhost_path)
+                myzip.extract(member, tmpdirname)
+                return os.path.join(tmpdirname, member)
+            # Otherwise the image is linked by absolute path on the VM.
+            assert target.startswith("file://"), target
+            image_file_path = target[7:]  # drop the "file://" scheme prefix
+            break
+
+    if image_file_path is None:
+        return None
+    # Linked image: get the image file from the VM and return its host path.
+    return get_vm_file(env, {"path": image_file_path, "dest": dest})
+
+
 def get_audio_in_slide(env, config: Dict[str, str]):
    ppt_file_path, slide_index, dest = config["ppt_file_path"], int(config["slide_index"]), config["dest"]

--- a/desktop_env/evaluators/metrics/__init__.py
+++ b/desktop_env/evaluators/metrics/__init__.py
@@ -48,7 +48,8 @@ from .docs import (
    check_tabstops,
    compare_contains_image,
    compare_docx_images,
-    compare_image_text
+    compare_image_text,
+    compare_references
 )
 from .general import (
    check_csv,
--- a/desktop_env/evaluators/metrics/docs.py
+++ b/desktop_env/evaluators/metrics/docs.py
@@ -11,9 +11,9 @@ from docx.shared import RGBColor
 from odf.opendocument import load
 from odf.text import P
 from odf.text import Span
+from rapidfuzz import fuzz
 from skimage.color import deltaE_ciede2000
 from skimage.color import rgb2lab
-from rapidfuzz import fuzz

 logger = logging.getLogger("desktopenv.metric.docs")

@@ -173,9 +173,11 @@ def compare_docx_tables(docx_file1, docx_file2):

    return 1

+
 from io import BytesIO
 from PIL import Image

+
 def compare_docx_images(docx_file1, docx_file2):
    doc1 = Document(docx_file1)
    doc2 = Document(docx_file2)
@@ -187,7 +189,7 @@ def compare_docx_images(docx_file1, docx_file2):
                img_data = rel.target_part.blob
                images.append(BytesIO(img_data))
        return images
-    
+
    images1 = extract_images(doc1)
    images2 = extract_images(doc2)
    if len(images1) != len(images2):
@@ -197,8 +199,10 @@ def compare_docx_images(docx_file1, docx_file2):
            return 0
    return 1

+
 import pytesseract

+
 def compare_image_text(image_path, rule):
    img = Image.open(image_path)
    img_text = pytesseract.image_to_string(img)
@@ -207,6 +211,7 @@ def compare_image_text(image_path, rule):
    else:
        raise ValueError("Unsupported rule type")

+
 def compare_line_spacing(docx_file1, docx_file2):
    if not compare_docx_files(docx_file1, docx_file2):
        return 0
@@ -313,7 +318,7 @@ def check_tabstops(docx_file1, docx_file2, **kwargs) -> float:
    section = doc2.sections[0]
    paragraph_width = section.page_width - section.left_margin - section.right_margin
    ignore_tabs = lambda x: x.alignment == WD_TAB_ALIGNMENT.CLEAR or (
-                x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
+            x.alignment == WD_TAB_ALIGNMENT.LEFT and x.position == 0)
    minus = .0
    for p1, p2 in zip(para1, para2):
        # filter CLEAR tabstop and default left-0 tabstop
@@ -583,3 +588,95 @@ def compare_highlighted_text(file1, file2):
        return 1
    else:
        return 0
+
+
+def compare_references(file1, file2, **options):
+    """Score how closely the reference lists of two .docx files match.
+
+    A document's reference list is every non-blank paragraph that follows the
+    first paragraph whose text equals ``reference_indicator`` exactly.
+
+    Args:
+        file1: Path of the first document.
+        file2: Path of the second document.
+        **options: ``reference_indicator`` (default ``'References'``) is the
+            heading paragraph text; ``reference_base_result`` (default
+            ``0.5``) is the mean similarity that maps to a score of 0.
+
+    Returns:
+        0 if either path does not end in ``.docx``, if only one document has
+        the heading, or if the number of entries differs; 1 if neither
+        document has the heading or both lists are empty; otherwise the mean
+        ``fuzz.ratio`` of the paired entries rescaled from
+        ``[reference_base_result, 1]`` to ``[0, 1]`` (0 below the base).
+    """
+    reference_indicator = options.get('reference_indicator', 'References')
+    reference_base_result = options.get('reference_base_result', 0.5)
+
+    if not (file1.endswith('.docx') and file2.endswith('.docx')):
+        # Unsupported file types or mismatch
+        print("Unsupported file types or mismatch between file types.")
+        return 0
+
+    def reference_entries(path):
+        # Non-blank paragraphs after the first indicator paragraph, or None
+        # when the document has no such paragraph.
+        texts = [p.text for p in Document(path).paragraphs]
+        if reference_indicator not in texts:
+            return None
+        start = texts.index(reference_indicator) + 1
+        return [text for text in texts[start:] if text.strip()]
+
+    ref1 = reference_entries(file1)
+    ref2 = reference_entries(file2)
+
+    if ref1 is None and ref2 is None:
+        return 1
+    if ref1 is None or ref2 is None:
+        return 0
+    if len(ref1) != len(ref2):
+        return 0
+    if not ref1:
+        # Both sections exist but are empty: they are identical, and there is
+        # nothing to average (avoids dividing by zero below).
+        return 1
+
+    # fuzz.ratio is divided by 100.0 to bring each pair's score into [0, 1].
+    result = sum(fuzz.ratio(r1, r2) / 100.0 for r1, r2 in zip(ref1, ref2)) / len(ref1)
+    if result < reference_base_result:
+        return 0
+    if reference_base_result >= 1:
+        # No interval left to rescale into; reaching the base is a full match.
+        return 1
+    return (result - reference_base_result) / (1 - reference_base_result)
+
+
+def compare_answer(file1, file2, **options):
+    """Compare the reference sections of two .docx files.
+
+    The previous body was a verbatim copy of ``compare_references`` that never
+    read ``reference_indicator`` / ``reference_base_result`` from ``options``
+    and therefore raised ``NameError`` for any pair of .docx inputs. It now
+    delegates to ``compare_references`` so both functions share one
+    implementation.
+
+    NOTE(review): the original docstring was truncated ("compare the ..."),
+    so any behaviour intended to differ from ``compare_references`` is
+    unknown -- confirm with the author.
+
+    Args:
+        file1: Path of the first document.
+        file2: Path of the second document.
+        **options: Forwarded unchanged to ``compare_references``.
+
+    Returns:
+        Whatever ``compare_references(file1, file2, **options)`` returns.
+    """
+    return compare_references(file1, file2, **options)
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -16,9 +16,9 @@ from openpyxl.utils import get_column_letter
 from openpyxl.worksheet.datavalidation import DataValidation
 from openpyxl.worksheet.worksheet import Worksheet

-from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
-from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
-                 , load_filters, load_pivot_tables
+from desktop_env.evaluators.metrics.utils import _match_value_to_rule, _read_cell_style, read_cell_value
+from desktop_env.evaluators.metrics.utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles \
+    , load_filters, load_pivot_tables
 from rapidfuzz import fuzz

 # from openpyxl.utils import coordinate_to_tuple
@@ -194,7 +194,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
            # sheet_idx1: as sheet_idx0
            # rules: list of dict, each dict is like
            #   { "range": ["A1:B6", "C2:E5"],
-            #     "type": "includes" | "includes_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 includes_by 1
+            #     "type": "includes" | "included_by" | "fuzzy_match" | "exact_match", # 0 includes 1, 0 included_by 1
            #     "threshold": 85, // for fuzzy match
            #     "ignore_case": true | false,
            #     "ignore_chars": " ()", # filtered out
@@ -232,9 +232,9 @@ def compare_table(result: str, expected: str = None, **options) -> float:
                            value2 = value2.lower()

                        if rl["type"]=="includes":
-                            metric: bool = value1 in value2
-                        elif rl["type"]=="includes_by":
                            metric: bool = value2 in value1
+                        elif rl["type"]=="included_by":
+                            metric: bool = value1 in value2
                        elif rl["type"]=="fuzzy_match":
                            metric: bool = fuzz.ratio(value1, value2) >= rl.get("threshold", 85.)
                        elif rl["type"]=="exact_match":
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -274,7 +274,8 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
    #  }}} function load_pivot_tables # 


-_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
+_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si", namespaces=_xlsx_ns_mapping)
+_shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx_ns_mapping)


 def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
@@ -285,7 +286,9 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
                with z_f.open("xl/sharedStrings.xml") as f:
                    shared_str_xml: _Element = lxml.etree.fromstring(f.read())
                    str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
-                    shared_strs: List[str] = [elm.text for elm in str_elements]
+                    shared_strs: List[str] = [ "".join(t.text for t in _shared_str_value_selector(elm))\
+                                           for elm in str_elements
+                                             ]
            except:
                logger.debug("Read shared strings error: %s", xlsx_file)

--- a/desktop_env/evaluators/metrics/vscode.py
+++ b/desktop_env/evaluators/metrics/vscode.py
@@ -2,6 +2,7 @@ import copy
 import importlib.util
 import json
 import sys
+import re
 from typing import Dict


@@ -86,6 +87,18 @@ def compare_text_file(actual: str, expected: str, **options) -> float:
    with open(expected) as f2:
        expected_text = f2.read()

+    ignore_blanks = options.get('ignore_blanks', False)
+    if ignore_blanks:
+        actual_text = re.sub(r'[\t\n]', ' ', actual_text).strip()
+        actual_text = re.sub(r'\s+', ' ', actual_text)
+        expected_text = re.sub(r'[\t\n]', ' ', expected_text).strip()
+        expected_text = re.sub(r'\s+', ' ', expected_text)
+
+    ignore_case = options.get('ignore_case', False)
+    if ignore_case:
+        actual_text = actual_text.lower()
+        expected_text = expected_text.lower()
+
    if actual_text == expected_text:
        return 1.0
    return 0.0