From 96e2c6ee343aff67df69bbe52553fd7e8231789b Mon Sep 17 00:00:00 2001 From: David Chang Date: Tue, 20 Feb 2024 17:10:42 +0800 Subject: [PATCH] ver Feb20th fixed server/main.py fixed several sheetcopilot tasks fixed several calc metrics, including float difference of sheet_data and conditional formatting recognition --- desktop_env/evaluators/metrics/table.py | 20 ++++-- desktop_env/evaluators/metrics/utils.py | 61 ++++++++++++++++--- desktop_env/server/main.py | 15 +++-- .../1954cced-e748-45c4-9c26-9855b97fbc5e.json | 8 +-- .../1d17d234-e39d-4ed7-b46f-4417922a4e7c.json | 6 +- .../1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json | 10 +-- .../21ab7b40-77c2-4ae6-8321-e00d3a086c73.json | 11 +--- 7 files changed, 91 insertions(+), 40 deletions(-) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index 873e808..e2c105c 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -117,7 +117,7 @@ def compare_table(result: str, expected: str = None, **options) -> float: pdworkbooke = None worksheete_names: List[str] = None - parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] = \ + parse_idx: Callable[[Union[str, int], BOOK, BOOK], Tuple[BOOK, str]] = \ functools.partial( _parse_sheet_idx, result_sheet_names=worksheetr_names, @@ -136,10 +136,15 @@ def compare_table(result: str, expected: str = None, **options) -> float: # Compare Sheet Data by Internal Value {{{ # # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1" # sheet_idx1: as sheet_idx0 + # precision: int as number of decimal digits, default to 4 - sheet1: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke)) - sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke)) + error_limit: int = r.get("precision", 4) + sheet1: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke)).round(error_limit) + sheet2: pd.DataFrame = 
_load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke)).round(error_limit) metric: bool = sheet1.equals(sheet2) + logger.debug("Sheet1: \n%s", str(sheet1)) + logger.debug("Sheet2: \n%s", str(sheet2)) + logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2)) logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric) # }}} Compare Sheet Data by Internal Value # @@ -187,8 +192,13 @@ def compare_table(result: str, expected: str = None, **options) -> float: # sheet_idx1: as sheet_idx0 # props: list of str indicating concerned styles, see utils._read_cell_style - styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r) - styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r) + sheet_idx1: Tuple[Book, str] = parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke) + book_name1: str = parse_idx(r["sheet_idx0"], result, expected)[0] + styles1: Dict[str, List[Any]] = load_xlsx_styles(*sheet_idx1, book_name1, **r) + + sheet_idx2: Tuple[Book, str] = parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke) + book_name2: str = parse_idx(r["sheet_idx1"], result, expected)[0] + styles2: Dict[str, List[Any]] = load_xlsx_styles(*sheet_idx2, book_name2, **r) # number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"] # number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"] metric: bool = styles1 == styles2 diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 6270ccf..55a6e8e 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -1,7 +1,7 @@ import logging import zipfile from typing import Any, TypeVar, Union, Iterable, Optional, Callable -from typing import Dict, List, Set, Match, 
Tuple +from typing import Dict, List, Set, Match, Tuple, Pattern from urllib.parse import urlparse, urlunparse import re import functools @@ -18,10 +18,10 @@ from openpyxl import Workbook from openpyxl.chart._chart import ChartBase from openpyxl.worksheet.worksheet import Worksheet from openpyxl.worksheet.filters import AutoFilter, SortState -from openpyxl.worksheet.cell_range import MultiCellRange +from openpyxl.worksheet.cell_range import MultiCellRange, CellRange from openpyxl.worksheet.dimensions import DimensionHolder from openpyxl.formatting.formatting import ConditionalFormattingList -from openpyxl.utils import coordinate_to_tuple +from openpyxl.utils import coordinate_to_tuple, get_column_letter from openpyxl.cell.cell import Cell from openpyxl.styles.differential import DifferentialStyle from openpyxl.pivot.table import TableDefinition as PivotTableDefinition @@ -255,7 +255,7 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s if "filter" in pivot_props: info["filter_fields"] = set(f.fld for f in pvt.pageFields) if "col_fields" in pivot_props: - info["col_fields"] = [f.x for f in pvt.colFields] + info["col_fields"] = [f.x-left_bias for f in pvt.colFields] if "row_fields" in pivot_props: info["row_fields"] = [f.x-left_bias for f in pvt.rowFields] if "data_fields" in pivot_props: @@ -347,22 +347,39 @@ def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[Different elif style_name=="font_size": return (diff_style or cell).font.size if cell.value is not None else None elif style_name=="fill_type": - return (diff_style or cell).fill.tagname + try: + return (diff_style or cell).fill.tagname + except: + return None elif style_name=="bgcolor": - return (diff_style or cell).fill.bgColor.rgb + try: + return (diff_style or cell).fill.bgColor.rgb + except: + return None elif style_name=="fgcolor": - return (diff_style or cell).fill.fgColor.rgb + try: + return (diff_style or cell).fill.fgColor.rgb + except: + return None 
elif style_name=="hyperlink": return cell.hyperlink or "" if cell.value is not None else None else: raise NotImplementedError("Unsupported Style: {:}".format(style_name)) -def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, List[Any]]: +_absolute_range_pattern: Pattern[str] = re.compile( r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1 + (?:: + \$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2 + )? + """ + , re.X + ) +def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]: # function load_xlsx_styles {{{ # """ Args: xlsx_file (Workbook): concerned excel book sheet_name (str): sheet name + book_name (str): book name options (Dict[str, List[str]): dick like {"props": list of str} giving the concerned styles @@ -398,10 +415,35 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[st active_cells: List[Cell] = [] if r.type == "expression": condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile() + logger.debug("Expression condition: %s", r.formula[0]) + + arguments: List[Any] = [] + absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0]) + for m in absolute_range_match: + logger.debug("Absolute ranges: %s", repr(m)) + if m[2] is None and m[3] is None: + arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1]))) + else: + arguments.append( [ read_cell_value( book_name, sheet_name + , coordinate="{:}{:}".format( get_column_letter(c[1]) + , c[0] + ) + )\ + for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells\ + ] + ) + logger.debug("Absolute range arguments: %s", repr(arguments)) + for rge in fmt.cells: for c in rge.cells: cell: Cell = worksheet.cell(row=c[0], column=c[1]) - if condition(str(cell.value)): + cell_value = read_cell_value( book_name, sheet_name + , coordinate="{:}{:d}".format( get_column_letter(c[1]) + , c[0] + ) + ) + if 
condition(cell_value, *arguments): + logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0]) active_cells.append(cell) else: raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type)) @@ -409,6 +451,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[st for c in active_cells: style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles] + logger.debug(".[%s].styles: %s", sheet_name, repr(style_dict)) return style_dict # }}} function load_xlsx_styles # diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py index e66ddd4..4d9763b 100644 --- a/desktop_env/server/main.py +++ b/desktop_env/server/main.py @@ -387,12 +387,15 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N index_base += MAXIMUN_COLUMN return xml_node else: - for i, ch in enumerate(node): - # HYPERPARAMETER - if i>=1025: - logger.warning("Max width reached") - break - xml_node.append(_create_atspi_node(ch, depth+1, flag)) + try: + for i, ch in enumerate(node): + # HYPERPARAMETER + if i>=1025: + logger.warning("Max width reached") + break + xml_node.append(_create_atspi_node(ch, depth+1, flag)) + except: + logger.warning("Error occurred during children traversing. Has Ignored. 
Node: %s", lxml.etree.tostring(xml_node, encoding="unicode")) return xml_node # }}} function _create_atspi_node # diff --git a/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json b/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json index ad419f2..2bdfc93 100644 --- a/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json +++ b/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json @@ -1,7 +1,7 @@ { "id": "1954cced-e748-45c4-9c26-9855b97fbc5e", "snapshot": "libreoffice_calc", - "instruction": "Create a Pivot Table in a new sheet to count how many times each \"Invoice No.\" appears.", + "instruction": "Create a Pivot Table in a new sheet (Sheet2) to count how many times each \"Invoice No.\" appears.", "source": "SheetCopilot@104", "config": [ { @@ -73,8 +73,8 @@ "rules": [ { "type": "pivot_table", - "sheet_idx0": 0, - "sheet_idx1": "EI0", + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", "pivot_props": [ "col_fields", "filter", @@ -85,4 +85,4 @@ ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json b/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json index fa158d2..ed6b20a 100644 --- a/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json +++ b/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json @@ -73,10 +73,10 @@ "rules": [ { "type": "sheet_data", - "sheet_idx0": 0, - "sheet_idx1": "EI0" + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2" } ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json b/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json index a724018..2407fd3 100644 --- 
a/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json +++ b/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json @@ -1,7 +1,7 @@ { "id": "1de60575-bb6e-4c3d-9e6a-2fa699f9f197", "snapshot": "libreoffice_calc", - "instruction": "Summarize the total revenue for each promotion type in a new sheet with the promotion names as the column headers.", + "instruction": "Summarize the total revenue for each promotion type in a new sheet (Sheet2) with the promotion names as the column headers.", "source": "SheetCopilot@55", "config": [ { @@ -54,7 +54,7 @@ { "type": "sleep", "parameters": { - "seconds": 0.5 + "seconds": 3.0 } } ], @@ -73,8 +73,8 @@ "rules": [ { "type": "pivot_table", - "sheet_idx0": 0, - "sheet_idx1": "EI0", + "sheet_idx0": "RNSheet2", + "sheet_idx1": "ENSheet2", "pivot_props": [ "col_fields", "filter", @@ -85,4 +85,4 @@ ] } } -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json b/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json index 0040efa..b4e36fe 100644 --- a/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json +++ b/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json @@ -1,7 +1,7 @@ { "id": "21ab7b40-77c2-4ae6-8321-e00d3a086c73", "snapshot": "libreoffice_calc", - "instruction": "Please calculate the period rate for my data in a new column with header \"Period Rate (%)\", convert the results as number type, and highlight the highest result as green.", + "instruction": "Please calculate the period rate for my data in a new column with header \"Period Rate (%)\", convert the results as number type, and highlight the highest result with green (#00ff00) font.", "source": "SheetCopilot@124", "config": [ { @@ -81,15 +81,10 @@ "sheet_idx0": 0, "sheet_idx1": "EI0", "props": [ - "font_bold", - "font_color", - "bgcolor", - 
"font_name", - "font_italic", - "font_underline" + "font_color" ] } ] } } -} \ No newline at end of file +}