From 96e2c6ee343aff67df69bbe52553fd7e8231789b Mon Sep 17 00:00:00 2001
From: David Chang <zdy004007@126.com>
Date: Tue, 20 Feb 2024 17:10:42 +0800
Subject: [PATCH] ver Feb20th

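Fixed server/main.py: errors raised while traversing the children of an
accessibility node are now logged and ignored instead of propagating.
Fixed several SheetCopilot task configurations.
Fixed several calc metrics, including the floating-point comparison in
sheet_data (values are now rounded before being compared) and the
recognition of conditional formatting.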
---
 desktop_env/evaluators/metrics/table.py       | 20 ++++--
 desktop_env/evaluators/metrics/utils.py       | 61 ++++++++++++++++---
 desktop_env/server/main.py                    | 15 +++--
 .../1954cced-e748-45c4-9c26-9855b97fbc5e.json |  8 +--
 .../1d17d234-e39d-4ed7-b46f-4417922a4e7c.json |  6 +-
 .../1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json | 10 +--
 .../21ab7b40-77c2-4ae6-8321-e00d3a086c73.json | 11 +---
 7 files changed, 91 insertions(+), 40 deletions(-)

diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py
index 873e808..e2c105c 100644
--- a/desktop_env/evaluators/metrics/table.py
+++ b/desktop_env/evaluators/metrics/table.py
@@ -117,7 +117,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
         pdworkbooke = None
         worksheete_names: List[str] = None
 
-    parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] = \
+    parse_idx: Callable[[Union[str, int], BOOK, BOOK], Tuple[BOOK, str]] = \
         functools.partial(
             _parse_sheet_idx,
             result_sheet_names=worksheetr_names,
@@ -136,10 +136,15 @@ def compare_table(result: str, expected: str = None, **options) -> float:
             #  Compare Sheet Data by Internal Value {{{ # 
             # sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
             # sheet_idx1: as sheet_idx0
+            # precision: int, number of decimal places both sheets are rounded to before comparison; defaults to 4
 
-            sheet1: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke))
-            sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
+            error_limit: int = r.get("precision", 4)
+            sheet1: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke)).round(error_limit)
+            sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke)).round(error_limit)
             metric: bool = sheet1.equals(sheet2)
+            logger.debug("Sheet1: \n%s", str(sheet1))
+            logger.debug("Sheet2: \n%s", str(sheet2))
+            logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2))
             logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
             #  }}} Compare Sheet Data by Internal Value # 
 
@@ -187,8 +192,13 @@ def compare_table(result: str, expected: str = None, **options) -> float:
             # sheet_idx1: as sheet_idx0
             # props: list of str indicating concerned styles, see utils._read_cell_style
 
-            styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
-            styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
+            sheet_idx1: Tuple[Book, str] = parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
+            book_name1: str = parse_idx(r["sheet_idx0"], result, expected)[0]
+            styles1: Dict[str, List[Any]] = load_xlsx_styles(*sheet_idx1, book_name1, **r)
+
+            sheet_idx2: Tuple[Book, str] = parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
+            book_name2: str = parse_idx(r["sheet_idx1"], result, expected)[0]
+            styles2: Dict[str, List[Any]] = load_xlsx_styles(*sheet_idx2, book_name2, **r)
             # number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
             # number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
             metric: bool = styles1 == styles2
diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py
index 6270ccf..55a6e8e 100644
--- a/desktop_env/evaluators/metrics/utils.py
+++ b/desktop_env/evaluators/metrics/utils.py
@@ -1,7 +1,7 @@
 import logging
 import zipfile
 from typing import Any, TypeVar, Union, Iterable, Optional, Callable
-from typing import Dict, List, Set, Match, Tuple
+from typing import Dict, List, Set, Match, Tuple, Pattern
 from urllib.parse import urlparse, urlunparse
 import re
 import functools
@@ -18,10 +18,10 @@ from openpyxl import Workbook
 from openpyxl.chart._chart import ChartBase
 from openpyxl.worksheet.worksheet import Worksheet
 from openpyxl.worksheet.filters import AutoFilter, SortState
-from openpyxl.worksheet.cell_range import MultiCellRange
+from openpyxl.worksheet.cell_range import MultiCellRange, CellRange
 from openpyxl.worksheet.dimensions import DimensionHolder
 from openpyxl.formatting.formatting import ConditionalFormattingList
-from openpyxl.utils import coordinate_to_tuple
+from openpyxl.utils import coordinate_to_tuple, get_column_letter
 from openpyxl.cell.cell import Cell
 from openpyxl.styles.differential import DifferentialStyle
 from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
@@ -255,7 +255,7 @@ def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[s
         if "filter" in pivot_props:
             info["filter_fields"] = set(f.fld for f in pvt.pageFields)
         if "col_fields" in pivot_props:
-            info["col_fields"] = [f.x for f in pvt.colFields]
+            info["col_fields"] = [f.x-left_bias for f in pvt.colFields]
         if "row_fields" in pivot_props:
             info["row_fields"] = [f.x-left_bias for f in pvt.rowFields]
         if "data_fields" in pivot_props:
@@ -347,22 +347,39 @@ def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[Different
     elif style_name=="font_size":
         return (diff_style or cell).font.size if cell.value is not None else None
     elif style_name=="fill_type":
-        return (diff_style or cell).fill.tagname
+        try:
+            return (diff_style or cell).fill.tagname
+        except:
+            return None
     elif style_name=="bgcolor":
-        return (diff_style or cell).fill.bgColor.rgb
+        try:
+            return (diff_style or cell).fill.bgColor.rgb
+        except:
+            return None
     elif style_name=="fgcolor":
-        return (diff_style or cell).fill.fgColor.rgb
+        try:
+            return (diff_style or cell).fill.fgColor.rgb
+        except:
+            return None
     elif style_name=="hyperlink":
         return cell.hyperlink or "" if cell.value is not None else None
     else:
         raise NotImplementedError("Unsupported Style: {:}".format(style_name))
 
-def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, List[Any]]:
+_absolute_range_pattern: Pattern[str] = re.compile( r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
+                                                        (?::
+                                                          \$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2
+                                                        )?
+                                                     """
+                                                  , re.X
+                                                  )
+def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]:
     #  function load_xlsx_styles {{{ # 
     """
     Args:
         xlsx_file (Workbook): concerned excel book
         sheet_name (str): sheet name
+        book_name (str): book name
         options (Dict[str, List[str]): dick like {"props": list of str} giving
           the concerned styles
 
@@ -398,10 +415,35 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[st
             active_cells: List[Cell] = []
             if r.type == "expression":
                 condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
+                logger.debug("Expression condition: %s", r.formula[0])
+
+                arguments: List[Any] = []
+                absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0])
+                for m in absolute_range_match:
+                    logger.debug("Absolute ranges: %s", repr(m))
+                    if m[2] is None and m[3] is None:
+                        arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1])))
+                    else:
+                        arguments.append( [ read_cell_value( book_name, sheet_name
+                                                           , coordinate="{:}{:}".format( get_column_letter(c[1])
+                                                                                       , c[0]
+                                                                                       )
+                                                           )\
+                                            for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells\
+                                          ]
+                                        )
+                logger.debug("Absolute range arguments: %s", repr(arguments))
+
                 for rge in fmt.cells:
                     for c in rge.cells:
                         cell: Cell = worksheet.cell(row=c[0], column=c[1])
-                        if condition(str(cell.value)):
+                        cell_value = read_cell_value( book_name, sheet_name
+                                                    , coordinate="{:}{:d}".format( get_column_letter(c[1])
+                                                                                 , c[0]
+                                                                                 )
+                                                    )
+                        if condition(cell_value, *arguments):
+                            logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
                             active_cells.append(cell)
             else:
                 raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
@@ -409,6 +451,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[st
             for c in active_cells:
                 style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]
 
+    logger.debug(".[%s].styles: %s", sheet_name, repr(style_dict))
     return style_dict
     #  }}} function load_xlsx_styles # 
 
diff --git a/desktop_env/server/main.py b/desktop_env/server/main.py
index e66ddd4..4d9763b 100644
--- a/desktop_env/server/main.py
+++ b/desktop_env/server/main.py
@@ -387,12 +387,15 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N
             index_base += MAXIMUN_COLUMN
         return xml_node
     else:
-        for i, ch in enumerate(node):
-            # HYPERPARAMETER
-            if i>=1025:
-                logger.warning("Max width reached")
-                break
-            xml_node.append(_create_atspi_node(ch, depth+1, flag))
+        try:
+            for i, ch in enumerate(node):
+                # HYPERPARAMETER
+                if i>=1025:
+                    logger.warning("Max width reached")
+                    break
+                xml_node.append(_create_atspi_node(ch, depth+1, flag))
+        except:
+            logger.warning("Error occurred during children traversing. Has Ignored. Node: %s", lxml.etree.tostring(xml_node, encoding="unicode"))
         return xml_node
     #  }}} function _create_atspi_node # 
 
diff --git a/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json b/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json
index ad419f2..2bdfc93 100644
--- a/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json
+++ b/evaluation_examples/examples/sheetcopilot/1954cced-e748-45c4-9c26-9855b97fbc5e.json
@@ -1,7 +1,7 @@
 {
 	"id": "1954cced-e748-45c4-9c26-9855b97fbc5e",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Create a Pivot Table in a new sheet to count how many times each \"Invoice No.\" appears.",
+	"instruction": "Create a Pivot Table in a new sheet (Sheet2) to count how many times each \"Invoice No.\" appears.",
 	"source": "SheetCopilot@104",
 	"config": [
 		{
@@ -73,8 +73,8 @@
 			"rules": [
 				{
 					"type": "pivot_table",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
 					"pivot_props": [
 						"col_fields",
 						"filter",
@@ -85,4 +85,4 @@
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json b/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json
index fa158d2..ed6b20a 100644
--- a/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json
+++ b/evaluation_examples/examples/sheetcopilot/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json
@@ -73,10 +73,10 @@
 			"rules": [
 				{
 					"type": "sheet_data",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0"
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2"
 				}
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json b/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json
index a724018..2407fd3 100644
--- a/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json
+++ b/evaluation_examples/examples/sheetcopilot/1de60575-bb6e-4c3d-9e6a-2fa699f9f197.json
@@ -1,7 +1,7 @@
 {
 	"id": "1de60575-bb6e-4c3d-9e6a-2fa699f9f197",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Summarize the total revenue for each promotion type in a new sheet with the promotion names as the column headers.",
+	"instruction": "Summarize the total revenue for each promotion type in a new sheet (Sheet2) with the promotion names as the column headers.",
 	"source": "SheetCopilot@55",
 	"config": [
 		{
@@ -54,7 +54,7 @@
 			{
 				"type": "sleep",
 				"parameters": {
-					"seconds": 0.5
+					"seconds": 3.0
 				}
 			}
 		],
@@ -73,8 +73,8 @@
 			"rules": [
 				{
 					"type": "pivot_table",
-					"sheet_idx0": 0,
-					"sheet_idx1": "EI0",
+					"sheet_idx0": "RNSheet2",
+					"sheet_idx1": "ENSheet2",
 					"pivot_props": [
 						"col_fields",
 						"filter",
@@ -85,4 +85,4 @@
 			]
 		}
 	}
-}
\ No newline at end of file
+}
diff --git a/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json b/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json
index 0040efa..b4e36fe 100644
--- a/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json
+++ b/evaluation_examples/examples/sheetcopilot/21ab7b40-77c2-4ae6-8321-e00d3a086c73.json
@@ -1,7 +1,7 @@
 {
 	"id": "21ab7b40-77c2-4ae6-8321-e00d3a086c73",
 	"snapshot": "libreoffice_calc",
-	"instruction": "Please calculate the period rate for my data in a new column with header \"Period Rate (%)\", convert the results as number type, and highlight the highest result as green.",
+	"instruction": "Please calculate the period rate for my data in a new column with header \"Period Rate (%)\", convert the results as number type, and highlight the highest result with green (#00ff00) font.",
 	"source": "SheetCopilot@124",
 	"config": [
 		{
@@ -81,15 +81,10 @@
 					"sheet_idx0": 0,
 					"sheet_idx1": "EI0",
 					"props": [
-						"font_bold",
-						"font_color",
-						"bgcolor",
-						"font_name",
-						"font_italic",
-						"font_underline"
+						"font_color"
 					]
 				}
 			]
 		}
 	}
-}
\ No newline at end of file
+}