This commit is contained in:
tsuky_chen
2024-02-20 20:01:34 +08:00
69 changed files with 5330 additions and 56 deletions

View File

@@ -412,7 +412,7 @@ class SetupController:
except Exception as e:
if attempt < 14:
logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}")
time.sleep(1)
time.sleep(5)
else:
logger.error(f"Failed to connect after multiple attempts: {e}")
raise e
@@ -541,7 +541,7 @@ class SetupController:
except Exception as e:
if attempt < 14:
logger.error(f"Attempt {attempt + 1}: Failed to connect, retrying. Error: {e}")
time.sleep(1)
time.sleep(5)
else:
logger.error(f"Failed to connect after multiple attempts: {e}")
raise e
@@ -554,7 +554,10 @@ class SetupController:
if platform == 'googledrive':
url = 'https://drive.google.com/drive/my-drive'
page = context.new_page() # Create a new page (tab) within the existing context
page.goto(url)
try:
page.goto(url, timeout=60000)
except:
logger.warning("Opening %s exceeds time limit", url) # only for human test
logger.info(f"Opened new page: {url}")
settings = json.load(open(config['settings_file']))
email, password = settings['email'], settings['password']
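# Illustrative settings_file content (values are placeholders, not real credentials):
# {"email": "user@example.com", "password": "..."}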

View File

@@ -49,7 +49,8 @@ class DesktopEnv(gym.Env):
task_config: Dict[str, Any] = None,
tmp_dir: str = "tmp",
cache_dir: str = "cache",
screen_size: Tuple[int] = (1920, 1080)
screen_size: Tuple[int] = (1920, 1080),
headless: bool = False
):
"""
Args:
@@ -75,6 +76,7 @@ class DesktopEnv(gym.Env):
self.tmp_dir_base: str = tmp_dir
self.cache_dir_base: str = cache_dir
self.vm_screen_size = screen_size
self.headless = headless
os.makedirs(self.tmp_dir_base, exist_ok=True)
@@ -116,7 +118,8 @@ class DesktopEnv(gym.Env):
break
else:
logger.info("Starting VM...")
_execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm])
_execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm]) if not self.headless \
else _execute_command(["vmrun", "-T", "ws", "start", self.path_to_vm, "nogui"])
time.sleep(3)
except subprocess.CalledProcessError as e:
logger.error(f"Error executing command: {e.output.decode().strip()}")

View File

@@ -12,7 +12,11 @@ from .chrome import (
get_profile_name,
get_number_of_search_results,
get_googledrive_file,
get_active_tab_info
get_active_tab_info,
get_enable_do_not_track,
get_enable_enhanced_safety_browsing,
get_new_startup_page,
get_find_unpacked_extension_path
)
from .file import get_cloud_file, get_vm_file, get_cache_file
from .general import get_vm_command_line, get_vm_terminal_output

View File

@@ -363,6 +363,9 @@ def get_active_tab_info(env, config: Dict[str, str]):
break
browser.close()
print("active_tab_title: {}".format(active_tab_info.get('title', 'None')))
print("active_tab_url: {}".format(active_tab_info.get('url', 'None')))
print("active_tab_content: {}".format(active_tab_info.get('content', 'None')))
return active_tab_info
@@ -516,4 +519,133 @@ def get_googledrive_file(env, config: Dict[str, Any]) -> str:
else f"title = '{fp}' and trashed = false" for jdx, fp in enumerate(path)]
dest = config['dest'][idx]
_path_list.append(get_single_file(query, os.path.join(env.cache_dir, dest)))
return _path_list
return _path_list
def get_enable_do_not_track(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
'output'].strip()
# preference_file_path = env.controller.execute_python_command(
# "import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
# 'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
if_enable_do_not_track = data.get('enable_do_not_track', {}) # bool
return "true" if if_enable_do_not_track else "false"
except Exception as e:
logger.error(f"Error: {e}")
return "false"
def get_enable_enhanced_safety_browsing(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
'output'].strip()
# preference_file_path = env.controller.execute_python_command(
# "import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
# 'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
if_enable_enhanced_safety_browsing = data.get('safebrowsing', {}).get('enhanced', {}) # bool
return "true" if if_enable_enhanced_safety_browsing else "false"
except Exception as e:
logger.error(f"Error: {e}")
return "false"
def get_new_startup_page(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
'output'].strip()
# preference_file_path = env.controller.execute_python_command(
# "import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
# 'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
# if data has no 'session' key, Chrome is in fresh-start mode, which counts as true;
# otherwise, check the code number of 'restore_on_startup' under 'session'
if "session" not in data.keys():
return "true"
else:
startup_code = data.get('session', {}).get('restore_on_startup', {}) # int, needs to be 5
return "true" if startup_code == 5 else "false"
except Exception as e:
logger.error(f"Error: {e}")
return "false"
def get_find_unpacked_extension_path(env, config: Dict[str, str]):
os_type = env.vm_platform
if os_type == 'Windows':
preference_file_path = env.controller.execute_python_command("""import os; print(os.path.join(os.getenv('LOCALAPPDATA'),
'Google\\Chrome\\User Data\\Default\\Preferences'))""")['output'].strip()
elif os_type == 'Darwin':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), 'Library/Application Support/Google/Chrome/Default/Preferences'))")[
'output'].strip()
elif os_type == 'Linux':
preference_file_path = env.controller.execute_python_command(
"import os; print(os.path.join(os.getenv('HOME'), '.config/google-chrome/Default/Preferences'))")[
'output'].strip()
# preference_file_path = env.controller.execute_python_command(
# "import os; print(os.path.join(os.getenv('HOME'), 'snap/chromium/common/chromium/Default/Preferences'))")[
# 'output'].strip()
else:
raise Exception('Unsupported operating system')
try:
content = env.controller.get_file(preference_file_path)
data = json.loads(content)
# Preferences stores the paths of all installed extensions; return them all and let the metric look for one that matches the target extension path
all_extensions_path = []
all_extensions = data.get('extensions', {}).get('settings', {})
for ext_id in all_extensions.keys():
path = all_extensions[ext_id]["path"]
all_extensions_path.append(path)
return all_extensions_path
except Exception as e:
logger.error(f"Error: {e}")
return "Google"

View File

@@ -15,7 +15,8 @@ from .chrome import (
check_font_size,
check_enabled_experiments,
check_history_deleted,
is_expected_search_query
is_expected_search_query,
is_expected_active_tab
)
from .docs import (
compare_font_names,
@@ -51,6 +52,7 @@ from .general import (
check_json,
check_list,
exact_match,
is_in_list,
fuzzy_match,
check_include_exclude
)

View File

@@ -9,6 +9,22 @@ from desktop_env.evaluators.metrics.utils import are_lists_equal, compare_urls
logger = logging.getLogger("desktopenv.metrics.chrome")
def is_expected_active_tab(active_tab_info: Dict[str, str], rule: Dict[str, Any]) -> float:
"""
Checks if the expected active tab is open in Chrome.
"""
match_type = rule['type']
if match_type == "url":
expected_url = rule['url']
actual_url = active_tab_info['url']
print("expected_url: {}".format(expected_url))
print("actual_url: {}".format(actual_url))
return 1 if compare_urls(expected_url, actual_url) else 0
else:
logger.error(f"Unknown type: {match_type}")
return 0
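# Minimal usage sketch (the argument values are illustrative, not taken from a task config):
# is_expected_active_tab({"title": "Example", "url": "https://example.com/"},
#                        {"type": "url", "url": "https://example.com/"})  # -> 1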
def is_expected_tabs(open_tabs: List[Dict[str, str]], rule: Dict[str, Any]) -> float:
"""
Checks if the expected tabs are open in Chrome.

View File

@@ -38,7 +38,13 @@ def exact_match(result, rules) -> float:
else:
return 0.
def is_in_list(result, rules) -> float:
expect = rules["expected"]
if expect in result:
return 1.
else:
return 0.
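# Usage sketch (illustrative values): is_in_list(["a", "b"], {"expected": "a"}) -> 1.; is_in_list(["b"], {"expected": "a"}) -> 0.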
def fuzzy_match(result, rules) -> float:
expect = rules["expected"]

View File

@@ -16,7 +16,8 @@ from openpyxl.worksheet.datavalidation import DataValidation
from openpyxl.worksheet.worksheet import Worksheet
from .utils import _match_value_to_rule, _read_cell_style, read_cell_value
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles
from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles\
, load_filters, load_pivot_tables
# from openpyxl.utils import coordinate_to_tuple
@@ -116,7 +117,7 @@ def compare_table(result: str, expected: str = None, **options) -> float:
pdworkbooke = None
worksheete_names: List[str] = None
parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] = \
parse_idx: Callable[[Union[str, int], BOOK, BOOK], Tuple[BOOK, str]] = \
functools.partial(
_parse_sheet_idx,
result_sheet_names=worksheetr_names,
@@ -135,10 +136,15 @@ def compare_table(result: str, expected: str = None, **options) -> float:
# Compare Sheet Data by Internal Value {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
# precision: int, number of decimal digits; defaults to 4
sheet1: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke))
sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke))
error_limit: int = r.get("precision", 4)
sheet1: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx0"], pdworkbookr, pdworkbooke)).round(error_limit)
sheet2: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx1"], pdworkbookr, pdworkbooke)).round(error_limit)
metric: bool = sheet1.equals(sheet2)
logger.debug("Sheet1: \n%s", str(sheet1))
logger.debug("Sheet2: \n%s", str(sheet2))
logger.debug("Sheet1 =v= Sheet2: \n%s", str(sheet1==sheet2))
logger.debug("Assertion: %s =v= %s - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Sheet Data by Internal Value #
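# Illustrative rule for this branch (the "type" value lies outside this hunk, so it is left elided; the other values are assumptions):
# {"type": ..., "sheet_idx0": 0, "sheet_idx1": "EI0", "precision": 2}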
@@ -186,8 +192,13 @@ def compare_table(result: str, expected: str = None, **options) -> float:
# sheet_idx1: as sheet_idx0
# props: list of str indicating concerned styles, see utils._read_cell_style
styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
sheet_idx1: Tuple[Book, str] = parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)
book_name1: str = parse_idx(r["sheet_idx0"], result, expected)[0]
styles1: Dict[str, List[Any]] = load_xlsx_styles(*sheet_idx1, book_name1, **r)
sheet_idx2: Tuple[Book, str] = parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)
book_name2: str = parse_idx(r["sheet_idx1"], result, expected)[0]
styles2: Dict[str, List[Any]] = load_xlsx_styles(*sheet_idx2, book_name2, **r)
# number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
# number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"]
metric: bool = styles1 == styles2
@@ -303,6 +314,29 @@ def compare_table(result: str, expected: str = None, **options) -> float:
logger.debug("Assertion: %s[cols] == %s[cols] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Check Row Properties #
elif r["type"] == "filter":
# Compare Filters {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
filters1: Dict[str, Any] = load_filters(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
filters2: Dict[str, Any] = load_filters(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
metric: bool = filters1==filters2
logger.debug("Assertion: %s[filter] == %s[filter] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Filters #
elif r["type"] == "pivot_table":
# Compare Pivot Tables {{{ #
# sheet_idx0: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"
# sheet_idx1: as sheet_idx0
# pivot_props: list of str, see utils.load_pivot_tables
pivots1: Dict[str, Any] = load_pivot_tables(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r)
pivots2: Dict[str, Any] = load_pivot_tables(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r)
metric: bool = pivots1==pivots2
logger.debug("Assertion: %s[pivot]==%s[pivot] - %s", r["sheet_idx0"], r["sheet_idx1"], metric)
# }}} Compare Pivot Tables #
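# Illustrative rule entries for the two branches above (field names follow the comments; the concrete values are assumptions):
# {"type": "filter", "sheet_idx0": 0, "sheet_idx1": "EI0"}
# {"type": "pivot_table", "sheet_idx0": 0, "sheet_idx1": "EI0",
#  "pivot_props": ["name", "row_fields", "col_fields", "data_fields"]}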
elif r["type"] == "check_cell":
# Check Cell Properties {{{ #
# sheet_idx: 0 == "RI0" == "RNSheet1" | "EI0" == "ENSheet1"

View File

@@ -1,12 +1,13 @@
import logging
import zipfile
from typing import Any, TypeVar, Union, Iterable, Optional, Callable
from typing import Dict, List, Set, Match
from typing import Dict, List, Set, Match, Tuple, Pattern
from urllib.parse import urlparse, urlunparse
import re
import functools
import operator
import builtins
import itertools
import lxml.cssselect
import lxml.etree
@@ -16,12 +17,15 @@ from lxml.etree import _Element
from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.worksheet.cell_range import MultiCellRange
from openpyxl.worksheet.filters import AutoFilter, SortState
from openpyxl.worksheet.cell_range import MultiCellRange, CellRange
from openpyxl.worksheet.dimensions import DimensionHolder
from openpyxl.formatting.formatting import ConditionalFormattingList
#from openpyxl.utils import get_column_letter
from openpyxl.utils import coordinate_to_tuple, get_column_letter
from openpyxl.cell.cell import Cell
from openpyxl.styles.differential import DifferentialStyle
from openpyxl.pivot.table import TableDefinition as PivotTableDefinition
from openpyxl.pivot.cache import CacheSource as PivotCacheSource
import formulas
V = TypeVar("Value")
@@ -79,6 +83,7 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
# Available Chart Properties:
# title: str
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
# legend: "b" | "tr" | "l" | "r" | "t"
# width: number
# height: number
# type: "scatterChart" | "lineChart" | "barChart"
@@ -103,7 +108,10 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
"""
# workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
worksheet: Worksheet = xlsx_file[sheet_name]
try:
worksheet: Worksheet = xlsx_file[sheet_name]
except KeyError:
return {}
charts: List[ChartBase] = worksheet._charts
chart_set: Dict[str, Any] = {}
@@ -133,7 +141,12 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
info: Dict[str, Any] = {}
if "title" in chart_props:
info["title"] = ch.title.tx.rich.p[0].r[0].t
try:
info["title"] = ch.title.tx.rich.p[0].r[0].t
except:
info["title"] = None
if "legend" in chart_props:
info["legend"] = ch.legend.position if ch.legend is not None else None
if "anchor" in chart_props:
info["anchor"] = [ch.anchor.editAs
, ch.anchor._from.col, ch.anchor.to.row
@@ -149,15 +162,114 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
info["direction"] = ch.barDir
if "xtitle" in chart_props:
info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t
try:
info["xtitle"] = ch.x_axis.title.tx.rich.p[0].r[0].t
except:
info["xtitle"] = None
if "ytitle" in chart_props:
info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t
try:
info["ytitle"] = ch.y_axis.title.tx.rich.p[0].r[0].t
except:
info["ytitle"] = None
if "ztitle" in chart_props:
info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
try:
info["ztitle"] = ch.z_axis.title.tx.rich.p[0].r[0].t
except:
info["ztitle"] = None
chart_set[series] = info
logger.debug(".[%s].charts: %s", sheet_name, repr(chart_set))
return chart_set
# }}} function load_charts #
# Available Pivot Properties:
# name: str
# show_total, show_empty_row, show_empty_col, show_headers: bool
# location: str
# selection: if the concrete item selection should be checked, a list of sets of tuples like (bool, index) is returned; lists are returned instead of sets if "ordered" is specified
# filter: if the filter fields should be checked; field indices are returned in the `filter_fields` item
# col_fields: indices
# row_fields: indices
# data_fields: list of str representations. The str representation is like "index;name;subtotal_type;show_data_as"; name is optional and is only included when `data_fields_name` is specified in `pivot_props`
def load_pivot_tables(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
# function load_pivot_tables {{{ #
"""
Args:
xlsx_file (Workbook): concerned excel book
sheet_name (str): sheet name
options (Dict[str, List[str]]): dict like {"pivot_props": list of str}
giving the concerned pivot properties
Returns:
Dict[str, Any]: information of pivot tables, dict like
{
<str representing data source>: {
<str as property>: anything
}
}
"""
try:
worksheet: Worksheet = xlsx_file[sheet_name]
except KeyError:
return {}
pivots: List[PivotTableDefinition] = worksheet._pivots
pivot_set: Dict[str, Any] = {}
pivot_props: Set[str] = set(options.get("pivot_props", []))
for pvt in pivots:
raw_selection: List[List[tuple[Optional[bool], int]]] =\
[ [(itm.h, itm.x) for itm in f.items if itm.x is not None]\
for f in pvt.pivotFields
]
raw__selection: List[List[tuple[Optional[bool], int]]] = list(itertools.dropwhile(lambda r: len(r)==0, raw_selection))
left_bias = len(raw_selection)-len(raw__selection)
selection: List[List[tuple[Optional[bool], int]]] = list((itertools.dropwhile(lambda r: len(r)==0, reversed(raw__selection))))[::-1]
right_bias = len(raw__selection)-len(selection)
cache_source: PivotCacheSource = pvt.cache.cacheSource
cell_range1: str
cell_range2: str
cell_range1, cell_range2 = cache_source.worksheetSource.ref.split(":")
cell_range1: Tuple[int, int] = coordinate_to_tuple(cell_range1)
cell_range1 = (cell_range1[0], cell_range1[1]+left_bias)
cell_range2: Tuple[int, int] = coordinate_to_tuple(cell_range2)
cell_range2 = (cell_range2[0], cell_range2[1]-right_bias)
source: str = "{:};{:}:{:};{:}".format(cache_source.type, cell_range1, cell_range2, cache_source.worksheetSource.sheet)
info: Dict[str, Any] = {}
if "name" in pivot_props:
info["name"] = pvt.name
if "show_total" in pivot_props:
info["show_total"] = pvt.visualTotals
if "show_empty_row" in pivot_props:
info["show_empty_row"] = pvt.showEmptyRow
if "show_empty_col" in pivot_props:
info["show_empty_col"] = pvt.showEmptyCol
if "show_headers" in pivot_props:
info["show_headers"] = pvt.showHeaders
if "location" in pivot_props:
info["location"] = pvt.location
if "filter" in pivot_props or "selection" in pivot_props:
info["selection"] = selection if "ordered" in pivot_props else list(set(r) for r in selection)
if "filter" in pivot_props:
info["filter_fields"] = set(f.fld for f in pvt.pageFields)
if "col_fields" in pivot_props:
info["col_fields"] = [f.x-left_bias for f in pvt.colFields]
if "row_fields" in pivot_props:
info["row_fields"] = [f.x-left_bias for f in pvt.rowFields]
if "data_fields" in pivot_props:
info["data_fields"] = [ "{:d};{:};{:};{:}".format( f.fld-left_bias, f.name if "data_fields_name" in pivot_props else ""
, f.subtotal, f.showDataAs
)\
for f in pvt.dataFields
]
pivot_set[source] = info
logger.debug(".[%s].pivots: %s", sheet_name, repr(pivot_set))
return pivot_set
# }}} function load_pivot_tables #
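# Usage sketch for load_pivot_tables (file name, sheet name and the resulting values are assumptions):
#     import openpyxl
#     book = openpyxl.load_workbook("result.xlsx")
#     pivots = load_pivot_tables(book, "Sheet2", pivot_props=["name", "row_fields", "data_fields"])
#     # e.g. {"worksheet;(1, 1):(20, 5);Sheet1": {"name": "PivotTable1", "row_fields": [0], "data_fields": ["2;;sum;normal"]}}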
_shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# read_cell_value {{{ #
@@ -210,9 +322,12 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
# font_color - in aRGB, e.g., FF000000 is black
# font_bold - bool
# font_italic - bool
# font_underline - "single" | "double" | "singleAccounting" | "doubleAccounting"
# font_size - float
# fill_type - "patternFill" | "gradientFill"
# bgcolor - in aRGB, e.g., FFFF0000 is red
# fgcolor - in aRGB, e.g., FF00FFFF is yellow
# hyperlink - str
def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[DifferentialStyle] = None) -> Any:
if style_name=="number_format":
return (cell.number_format if diff_style is None else diff_style.numFmt.formatCode)\
@@ -227,21 +342,44 @@ def _read_cell_style(style_name: str, cell: Cell, diff_style: Optional[Different
return (diff_style or cell).font.bold if cell.value is not None else None
elif style_name=="font_italic":
return (diff_style or cell).font.italic if cell.value is not None else None
elif style_name=="font_underline":
return (diff_style or cell).font.underline if cell.value is not None else None
elif style_name=="font_size":
return (diff_style or cell).font.size if cell.value is not None else None
elif style_name=="fill_type":
return (diff_style or cell).fill.tagname
try:
return (diff_style or cell).fill.tagname
except:
return None
elif style_name=="bgcolor":
return (diff_style or cell).fill.bgColor.rgb
try:
return (diff_style or cell).fill.bgColor.rgb
except:
return None
elif style_name=="fgcolor":
return (diff_style or cell).fill.fgColor.rgb
try:
return (diff_style or cell).fill.fgColor.rgb
except:
return None
elif style_name=="hyperlink":
return cell.hyperlink or "" if cell.value is not None else None
else:
raise NotImplementedError("Unsupported Style: {:}".format(style_name))
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, List[Any]]:
_absolute_range_pattern: Pattern[str] = re.compile( r"""\$(?P<col1>[A-Z]{1,3})\$(?P<row1>\d+) # coord1
(?::
\$(?P<col2>[A-Z]{1,3})\$(?P<row2>\d+) # coord2
)?
"""
, re.X
)
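# With findall, each match of the pattern above is a 4-tuple (col1, row1, col2, row2); groups that do not
# participate come back as empty strings, e.g. "$A$1" -> ("A", "1", "", "") and "$A$1:$C$10" -> ("A", "1", "C", "10").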
def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **options) -> Dict[str, List[Any]]:
# function load_xlsx_styles {{{ #
"""
Args:
xlsx_file (Workbook): concerned excel book
sheet_name (str): sheet name
book_name (str): book name
options (Dict[str, List[str]]): dict like {"props": list of str} giving
the concerned styles
@@ -253,7 +391,10 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[st
}
"""
worksheet: Worksheet = xlsx_file[sheet_name]
try:
worksheet: Worksheet = xlsx_file[sheet_name]
except KeyError:
return {}
style_dict: Dict[str, List[Any]] = {}
concerned_styles: List[str] = options.get("props", [])
@@ -274,10 +415,35 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[st
active_cells: List[Cell] = []
if r.type == "expression":
condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile()
logger.debug("Expression condition: %s", r.formula[0])
arguments: List[Any] = []
absolute_range_match: List[Tuple[str, str, str, str]] = _absolute_range_pattern.findall(r.formula[0])
for m in absolute_range_match:
logger.debug("Absolute ranges: %s", repr(m))
if not m[2] and not m[3]:
arguments.append(read_cell_value(book_name, sheet_name, coordinate="{:}{:}".format(m[0], m[1])))
else:
arguments.append( [ read_cell_value( book_name, sheet_name
, coordinate="{:}{:}".format( get_column_letter(c[1])
, c[0]
)
)\
for c in CellRange("{:}{:}:{:}{:}".format(m[0], m[1], m[2], m[3])).cells\
]
)
logger.debug("Absolute range arguments: %s", repr(arguments))
for rge in fmt.cells:
for c in rge.cells:
cell: Cell = worksheet.cell(row=c[0], column=c[1])
if condition(str(cell.value)):
cell_value = read_cell_value( book_name, sheet_name
, coordinate="{:}{:d}".format( get_column_letter(c[1])
, c[0]
)
)
if condition(cell_value, *arguments):
logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0])
active_cells.append(cell)
else:
raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type))
@@ -285,6 +451,7 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[st
for c in active_cells:
style_dict[c.coordinate] = [_read_cell_style(st, c, r.dxf) for st in concerned_styles]
logger.debug(".[%s].styles: %s", sheet_name, repr(style_dict))
return style_dict
# }}} function load_xlsx_styles #
@@ -315,7 +482,10 @@ def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
Dict[Union[int, str], Dict[str, Any]]: row/column information
"""
worksheet: Worksheet = xlsx_file[sheet_name]
try:
worksheet: Worksheet = xlsx_file[sheet_name]
except KeyError:
return {}
objs: DimensionHolder = getattr(worksheet, "{:}_dimensions".format(options["obj"]))
obj_set: Dict[int, Any] = {}
@@ -328,6 +498,64 @@ def load_rows_or_cols(xlsx_file: Workbook, sheet_name: str, **options)\
return obj_set
# }}} function load_rows_or_cols #
def load_filters(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, Any]:
# function load_filters {{{ #
try:
worksheet: Worksheet = xlsx_file[sheet_name]
except KeyError:
return {}
filters: AutoFilter = worksheet.auto_filter
filter_dict: Dict[str, Any] = {}
filter_dict["ref"] = filters.ref
# filterColumn
filter_column_set: List[Dict[str, Any]] = []
for flt_clm in filters.filterColumn:
filter_column: Dict[str, Any] = {}
filter_column["col_id"] = flt_clm.colId
filter_column["hidden_button"] = flt_clm.hiddenButton
filter_column["show_button"] = flt_clm.showButton
if flt_clm.filters is not None:
filter_column["filters_blank"] = flt_clm.filters.blank
filter_column["filters"] = set(flt_clm.filters.filter)
if flt_clm.customFilters is not None:
filter_column["custom_filters_op"] = flt_clm.customFilters._and
filter_column["custom_filters"] = set( ( flt.operator
, flt.val
)\
for flt in flt_clm.customFilters.customFilter
)
filter_column_set.append(filter_column)
filter_column_set = list( sorted( filter_column_set
, key=(lambda d: d["col_id"])
)
)
filter_dict["filter_column"] = filter_column_set
# sortState
sort_state: Optional[SortState] = filters.sortState
if sort_state is not None:
sort_state_dict: Dict[str, Any] = {}
sort_state_dict["sort"] = sort_state.columnSort
sort_state_dict["case"] = sort_state.caseSensitive
sort_state_dict["method"] = sort_state.sortMethod
sort_state_dict["ref"] = sort_state.ref
sort_state_dict["condition"] = list( { "descending": cdt.descending
, "key": cdt.sortBy
, "ref": cdt.ref
, "custom_list": cdt.customList
, "dxf_id": cdt.dxfId
, "icon": cdt.iconSet
, "iconid": cdt.iconId
}\
for cdt in sort_state.sortCondition
)
filter_dict["sort_state"] = sort_state_dict
return filter_dict
# }}} function load_filters #
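# Shape of the dict load_filters returns (the values shown are illustrative):
# {"ref": "A1:E20",
#  "filter_column": [{"col_id": 0, "hidden_button": False, "show_button": True, "filters_blank": None, "filters": {"Foo", "Bar"}}],
#  "sort_state": {"sort": None, "case": None, "method": None, "ref": "A2:A20", "condition": [...]}}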
def _match_record(pattern: Dict[str, Any], item: Dict[str, Any]) -> bool:
return all(k in item and item[k] == val for k, val in pattern.items())
@@ -431,10 +659,12 @@ def compare_urls(url1, url2):
if __name__ == "__main__":
path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
path1 = "test.xlsx"
#path1 = "../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold.xlsx"
path1 = "../../任务集/SheetCopilot/dataset/task_sheet_answers_v2/BoomerangSales/2_BoomerangSales/2_BoomerangSales_gt1.xlsx"
workbook1: Workbook = openpyxl.load_workbook(filename=path1)
worksheet1: Worksheet = workbook1.active
charts: List[ChartBase] = worksheet1._charts
#charts: List[ChartBase] = worksheet1._charts
# print(len(charts))
# print(type(charts[0]))
#
@@ -466,4 +696,74 @@ if __name__ == "__main__":
# df1 = pd.read_excel(path1)
# print(df1)
print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
#print(load_charts(path1, chart_props=["title", "xtitle", "ytitle", "type"]))
#print(type(worksheet1["A1"].hyperlink))
#print(worksheet1["A1"].hyperlink)
#print(worksheet1._charts[0].legend)
#print(worksheet1._charts[0].legend.position)
#for entr in worksheet1._charts[0].legend.legendEntry:
#print("Entr", entr.txPr.p[0].r[0].t)
#print(load_filters(workbook1, "工作表1"))
#print(worksheet1.auto_filter)
#for pvt in worksheet1._pivots:
##print(type(pvt))
##print(pvt)
#print(type(pvt.cache))
##print(pvt.cache)
#print(pvt.cache.cacheSource.type)
#print(pvt.cache.cacheSource.worksheetSource.ref)
#print(pvt.cache.cacheSource.worksheetSource.sheet)
#
#print(type(pvt.location))
#print(pvt.location)
#for f in pvt.pivotFields:
#print(type(f))
#print([(itm.h, itm.x) for itm in f.items])
##for f_itm in f.items:
##print(f_itm.n)
##print(f_itm.t)
##print(f_itm.h)
##print(f_itm.s)
##print(f_itm.sd)
##print(f_itm.f)
##print(f_itm.m)
##print(f_itm.c)
##print(f_itm.x)
##print(f_itm.d)
##print(f_itm.e)
##print(f.countASubtotal)
##print(f.countSubtotal)
##for f in pvt.dataFields:
##print(f.name)
##print(f.fld)
###print(f.baseField)
##print(f.subtotal)
##print(f.showDataAs)
##for f in pvt.rowFields:
##print(1, f.x)
##for f in pvt.rowItems:
##print(2, f.t, f.r, f.i, f.x)
##for f in pvt.colFields:
##print(3, f.x)
##for f in pvt.colItems:
##print(4, f.t, f.r, f.i, f.x)
#for f in pvt.pageFields:
#print(5, f.fld)
#for flt in pvt.filters:
#print(5, flt.fld)
#print(6, flt.mpFld)
#print(7, flt.type)
#print(8, flt.evalOrder)
#print(9, flt.id)
#print(10, flt.stringValue1)
#print(11, flt.stringValue2)
#print(load_charts(workbook1, "Sheet2", chart_props=["title", "type", "legend"]))
#print(load_filters(workbook1, "透视表_工作表1_1"))
#workbook1.save("test2.xlsx")
print( load_pivot_tables( workbook1, "Sheet2", pivot_props=[ "col_fields"
, "filter"
, "row_fields"
, "data_fields"
]
)
)

View File

@@ -387,12 +387,15 @@ def _create_atspi_node(node: Accessible, depth: int = 0, flag: Optional[str] = N
index_base += MAXIMUN_COLUMN
return xml_node
else:
for i, ch in enumerate(node):
# HYPERPARAMETER
if i>=1025:
logger.warning("Max width reached")
break
xml_node.append(_create_atspi_node(ch, depth+1, flag))
try:
for i, ch in enumerate(node):
# HYPERPARAMETER
if i>=1025:
logger.warning("Max width reached")
break
xml_node.append(_create_atspi_node(ch, depth+1, flag))
except:
logger.warning("Error occurred during children traversing. Has Ignored. Node: %s", lxml.etree.tostring(xml_node, encoding="unicode"))
return xml_node
# }}} function _create_atspi_node #