Update on Chrome examples; Refactor on logic of controlling

This commit is contained in:
Timothyxxx
2024-01-12 17:24:47 +08:00
parent 820579a5a2
commit 5a93a32958
17 changed files with 575 additions and 194 deletions

View File

@@ -1,26 +1,29 @@
import zipfile
import lxml.etree
import lxml.cssselect
from lxml.etree import _Element
import xmltodict
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.chart._chart import ChartBase
from typing import Dict, List, Set
from typing import Any
import logging
import zipfile
from typing import Any
from typing import Dict, List, Set
from urllib.parse import urlparse, urlunparse
import lxml.cssselect
import lxml.etree
import openpyxl
import xmltodict
from lxml.etree import _Element
from openpyxl import Workbook
from openpyxl.chart._chart import ChartBase
from openpyxl.worksheet.worksheet import Worksheet
# Module-level logger for the metrics utilities.
logger = logging.getLogger("desktopenv.metrics.utils")
# (prefix, namespace URI) pairs for the XLSX extension namespaces used when
# locating and parsing sparkline elements.
# NOTE(review): the list is assigned twice below with identical contents and
# only whitespace differences — presumably the before/after lines of a diff;
# confirm and keep a single copy.
_xlsx_namespaces = [ ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
_xlsx_namespaces = [("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
]
# prefix -> namespace URI, handed to the CSS selector below.
_xlsx_ns_mapping = dict(_xlsx_namespaces)
# namespace URI -> prefix (the inverse mapping), handed to xmltodict so parsed
# keys come back in the short "x14:..." / "xm:..." form.
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
# Pre-built selector matching every <x14:sparkline> element.
_sparklines_selector = lxml.cssselect.CSSSelector("x14|sparkline", namespaces=_xlsx_ns_mapping)
#print(_sparklines_selector.css)
# print(_sparklines_selector.css)
def load_sparklines(xlsx_file: str) -> Dict[str, str]:
"""
This function modifies data_frame in-place
@@ -44,13 +47,14 @@ def load_sparklines(xlsx_file: str) -> Dict[str, str]:
sparklines_dict: Dict[str, str] = {}
for sp_l in sparklines:
sparkline_xml: str = lxml.etree.tostring(sp_l, encoding="unicode")
sparkline: Dict[str, Dict[str, str]] = xmltodict.parse( sparkline_xml
, process_namespaces=True
, namespaces=_xlsx_ns_imapping
)
sparkline: Dict[str, Dict[str, str]] = xmltodict.parse(sparkline_xml
, process_namespaces=True
, namespaces=_xlsx_ns_imapping
)
sparklines_dict[sparkline["x14:sparkline"]["xm:sqref"]] = sparkline["x14:sparkline"]["xm:f"]
return sparklines_dict
# Available Chart Properties:
# title: str
# anchor: ["oneCell" | "twoCell" | "absolute", col0, row0, col1, row1]
@@ -70,7 +74,7 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
Dict[str, Any]: information of charts
"""
#workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
# workbook: Workbook = openpyxl.load_workbook(filename=xlsx_file)
worksheet: Worksheet = xlsx_file.active
charts: List[ChartBase] = worksheet._charts
@@ -79,22 +83,22 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
for ch in charts:
series: List[str] = []
for ser in ch.series:
value_num = ser.val.numRef.f\
if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f")\
else ""
value_str = ser.val.strRef.f\
if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f")\
else ""
categ_num = ser.cat.numRef.f\
if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f")\
else ""
categ_str = ser.cat.strRef.f\
if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f")\
else ""
series.append( "{:},{:},{:},{:}".format( value_num, value_str
value_num = ser.val.numRef.f \
if hasattr(ser.val, "numRef") and hasattr(ser.val.numRef, "f") \
else ""
value_str = ser.val.strRef.f \
if hasattr(ser.val, "strRef") and hasattr(ser.val.strRef, "f") \
else ""
categ_num = ser.cat.numRef.f \
if hasattr(ser.cat, "numRef") and hasattr(ser.cat.numRef, "f") \
else ""
categ_str = ser.cat.strRef.f \
if hasattr(ser.cat, "strRef") and hasattr(ser.cat.strRef, "f") \
else ""
series.append("{:},{:},{:},{:}".format(value_num, value_str
, categ_num, categ_str
)
)
)
series: str = ";".join(series)
# TODO: maybe more aspects, like chart type
@@ -103,10 +107,10 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
if "title" in chart_props:
info["title"] = ch.title.tx.rich.p[0].r[0].t
if "anchor" in chart_props:
info["anchor"] = [ ch.anchor.editAs
, ch.anchor._from.col, ch.anchor.to.row
, ch.anchor.to.col, ch.anchor.to.row
]
info["anchor"] = [ch.anchor.editAs
, ch.anchor._from.col, ch.anchor.to.row
, ch.anchor.to.col, ch.anchor.to.row
]
if "width" in chart_props:
info["width"] = ch.width
if "height" in chart_props:
@@ -125,40 +129,83 @@ def load_charts(xlsx_file: Workbook, **options) -> Dict[str, Any]:
chart_set[series] = info
return chart_set
def are_lists_equal(list1, list2, comparison_func):
    """Return True when the two lists hold the same items, ignoring order.

    Items are paired one-to-one: every element of ``list1`` must match a
    distinct, not-yet-used element of ``list2``. This keeps duplicates from
    producing false positives (``[1, 1, 2]`` is not equal to ``[1, 2, 3]``).

    Args:
        list1: first list of items.
        list2: second list of items.
        comparison_func: callable ``(item1, item2) -> bool`` deciding whether
            an item of ``list1`` equals an item of ``list2``. The greedy
            pairing used here assumes it behaves like an equivalence
            relation (e.g. ``compare_urls``).

    Returns:
        bool: True if the lists have the same length and can be paired off
        completely, False otherwise.
    """
    # Different lengths can never be paired one-to-one.
    if len(list1) != len(list2):
        return False

    # Work on a copy so the caller's list is never mutated.
    unmatched = list(list2)
    for item1 in list1:
        for idx, item2 in enumerate(unmatched):
            if comparison_func(item1, item2):
                # Consume the partner so it cannot be matched a second time.
                del unmatched[idx]
                break
        else:
            # No remaining element of list2 matches item1.
            return False

    # Every item found its own partner.
    return True
def compare_urls(url1, url2):
    """Return True when two URLs are equal after light normalization.

    Normalization applied to each URL before comparing:
      * a missing scheme is replaced by ``http``; the scheme is lower-cased;
      * the network location is lower-cased and a single leading ``www.``
        label is dropped (only as a prefix, so ``awww.example.com`` and
        ``foo.www.bar.com`` are left untouched);
      * a path consisting solely of ``/`` is treated as an empty path.

    Params, query string and fragment are compared as-is.

    Args:
        url1: first URL string.
        url2: second URL string.

    Returns:
        bool: True if the normalized URLs are identical.
    """

    def normalize_url(url):
        # Parse the URL into its components
        parsed_url = urlparse(url)

        # If no scheme is present, assume 'http'
        scheme = parsed_url.scheme if parsed_url.scheme else 'http'

        # Lowercase the netloc and strip "www." only when it is the leading
        # label; a blanket str.replace would also rewrite hosts that merely
        # contain the substring "www.".
        normalized_netloc = parsed_url.netloc.lower()
        if normalized_netloc.startswith("www."):
            normalized_netloc = normalized_netloc[len("www."):]

        # A bare root path is equivalent to no path at all
        normalized_path = parsed_url.path if parsed_url.path != '/' else ''

        # Reassemble the URL with normalized components
        normalized_parsed_url = parsed_url._replace(scheme=scheme.lower(), netloc=normalized_netloc,
                                                    path=normalized_path)
        return urlunparse(normalized_parsed_url)

    # Compare the normalized forms
    return normalize_url(url1) == normalize_url(url2)
if __name__ == "__main__":
    # Manual smoke test: load a sample workbook and dump the chart information
    # extracted by load_charts.
    path1 = "../../../../../任务数据/LibreOffice Calc/Create_column_charts_using_statistics_gold_line_scatter.xlsx"
    workbook1: Workbook = openpyxl.load_workbook(filename=path1)
    worksheet1: Worksheet = workbook1.active
    charts: List[ChartBase] = worksheet1._charts

    # Exploratory prints kept as a reference for the structure of openpyxl
    # chart objects (series references, title runs, anchor).
    # print(len(charts))
    # print(type(charts[0]))
    #
    # print(len(charts[0].series))
    # print(type(charts[0].series[0]))
    # print(type(charts[0].series[0].val))
    ##print(charts[0].series[0].val)
    # print(charts[0].series[0].val.numRef.f)
    #
    # print(type(charts[0].series[0].cat))
    ##print(charts[0].series[0].cat)
    # print(charts[0].series[0].cat.numRef)
    # print(charts[0].series[0].cat.strRef)
    # print(charts[0].series[0].cat.strRef.f)

    # print(type(charts[0].title.tx.strRef))
    # print(type(charts[0].title.tx.rich))
    # print(type(charts[0].title.txPr))
    # print(len(charts[0].title.tx.rich.p))
    # print(len(charts[0].title.tx.rich.p[0].r))
    # print(type(charts[0].title.tx.rich.p[0].r[0]))
    # print(type(charts[0].title.tx.rich.p[0].r[0].t))
    # print(charts[0].title.tx.rich.p[0].r[0].t)

    # print(type(charts[0].anchor))
    # print(charts[0].anchor.editAs)
    # print(charts[0].anchor._from.col, charts[0].anchor.to.row)
    # print(charts[0].anchor.to.col, charts[0].anchor.to.row)

    # df1 = pd.read_excel(path1)
    # print(df1)

    # load_charts expects an already-loaded Workbook (it reads `.active`),
    # so pass the workbook object rather than the file path.
    print(load_charts(workbook1, chart_props=["title", "xtitle", "ytitle", "type"]))