Merge remote-tracking branch 'upstream/main' into fix_chrome
This commit is contained in:
@@ -1,10 +1,13 @@
|
|||||||
import os
|
import os
|
||||||
|
import logging
|
||||||
from typing import Dict, List, Set
|
from typing import Dict, List, Set
|
||||||
from typing import Optional, Any, Union
|
from typing import Optional, Any, Union
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import requests
|
import requests
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
|
logger = logging.getLogger("desktopenv.getter.file")
|
||||||
|
|
||||||
|
|
||||||
def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any:
|
def get_content_from_vm_file(env, config: Dict[str, Any]) -> Any:
|
||||||
"""
|
"""
|
||||||
@@ -101,16 +104,42 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option
|
|||||||
|
|
||||||
for i, (p, d) in enumerate(zip(paths, dests)):
|
for i, (p, d) in enumerate(zip(paths, dests)):
|
||||||
_path = os.path.join(env.cache_dir, d)
|
_path = os.path.join(env.cache_dir, d)
|
||||||
file = env.controller.get_file(p)
|
|
||||||
if file is None:
|
try:
|
||||||
|
# Try to get file from VM
|
||||||
|
file = env.controller.get_file(p)
|
||||||
|
if file is None:
|
||||||
|
logger.warning(f"Failed to get file from VM: {p}")
|
||||||
|
if i in gives:
|
||||||
|
cache_paths.append(None)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if i in gives:
|
||||||
|
cache_paths.append(_path)
|
||||||
|
|
||||||
|
# Write file with robust error handling
|
||||||
|
try:
|
||||||
|
# Ensure cache directory exists
|
||||||
|
os.makedirs(env.cache_dir, exist_ok=True)
|
||||||
|
|
||||||
|
with open(_path, "wb") as f:
|
||||||
|
f.write(file)
|
||||||
|
logger.info(f"Successfully saved file: {_path} ({len(file)} bytes)")
|
||||||
|
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"IO error writing file {_path}: {e}")
|
||||||
|
if i in gives:
|
||||||
|
cache_paths[-1] = None # Replace the path we just added with None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Unexpected error writing file {_path}: {e}")
|
||||||
|
if i in gives:
|
||||||
|
cache_paths[-1] = None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file {p}: {e}")
|
||||||
if i in gives:
|
if i in gives:
|
||||||
cache_paths.append(None)
|
cache_paths.append(None)
|
||||||
continue
|
|
||||||
|
|
||||||
if i in gives:
|
|
||||||
cache_paths.append(_path)
|
|
||||||
with open(_path, "wb") as f:
|
|
||||||
f.write(file)
|
|
||||||
return cache_paths[0] if len(cache_paths)==1 else cache_paths
|
return cache_paths[0] if len(cache_paths)==1 else cache_paths
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -298,34 +298,84 @@ def check_json(result: str, rules: Dict[str, List[Dict[str, Union[List[str], str
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if result is None:
|
if result is None:
|
||||||
|
logger.warning("Result file path is None, returning 0.0")
|
||||||
|
return 0.
|
||||||
|
|
||||||
|
# Check if file exists
|
||||||
|
if not os.path.exists(result):
|
||||||
|
logger.warning(f"Result file does not exist: {result}, returning 0.0")
|
||||||
|
return 0.
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(result, 'r', encoding='utf-8') as f:
|
||||||
|
if is_yaml:
|
||||||
|
try:
|
||||||
|
# Use SafeLoader instead of Loader for better security and error handling
|
||||||
|
result_data: Dict[str, Any] = yaml.safe_load(f)
|
||||||
|
if result_data is None:
|
||||||
|
logger.warning(f"YAML file {result} is empty or contains only null values, returning 0.0")
|
||||||
|
return 0.
|
||||||
|
except yaml.YAMLError as e:
|
||||||
|
logger.error(f"YAML parsing error in file {result}: {e}")
|
||||||
|
logger.error(f"File content might be corrupted or have invalid YAML syntax")
|
||||||
|
return 0.
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Unexpected error parsing YAML file {result}: {e}")
|
||||||
|
return 0.
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
result_data: Dict[str, Any] = json.load(f)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.error(f"JSON parsing error in file {result}: {e}")
|
||||||
|
return 0.
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Unexpected error parsing JSON file {result}: {e}")
|
||||||
|
return 0.
|
||||||
|
except IOError as e:
|
||||||
|
logger.error(f"IO error reading file {result}: {e}")
|
||||||
|
return 0.
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Unexpected error reading file {result}: {e}")
|
||||||
return 0.
|
return 0.
|
||||||
with open(result) as f:
|
|
||||||
if is_yaml:
|
|
||||||
result: Dict[str, Any] = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
else:
|
|
||||||
result: Dict[str, Any] = json.load(f)
|
|
||||||
|
|
||||||
expect_rules = rules.get("expect", {})
|
expect_rules = rules.get("expect", {})
|
||||||
unexpect_rules = rules.get("unexpect", {})
|
unexpect_rules = rules.get("unexpect", {})
|
||||||
|
|
||||||
metric = True
|
metric = True
|
||||||
for r in expect_rules:
|
for r in expect_rules:
|
||||||
value = result
|
value = result_data
|
||||||
for k in r["key"]:
|
try:
|
||||||
try:
|
for k in r["key"]:
|
||||||
value = value[k]
|
try:
|
||||||
except KeyError:
|
value = value[k]
|
||||||
return 0.
|
except KeyError:
|
||||||
metric = metric and _match_value_to_rule(value, r)
|
logger.debug(f"Key '{k}' not found in result data, returning 0.0")
|
||||||
|
return 0.
|
||||||
|
except TypeError:
|
||||||
|
logger.debug(f"Cannot access key '{k}' - value is not a dictionary, returning 0.0")
|
||||||
|
return 0.
|
||||||
|
metric = metric and _match_value_to_rule(value, r)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing expect rule {r}: {e}")
|
||||||
|
return 0.
|
||||||
|
|
||||||
for r in unexpect_rules:
|
for r in unexpect_rules:
|
||||||
value = result
|
value = result_data
|
||||||
for k in r["key"]:
|
try:
|
||||||
try:
|
for k in r["key"]:
|
||||||
value = value[k]
|
try:
|
||||||
except KeyError:
|
value = value[k]
|
||||||
value = None
|
except KeyError:
|
||||||
break
|
value = None
|
||||||
metric = metric and not _match_value_to_rule(value, r)
|
break
|
||||||
|
except TypeError:
|
||||||
|
value = None
|
||||||
|
break
|
||||||
|
metric = metric and not _match_value_to_rule(value, r)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing unexpect rule {r}: {e}")
|
||||||
|
return 0.
|
||||||
|
|
||||||
return float(metric)
|
return float(metric)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -73,6 +73,9 @@ def check_image_stretch_and_center(modified_ppt, original_ppt):
|
|||||||
original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
|
original_slide_images = [shape for shape in original_slide.shapes if shape.shape_type == 13]
|
||||||
modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]
|
modified_slide_images = [shape for shape in modified_slide.shapes if shape.shape_type == 13]
|
||||||
|
|
||||||
|
if not original_slide_images:
|
||||||
|
return 0.
|
||||||
|
|
||||||
the_image = original_slide_images[0]
|
the_image = original_slide_images[0]
|
||||||
|
|
||||||
the_modified_image = None
|
the_modified_image = None
|
||||||
@@ -395,12 +398,38 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
|||||||
table2 = shape2.table
|
table2 = shape2.table
|
||||||
if enable_debug:
|
if enable_debug:
|
||||||
debug_logger.debug(f" Shape {shape_idx} - Comparing TABLE with {len(table1.rows)} rows and {len(table1.columns)} columns")
|
debug_logger.debug(f" Shape {shape_idx} - Comparing TABLE with {len(table1.rows)} rows and {len(table1.columns)} columns")
|
||||||
|
debug_logger.debug(f" Shape {shape_idx} - Table2 has {len(table2.rows)} rows and {len(table2.columns)} columns")
|
||||||
|
|
||||||
|
# Check if tables have the same dimensions
|
||||||
|
if len(table1.rows) != len(table2.rows) or len(table1.columns) != len(table2.columns):
|
||||||
|
if enable_debug:
|
||||||
|
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Table dimensions differ:")
|
||||||
|
debug_logger.debug(f" Table1: {len(table1.rows)} rows x {len(table1.columns)} columns")
|
||||||
|
debug_logger.debug(f" Table2: {len(table2.rows)} rows x {len(table2.columns)} columns")
|
||||||
|
return 0
|
||||||
|
|
||||||
for row_idx in range(len(table1.rows)):
|
for row_idx in range(len(table1.rows)):
|
||||||
for col_idx in range(len(table1.columns)):
|
for col_idx in range(len(table1.columns)):
|
||||||
cell1 = table1.cell(row_idx, col_idx)
|
cell1 = table1.cell(row_idx, col_idx)
|
||||||
cell2 = table2.cell(row_idx, col_idx)
|
cell2 = table2.cell(row_idx, col_idx)
|
||||||
|
|
||||||
|
# Check if cells have the same number of paragraphs
|
||||||
|
if len(cell1.text_frame.paragraphs) != len(cell2.text_frame.paragraphs):
|
||||||
|
if enable_debug:
|
||||||
|
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}] - Different number of paragraphs:")
|
||||||
|
debug_logger.debug(f" Cell1 paragraphs: {len(cell1.text_frame.paragraphs)}")
|
||||||
|
debug_logger.debug(f" Cell2 paragraphs: {len(cell2.text_frame.paragraphs)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
for para_idx, (para1, para2) in enumerate(zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs)):
|
for para_idx, (para1, para2) in enumerate(zip(cell1.text_frame.paragraphs, cell2.text_frame.paragraphs)):
|
||||||
|
# Check if paragraphs have the same number of runs
|
||||||
|
if len(para1.runs) != len(para2.runs):
|
||||||
|
if enable_debug:
|
||||||
|
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} (TABLE) - Cell [{row_idx},{col_idx}], Para {para_idx} - Different number of runs:")
|
||||||
|
debug_logger.debug(f" Para1 runs: {len(para1.runs)}")
|
||||||
|
debug_logger.debug(f" Para2 runs: {len(para2.runs)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
for run_idx, (run1, run2) in enumerate(zip(para1.runs, para2.runs)):
|
for run_idx, (run1, run2) in enumerate(zip(para1.runs, para2.runs)):
|
||||||
# Check font color
|
# Check font color
|
||||||
if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"):
|
if hasattr(run1.font.color, "rgb") and hasattr(run2.font.color, "rgb"):
|
||||||
@@ -451,6 +480,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
|||||||
if shape1.text.strip() != shape2.text.strip() and examine_text:
|
if shape1.text.strip() != shape2.text.strip() and examine_text:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
# check if the number of paragraphs are the same
|
||||||
|
if len(shape1.text_frame.paragraphs) != len(shape2.text_frame.paragraphs):
|
||||||
|
if enable_debug:
|
||||||
|
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx} - Different number of paragraphs:")
|
||||||
|
debug_logger.debug(f" Shape1 paragraphs: {len(shape1.text_frame.paragraphs)}")
|
||||||
|
debug_logger.debug(f" Shape2 paragraphs: {len(shape2.text_frame.paragraphs)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
# check if the paragraphs are the same
|
# check if the paragraphs are the same
|
||||||
para_idx = 0
|
para_idx = 0
|
||||||
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
|
for para1, para2 in zip(shape1.text_frame.paragraphs, shape2.text_frame.paragraphs):
|
||||||
@@ -487,6 +524,14 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
|||||||
if para1.level != para2.level and examine_indent:
|
if para1.level != para2.level and examine_indent:
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
# check if the number of runs are the same
|
||||||
|
if len(para1.runs) != len(para2.runs):
|
||||||
|
if enable_debug:
|
||||||
|
debug_logger.debug(f" MISMATCH: Slide {slide_idx}, Shape {shape_idx}, Para {para_idx} - Different number of runs:")
|
||||||
|
debug_logger.debug(f" Para1 runs: {len(para1.runs)}")
|
||||||
|
debug_logger.debug(f" Para2 runs: {len(para2.runs)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
for run1, run2 in zip(para1.runs, para2.runs):
|
for run1, run2 in zip(para1.runs, para2.runs):
|
||||||
|
|
||||||
# check if the font properties are the same
|
# check if the font properties are the same
|
||||||
@@ -634,6 +679,12 @@ def compare_pptx_files(file1_path, file2_path, **options):
|
|||||||
debug_logger.debug(f" MISMATCH: Text differs - '{tshape1.text.strip()}' vs '{tshape2.text.strip()}'")
|
debug_logger.debug(f" MISMATCH: Text differs - '{tshape1.text.strip()}' vs '{tshape2.text.strip()}'")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
# Check if text shapes have the same number of paragraphs
|
||||||
|
if len(tshape1.text_frame.paragraphs) != len(tshape2.text_frame.paragraphs):
|
||||||
|
if enable_debug:
|
||||||
|
debug_logger.debug(f" MISMATCH: Different number of paragraphs - {len(tshape1.text_frame.paragraphs)} vs {len(tshape2.text_frame.paragraphs)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
# Compare alignment of each paragraph
|
# Compare alignment of each paragraph
|
||||||
for para_idx, (para1, para2) in enumerate(zip(tshape1.text_frame.paragraphs, tshape2.text_frame.paragraphs)):
|
for para_idx, (para1, para2) in enumerate(zip(tshape1.text_frame.paragraphs, tshape2.text_frame.paragraphs)):
|
||||||
from pptx.enum.text import PP_ALIGN
|
from pptx.enum.text import PP_ALIGN
|
||||||
|
|||||||
@@ -36,8 +36,14 @@ def _parse_sheet_idx(sheet_idx: Union[int, str]
|
|||||||
# function _parse_sheet_idx {{{ #
|
# function _parse_sheet_idx {{{ #
|
||||||
if isinstance(sheet_idx, int):
|
if isinstance(sheet_idx, int):
|
||||||
try:
|
try:
|
||||||
index: str = result_sheet_names[sheet_idx]
|
if not result_sheet_names or sheet_idx >= len(result_sheet_names):
|
||||||
except:
|
logger.error(f"Sheet index {sheet_idx} out of range. Available sheets: {result_sheet_names}")
|
||||||
|
index = ""
|
||||||
|
else:
|
||||||
|
index: str = result_sheet_names[sheet_idx]
|
||||||
|
logger.debug(f"Sheet index {sheet_idx} resolved to sheet: {index}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error resolving sheet index {sheet_idx}: {e}")
|
||||||
index = ""
|
index = ""
|
||||||
book: BOOK = result
|
book: BOOK = result
|
||||||
elif sheet_idx.startswith("RI"):
|
elif sheet_idx.startswith("RI"):
|
||||||
@@ -114,12 +120,21 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if result is None:
|
if result is None:
|
||||||
|
logger.error("Result file path is None")
|
||||||
|
return 0.
|
||||||
|
|
||||||
|
# Check if result file exists
|
||||||
|
if not os.path.exists(result):
|
||||||
|
logger.error(f"Result file not found: {result}")
|
||||||
return 0.
|
return 0.
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
logger.info(f"Loading result file: {result}")
|
||||||
xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
|
xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
|
||||||
pdworkbookr = pd.ExcelFile(result)
|
pdworkbookr = pd.ExcelFile(result)
|
||||||
except:
|
logger.info(f"Successfully loaded result file with sheets: {pdworkbookr.sheet_names}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to load result file {result}: {e}")
|
||||||
return 0.
|
return 0.
|
||||||
worksheetr_names: List[str] = pdworkbookr.sheet_names
|
worksheetr_names: List[str] = pdworkbookr.sheet_names
|
||||||
|
|
||||||
@@ -432,19 +447,35 @@ def compare_table(result: str, expected: str = None, **options) -> float:
|
|||||||
# props: dict like {attribute: {"method": str, "ref": anything}}
|
# props: dict like {attribute: {"method": str, "ref": anything}}
|
||||||
# supported attributes: value & those supported by utils._read_cell_style
|
# supported attributes: value & those supported by utils._read_cell_style
|
||||||
|
|
||||||
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
|
try:
|
||||||
if sheet is None:
|
sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke))
|
||||||
return 0.
|
if sheet is None:
|
||||||
# data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
|
logger.error(f"Failed to load sheet for sheet_idx: {r['sheet_idx']}")
|
||||||
cell: Cell = sheet[r["coordinate"]]
|
return 0.
|
||||||
metric: bool = True
|
# data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke))
|
||||||
for prpt, rule in r["props"].items():
|
cell: Cell = sheet[r["coordinate"]]
|
||||||
if prpt == "value":
|
metric: bool = True
|
||||||
val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"])
|
for prpt, rule in r["props"].items():
|
||||||
else:
|
if prpt == "value":
|
||||||
val = _read_cell_style(prpt, cell)
|
try:
|
||||||
|
parsed_result = parse_idx(r["sheet_idx"], result, expected)
|
||||||
|
logger.debug(f"parse_idx result: {parsed_result}")
|
||||||
|
val = read_cell_value(*parsed_result, r["coordinate"])
|
||||||
|
logger.debug(f"Cell {r['coordinate']} value: {val}")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to read cell value at {r['coordinate']}: {e}")
|
||||||
|
val = None
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
val = _read_cell_style(prpt, cell)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to read cell style {prpt} at {r['coordinate']}: {e}")
|
||||||
|
val = None
|
||||||
|
|
||||||
metric = metric and _match_value_to_rule(val, rule)
|
metric = metric and _match_value_to_rule(val, rule)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in check_cell processing: {e}")
|
||||||
|
return 0.
|
||||||
|
|
||||||
logger.debug("Assertion: %s[%s] :%s - %s"
|
logger.debug("Assertion: %s[%s] :%s - %s"
|
||||||
, r["sheet_idx"], r["coordinate"]
|
, r["sheet_idx"], r["coordinate"]
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import functools
|
|||||||
import itertools
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import operator
|
import operator
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import zipfile
|
import zipfile
|
||||||
#import pandas as pd
|
#import pandas as pd
|
||||||
@@ -33,10 +34,11 @@ V = TypeVar("Value")
|
|||||||
|
|
||||||
logger = logging.getLogger("desktopenv.metrics.utils")
|
logger = logging.getLogger("desktopenv.metrics.utils")
|
||||||
|
|
||||||
_xlsx_namespaces = [("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main")
|
_xlsx_namespaces = [
|
||||||
, ("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main")
|
("oo", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"),
|
||||||
, ("xm", "http://schemas.microsoft.com/office/excel/2006/main")
|
("x14", "http://schemas.microsoft.com/office/spreadsheetml/2009/9/main"),
|
||||||
]
|
("xm", "http://schemas.microsoft.com/office/excel/2006/main")
|
||||||
|
]
|
||||||
_xlsx_ns_mapping = dict(_xlsx_namespaces)
|
_xlsx_ns_mapping = dict(_xlsx_namespaces)
|
||||||
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
|
_xlsx_ns_imapping = dict(map(lambda itm: (itm[1], itm[0]), _xlsx_namespaces))
|
||||||
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
|
_xlsx_ns_imapping["http://schemas.openxmlformats.org/spreadsheetml/2006/main"] = None
|
||||||
@@ -282,6 +284,13 @@ _shared_str_value_selector = lxml.cssselect.CSSSelector("oo|t", namespaces=_xlsx
|
|||||||
|
|
||||||
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
||||||
# read_cell_value {{{ #
|
# read_cell_value {{{ #
|
||||||
|
logger.debug(f"Reading cell value from {xlsx_file}, sheet: {sheet_name}, coordinate: {coordinate}")
|
||||||
|
|
||||||
|
# Check if file exists
|
||||||
|
if not os.path.exists(xlsx_file):
|
||||||
|
logger.error(f"Excel file not found: {xlsx_file}")
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with zipfile.ZipFile(xlsx_file, "r") as z_f:
|
with zipfile.ZipFile(xlsx_file, "r") as z_f:
|
||||||
try:
|
try:
|
||||||
@@ -308,9 +317,17 @@ def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
|
|||||||
, namespaces=_xlsx_ns_mapping
|
, namespaces=_xlsx_ns_mapping
|
||||||
)(sheet)
|
)(sheet)
|
||||||
if len(cells) == 0:
|
if len(cells) == 0:
|
||||||
|
logger.debug(f"Cell {coordinate} not found in sheet {sheet_name}")
|
||||||
return None
|
return None
|
||||||
cell: _Element = cells[0]
|
cell: _Element = cells[0]
|
||||||
except zipfile.BadZipFile:
|
except zipfile.BadZipFile as e:
|
||||||
|
logger.error(f"Bad zip file {xlsx_file}: {e}")
|
||||||
|
return None
|
||||||
|
except KeyError as e:
|
||||||
|
logger.error(f"Sheet {sheet_name} not found in {xlsx_file}: {e}")
|
||||||
|
return None
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error reading {xlsx_file}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")
|
cell: Dict[str, str] = xmltodict.parse(lxml.etree.tostring(cell, encoding="unicode")
|
||||||
|
|||||||
@@ -369,9 +369,10 @@ class AnthropicAgent:
|
|||||||
)
|
)
|
||||||
|
|
||||||
except (APIError, APIStatusError, APIResponseValidationError) as e:
|
except (APIError, APIStatusError, APIResponseValidationError) as e:
|
||||||
self.logger.exception(f"Anthropic API error: {str(e)}")
|
logger.exception(f"Anthropic API error: {str(e)}")
|
||||||
try:
|
try:
|
||||||
self.logger.warning("Retrying with backup API key...")
|
logger.warning("Retrying with backup API key...")
|
||||||
|
|
||||||
backup_client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY_BACKUP"), max_retries=4)
|
backup_client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY_BACKUP"), max_retries=4)
|
||||||
|
|
||||||
if self.model_name == "claude-3-7-sonnet-20250219" or self.model_name == "claude-4-opus-20250514" or self.model_name == "claude-4-sonnet-20250514":
|
if self.model_name == "claude-3-7-sonnet-20250219" or self.model_name == "claude-4-opus-20250514" or self.model_name == "claude-4-sonnet-20250514":
|
||||||
@@ -393,13 +394,13 @@ class AnthropicAgent:
|
|||||||
tools=tools,
|
tools=tools,
|
||||||
betas=betas,
|
betas=betas,
|
||||||
)
|
)
|
||||||
self.logger.info("Successfully used backup API key")
|
logger.info("Successfully used backup API key")
|
||||||
except Exception as backup_e:
|
except Exception as backup_e:
|
||||||
self.logger.exception(f"Backup API call also failed: {str(backup_e)}")
|
logger.exception(f"Backup API call also failed: {str(backup_e)}")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.exception(f"Error in Anthropic API: {str(e)}")
|
logger.exception(f"Error in Anthropic API: {str(e)}")
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
response_params = _response_to_params(response)
|
response_params = _response_to_params(response)
|
||||||
@@ -434,9 +435,15 @@ class AnthropicAgent:
|
|||||||
actions = ["DONE"]
|
actions = ["DONE"]
|
||||||
return reasonings, actions
|
return reasonings, actions
|
||||||
|
|
||||||
def reset(self, *args, **kwargs):
|
def reset(self, _logger = None, *args, **kwargs):
|
||||||
"""
|
"""
|
||||||
Reset the agent's state.
|
Reset the agent's state.
|
||||||
"""
|
"""
|
||||||
|
global logger
|
||||||
|
if _logger:
|
||||||
|
logger = _logger
|
||||||
|
else:
|
||||||
|
logger = logging.getLogger("desktopenv.agent")
|
||||||
self.messages = []
|
self.messages = []
|
||||||
self.logger.info(f"{self.class_name} reset.")
|
logger.info(f"{self.class_name} reset.")
|
||||||
|
|
||||||
|
|||||||
@@ -11,4 +11,4 @@ MODEL_NAME=computer-use-preview
|
|||||||
MAX_STEPS=100
|
MAX_STEPS=100
|
||||||
FLASK_PORT=80
|
FLASK_PORT=80
|
||||||
FLASK_HOST=0.0.0.0
|
FLASK_HOST=0.0.0.0
|
||||||
FLASK_DEBUG=true
|
FLASK_DEBUG=false
|
||||||
134
monitor/main.py
134
monitor/main.py
@@ -1,14 +1,17 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from functools import cache
|
||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
|
import subprocess
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from flask import Flask, render_template_string, jsonify, send_file, request, render_template
|
from flask import Flask, render_template_string, jsonify, send_file, request, render_template
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables from .env file
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
@@ -38,12 +41,51 @@ OBSERVATION_TYPE=os.getenv("OBSERVATION_TYPE", "screenshot")
|
|||||||
MODEL_NAME=os.getenv("MODEL_NAME", "computer-use-preview")
|
MODEL_NAME=os.getenv("MODEL_NAME", "computer-use-preview")
|
||||||
MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
|
MAX_STEPS = int(os.getenv("MAX_STEPS", "150"))
|
||||||
|
|
||||||
|
def initialize_default_config():
|
||||||
|
"""Initialize default configuration from the first available config in results directory"""
|
||||||
|
global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH
|
||||||
|
|
||||||
|
if os.path.exists(RESULTS_BASE_PATH):
|
||||||
|
try:
|
||||||
|
# Scan for the first available configuration
|
||||||
|
for action_space in os.listdir(RESULTS_BASE_PATH):
|
||||||
|
action_space_path = os.path.join(RESULTS_BASE_PATH, action_space)
|
||||||
|
if os.path.isdir(action_space_path):
|
||||||
|
for obs_type in os.listdir(action_space_path):
|
||||||
|
obs_path = os.path.join(action_space_path, obs_type)
|
||||||
|
if os.path.isdir(obs_path):
|
||||||
|
for model_name in os.listdir(obs_path):
|
||||||
|
model_path = os.path.join(obs_path, model_name)
|
||||||
|
if os.path.isdir(model_path):
|
||||||
|
# Use the first available configuration as default
|
||||||
|
ACTION_SPACE = action_space
|
||||||
|
OBSERVATION_TYPE = obs_type
|
||||||
|
MODEL_NAME = model_name
|
||||||
|
RESULTS_PATH = model_path
|
||||||
|
print(f"Initialized default config: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
|
||||||
|
return
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error scanning results directory for default config: {e}")
|
||||||
|
|
||||||
|
# Fallback to original environment-based path if no configs found
|
||||||
|
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
||||||
|
print(f"Using fallback config from environment: {ACTION_SPACE}/{OBSERVATION_TYPE}/{MODEL_NAME}")
|
||||||
|
|
||||||
|
# Initialize default configuration
|
||||||
|
initialize_default_config()
|
||||||
|
|
||||||
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)
|
||||||
|
|
||||||
|
if RESULTS_PATH not in TASK_STATUS_CACHE:
|
||||||
|
# Initialize cache for this results path
|
||||||
|
TASK_STATUS_CACHE[RESULTS_PATH] = {}
|
||||||
|
|
||||||
|
@cache
|
||||||
def load_task_list():
|
def load_task_list():
|
||||||
with open(TASK_CONFIG_PATH, 'r') as f:
|
with open(TASK_CONFIG_PATH, 'r') as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
|
@cache
|
||||||
def get_task_info(task_type, task_id):
|
def get_task_info(task_type, task_id):
|
||||||
task_file = os.path.join(EXAMPLES_BASE_PATH, task_type, f"{task_id}.json")
|
task_file = os.path.join(EXAMPLES_BASE_PATH, task_type, f"{task_id}.json")
|
||||||
if os.path.exists(task_file):
|
if os.path.exists(task_file):
|
||||||
@@ -183,8 +225,8 @@ def get_task_status_brief(task_type, task_id):
|
|||||||
# Check if the status is already cached
|
# Check if the status is already cached
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
last_cache_time = None
|
last_cache_time = None
|
||||||
if cache_key in TASK_STATUS_CACHE:
|
if cache_key in TASK_STATUS_CACHE[RESULTS_PATH]:
|
||||||
cached_status, cached_time = TASK_STATUS_CACHE[cache_key]
|
cached_status, cached_time = TASK_STATUS_CACHE[RESULTS_PATH][cache_key]
|
||||||
last_cache_time = cached_time
|
last_cache_time = cached_time
|
||||||
# If cached status is "Done", check if it's within the stability period
|
# If cached status is "Done", check if it's within the stability period
|
||||||
if cached_status["status"].startswith("Done"):
|
if cached_status["status"].startswith("Done"):
|
||||||
@@ -312,7 +354,7 @@ def get_task_status_brief(task_type, task_id):
|
|||||||
# Cache the status if it is done or error
|
# Cache the status if it is done or error
|
||||||
if status.startswith("Done") or status == "Error":
|
if status.startswith("Done") or status == "Error":
|
||||||
current_time = last_cache_time if last_cache_time else current_time
|
current_time = last_cache_time if last_cache_time else current_time
|
||||||
TASK_STATUS_CACHE[cache_key] = (status_dict, current_time)
|
TASK_STATUS_CACHE[RESULTS_PATH][cache_key] = (status_dict, current_time)
|
||||||
|
|
||||||
return status_dict
|
return status_dict
|
||||||
|
|
||||||
@@ -434,6 +476,90 @@ def api_task_detail(task_type, task_id):
|
|||||||
"status": task_status
|
"status": task_status
|
||||||
})
|
})
|
||||||
|
|
||||||
|
@app.route('/api/config')
|
||||||
|
def api_config():
|
||||||
|
"""Get configuration information from environment variables - deprecated, use /api/current-config instead"""
|
||||||
|
config_info = {
|
||||||
|
"task_config_path": TASK_CONFIG_PATH,
|
||||||
|
"results_base_path": RESULTS_BASE_PATH,
|
||||||
|
"action_space": ACTION_SPACE,
|
||||||
|
"observation_type": OBSERVATION_TYPE,
|
||||||
|
"model_name": MODEL_NAME,
|
||||||
|
"max_steps": MAX_STEPS,
|
||||||
|
"examples_base_path": EXAMPLES_BASE_PATH
|
||||||
|
}
|
||||||
|
return jsonify(config_info)
|
||||||
|
|
||||||
|
@app.route('/api/available-configs')
|
||||||
|
def api_available_configs():
|
||||||
|
"""Get all available configuration combinations by scanning the results directory"""
|
||||||
|
configs = []
|
||||||
|
|
||||||
|
if os.path.exists(RESULTS_BASE_PATH):
|
||||||
|
try:
|
||||||
|
# Scan action spaces
|
||||||
|
for action_space in os.listdir(RESULTS_BASE_PATH):
|
||||||
|
action_space_path = os.path.join(RESULTS_BASE_PATH, action_space)
|
||||||
|
if os.path.isdir(action_space_path):
|
||||||
|
# Scan observation types
|
||||||
|
for obs_type in os.listdir(action_space_path):
|
||||||
|
obs_path = os.path.join(action_space_path, obs_type)
|
||||||
|
if os.path.isdir(obs_path):
|
||||||
|
# Scan model names
|
||||||
|
for model_name in os.listdir(obs_path):
|
||||||
|
model_path = os.path.join(obs_path, model_name)
|
||||||
|
if os.path.isdir(model_path):
|
||||||
|
configs.append({
|
||||||
|
"action_space": action_space,
|
||||||
|
"observation_type": obs_type,
|
||||||
|
"model_name": model_name,
|
||||||
|
"path": model_path
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Error scanning results directory: {e}")
|
||||||
|
|
||||||
|
return jsonify(configs)
|
||||||
|
|
||||||
|
@app.route('/api/current-config')
|
||||||
|
def api_current_config():
|
||||||
|
"""Get current configuration"""
|
||||||
|
return jsonify({
|
||||||
|
"action_space": ACTION_SPACE,
|
||||||
|
"observation_type": OBSERVATION_TYPE,
|
||||||
|
"model_name": MODEL_NAME,
|
||||||
|
"max_steps": MAX_STEPS,
|
||||||
|
"results_path": RESULTS_PATH
|
||||||
|
})
|
||||||
|
|
||||||
|
@app.route('/api/set-config', methods=['POST'])
def api_set_config():
    """Set current configuration.

    Expects a JSON object that may contain ``action_space``,
    ``observation_type`` and ``model_name``. Keys that are absent keep their
    current value. Each supplied value is used as one directory name below
    ``RESULTS_BASE_PATH``, so it must be a non-empty string naming a single
    path component.

    Returns:
        The resulting configuration as JSON, or a JSON error object with
        status 400 when the body is missing, is not an object, or contains an
        invalid value. No global state is modified when 400 is returned.
    """
    global ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME, RESULTS_PATH

    data = request.get_json()
    if not data:
        return jsonify({"error": "No data provided"}), 400
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    def _is_single_component(value):
        # Reject anything that could make os.path.join() leave
        # RESULTS_BASE_PATH: separators, "." / ".." and non-string values.
        if not isinstance(value, str) or not value or value in (".", ".."):
            return False
        if os.sep in value:
            return False
        return os.altsep is None or os.altsep not in value

    # Validate everything first so a bad request cannot leave the globals
    # half-updated.
    for key in ('action_space', 'observation_type', 'model_name'):
        if key in data and not _is_single_component(data[key]):
            return jsonify({"error": f"Invalid value for {key}"}), 400

    # Update global variables
    ACTION_SPACE = data.get('action_space', ACTION_SPACE)
    OBSERVATION_TYPE = data.get('observation_type', OBSERVATION_TYPE)
    MODEL_NAME = data.get('model_name', MODEL_NAME)

    # Update results path
    RESULTS_PATH = os.path.join(RESULTS_BASE_PATH, ACTION_SPACE, OBSERVATION_TYPE, MODEL_NAME)

    # Initialize cache for this results path on first use
    TASK_STATUS_CACHE.setdefault(RESULTS_PATH, {})

    return jsonify({
        "action_space": ACTION_SPACE,
        "observation_type": OBSERVATION_TYPE,
        "model_name": MODEL_NAME,
        "max_steps": MAX_STEPS,
        "results_path": RESULTS_PATH
    })
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# Check if necessary directories exist
|
# Check if necessary directories exist
|
||||||
if not os.path.exists(TASK_CONFIG_PATH):
|
if not os.path.exists(TASK_CONFIG_PATH):
|
||||||
@@ -447,4 +573,4 @@ if __name__ == '__main__':
|
|||||||
port = 8080
|
port = 8080
|
||||||
debug = os.getenv("FLASK_DEBUG", "false").lower() == "true"
|
debug = os.getenv("FLASK_DEBUG", "false").lower() == "true"
|
||||||
|
|
||||||
app.run(host=host, port=port, debug=debug)
|
app.run(host=host, port=port, debug=debug, threaded=True)
|
||||||
@@ -1,5 +1,63 @@
|
|||||||
/* filepath: /home/adlsdztony/codes/OSWorld/monitor/static/index.css */
|
/* filepath: /home/adlsdztony/codes/OSWorld/monitor/static/index.css */
|
||||||
body { font-family: 'Segoe UI', Arial, sans-serif; margin: 0; padding: 0; background: linear-gradient(135deg, #f4f6fa 0%, #e9f0f9 100%); }
|
body { font-family: 'Segoe UI', Arial, sans-serif; margin: 0; padding: 0; background: linear-gradient(135deg, #f4f6fa 0%, #e9f0f9 100%); }
|
||||||
|
|
||||||
|
.layout-container {
|
||||||
|
position: relative;
|
||||||
|
max-width: 1200px;
|
||||||
|
margin: 20px auto;
|
||||||
|
padding: 0 20px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.main-content {
|
||||||
|
background: #fff;
|
||||||
|
border-radius: 14px;
|
||||||
|
box-shadow: 0 8px 32px rgba(0,0,0,0.1);
|
||||||
|
padding: 36px 44px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Floating Config Sidebar */
|
||||||
|
.config-sidebar {
|
||||||
|
position: fixed;
|
||||||
|
top: 20px;
|
||||||
|
left: -280px;
|
||||||
|
width: 300px;
|
||||||
|
height: calc(100vh - 40px);
|
||||||
|
z-index: 1000;
|
||||||
|
transition: left 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-sidebar:hover {
|
||||||
|
left: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-toggle-btn {
|
||||||
|
position: absolute;
|
||||||
|
right: -50px;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
width: 50px;
|
||||||
|
height: 50px;
|
||||||
|
background: linear-gradient(135deg, #007bff, #0056b3);
|
||||||
|
border-radius: 0 25px 25px 0;
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
color: white;
|
||||||
|
font-size: 1.2em;
|
||||||
|
cursor: pointer;
|
||||||
|
box-shadow: 2px 0 10px rgba(0,0,0,0.2);
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-toggle-btn:hover {
|
||||||
|
background: linear-gradient(135deg, #0056b3, #004085);
|
||||||
|
transform: translateY(-50%) scale(1.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-sidebar:hover .config-toggle-btn {
|
||||||
|
opacity: 0.8;
|
||||||
|
}
|
||||||
|
|
||||||
.main-container { max-width: 1100px; margin: 40px auto; background: #fff; border-radius: 14px; box-shadow: 0 8px 32px rgba(0,0,0,0.1); padding: 36px 44px; }
|
.main-container { max-width: 1100px; margin: 40px auto; background: #fff; border-radius: 14px; box-shadow: 0 8px 32px rgba(0,0,0,0.1); padding: 36px 44px; }
|
||||||
h1 { font-size: 2.5em; margin-bottom: 24px; color: #1a237e; text-align: center; position: relative; }
|
h1 { font-size: 2.5em; margin-bottom: 24px; color: #1a237e; text-align: center; position: relative; }
|
||||||
h1:after { content: ''; display: block; width: 80px; height: 4px; background: linear-gradient(90deg, #007bff, #00c6ff); margin: 12px auto 0; border-radius: 2px; }
|
h1:after { content: ''; display: block; width: 80px; height: 4px; background: linear-gradient(90deg, #007bff, #00c6ff); margin: 12px auto 0; border-radius: 2px; }
|
||||||
@@ -125,6 +183,18 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
|
|||||||
text-shadow: 0 1px 2px rgba(0,0,0,0.05);
|
text-shadow: 0 1px 2px rgba(0,0,0,0.05);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.accuracy-percentage {
|
||||||
|
font-size: 0.7em;
|
||||||
|
font-weight: 600;
|
||||||
|
color: #ffffff;
|
||||||
|
margin-left: 8px;
|
||||||
|
background: rgba(255, 255, 255, 0.1);
|
||||||
|
padding: 4px 8px;
|
||||||
|
border-radius: 12px;
|
||||||
|
display: inline-block;
|
||||||
|
vertical-align: middle;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
.stat-card span {
|
.stat-card span {
|
||||||
font-size: 2em;
|
font-size: 2em;
|
||||||
@@ -197,8 +267,9 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
|
|||||||
|
|
||||||
.task-type-stats {
|
.task-type-stats {
|
||||||
display: flex;
|
display: flex;
|
||||||
gap: 16px;
|
|
||||||
flex-wrap: wrap;
|
flex-wrap: wrap;
|
||||||
|
gap: 8px;
|
||||||
|
align-items: center;
|
||||||
}
|
}
|
||||||
|
|
||||||
.task-stat {
|
.task-stat {
|
||||||
@@ -228,6 +299,22 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
|
|||||||
color: #b71c1c;
|
color: #b71c1c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Task type statistics styles */
|
||||||
|
.task-stat.score {
|
||||||
|
color: #ffc107;
|
||||||
|
background: rgba(255, 193, 7, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.task-stat.steps {
|
||||||
|
color: #17a2b8;
|
||||||
|
background: rgba(23, 162, 184, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.task-stat.rate {
|
||||||
|
color: #28a745;
|
||||||
|
background: rgba(40, 167, 69, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
.tasks-container {
|
.tasks-container {
|
||||||
padding: 20px;
|
padding: 20px;
|
||||||
transition: all 0.4s cubic-bezier(.4,0,.2,1);
|
transition: all 0.4s cubic-bezier(.4,0,.2,1);
|
||||||
@@ -427,3 +514,174 @@ h2 { color: #0056b3; margin-top: 32px; font-size: 1.6em; }
|
|||||||
background: #a5c7e5;
|
background: #a5c7e5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Configuration Panel Styles */
|
||||||
|
.config-panel {
|
||||||
|
background: #fff;
|
||||||
|
border-radius: 0 14px 14px 0;
|
||||||
|
box-shadow: 0 8px 32px rgba(0,0,0,0.15);
|
||||||
|
overflow: hidden;
|
||||||
|
height: 100%;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
padding: 16px 20px;
|
||||||
|
background: linear-gradient(135deg, #6c757d, #495057);
|
||||||
|
color: white;
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-header i {
|
||||||
|
margin-right: 10px;
|
||||||
|
font-size: 1.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-header span {
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 1.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-content {
|
||||||
|
padding: 20px;
|
||||||
|
flex: 1;
|
||||||
|
overflow-y: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-selector {
|
||||||
|
margin-bottom: 20px;
|
||||||
|
padding-bottom: 15px;
|
||||||
|
border-bottom: 1px solid #dee2e6;
|
||||||
|
}
|
||||||
|
|
||||||
|
.selector-item {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.selector-item label {
|
||||||
|
font-weight: 600;
|
||||||
|
color: #495057;
|
||||||
|
font-size: 0.9em;
|
||||||
|
text-transform: uppercase;
|
||||||
|
letter-spacing: 0.5px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.selector-item select {
|
||||||
|
padding: 8px 12px;
|
||||||
|
border: 2px solid #e9ecef;
|
||||||
|
border-radius: 6px;
|
||||||
|
background: white;
|
||||||
|
font-size: 0.9em;
|
||||||
|
color: #495057;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.selector-item select:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: #007bff;
|
||||||
|
box-shadow: 0 0 0 3px rgba(0,123,255,0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.selector-item select:hover {
|
||||||
|
border-color: #007bff;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-list {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 15px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-item {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
background: #f8f9fa;
|
||||||
|
padding: 12px;
|
||||||
|
border-radius: 8px;
|
||||||
|
border-left: 4px solid #007bff;
|
||||||
|
transition: all 0.3s ease;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-item:hover {
|
||||||
|
transform: translateX(3px);
|
||||||
|
box-shadow: 0 4px 12px rgba(0,123,255,0.15);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Caption shown above each configuration value in the sidebar.
   Earlier duplicate declarations (color, font-size, margin-bottom,
   text-transform) were overridden by later ones in the same rule and have
   been removed; the values below are the ones that took effect. */
.config-label {
    font-weight: 600;
    color: #495057;
    font-size: 0.85em;
    margin-bottom: 6px;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}
|
||||||
|
|
||||||
|
.config-value {
|
||||||
|
color: #007bff;
|
||||||
|
font-family: 'Courier New', monospace;
|
||||||
|
font-size: 0.9em;
|
||||||
|
font-weight: 600;
|
||||||
|
word-break: break-word;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-path {
|
||||||
|
font-size: 0.8em;
|
||||||
|
line-height: 1.3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive design for sidebar layout */
|
||||||
|
@media (max-width: 1024px) {
|
||||||
|
.config-sidebar {
|
||||||
|
left: -250px;
|
||||||
|
width: 250px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-toggle-btn {
|
||||||
|
right: -40px;
|
||||||
|
width: 40px;
|
||||||
|
height: 40px;
|
||||||
|
font-size: 1em;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.layout-container {
|
||||||
|
padding: 0 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.main-content {
|
||||||
|
padding: 20px 25px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-sidebar {
|
||||||
|
left: -220px;
|
||||||
|
width: 220px;
|
||||||
|
height: calc(100vh - 20px);
|
||||||
|
top: 10px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-toggle-btn {
|
||||||
|
right: -35px;
|
||||||
|
width: 35px;
|
||||||
|
height: 35px;
|
||||||
|
font-size: 0.9em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-content {
|
||||||
|
padding: 15px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.config-item {
|
||||||
|
padding: 10px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
document.addEventListener('DOMContentLoaded', () => {
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
fetchTasks();
|
fetchAvailableConfigs().then(() => {
|
||||||
|
fetchConfig();
|
||||||
|
fetchTasks();
|
||||||
|
});
|
||||||
// Bind filter functionality
|
// Bind filter functionality
|
||||||
document.getElementById('total-tasks').parentElement.addEventListener('click', () => setTaskFilter('all'));
|
document.getElementById('total-tasks').parentElement.addEventListener('click', () => setTaskFilter('all'));
|
||||||
document.getElementById('active-tasks').parentElement.addEventListener('click', () => setTaskFilter('active'));
|
document.getElementById('active-tasks').parentElement.addEventListener('click', () => setTaskFilter('active'));
|
||||||
@@ -9,6 +12,9 @@ document.addEventListener('DOMContentLoaded', () => {
|
|||||||
|
|
||||||
let allTaskData = null;
|
let allTaskData = null;
|
||||||
let currentFilter = 'all';
|
let currentFilter = 'all';
|
||||||
|
let availableConfigs = [];
|
||||||
|
let currentConfig = null;
|
||||||
|
let categoryStats = {};
|
||||||
|
|
||||||
function refreshPage() {
|
function refreshPage() {
|
||||||
// Save expanded state before refresh
|
// Save expanded state before refresh
|
||||||
@@ -31,8 +37,8 @@ function fetchTasksForRefresh() {
|
|||||||
fetch('/api/tasks/brief')
|
fetch('/api/tasks/brief')
|
||||||
.then(response => response.json())
|
.then(response => response.json())
|
||||||
.then(data => {
|
.then(data => {
|
||||||
// Update stored data
|
|
||||||
allTaskData = data;
|
allTaskData = data;
|
||||||
|
categoryStats = calculateCategoryStats(data);
|
||||||
// Only update statistics and task status, do not fully re-render
|
// Only update statistics and task status, do not fully re-render
|
||||||
updateStatistics(data);
|
updateStatistics(data);
|
||||||
updateTaskStatus(data);
|
updateTaskStatus(data);
|
||||||
@@ -148,6 +154,7 @@ function fetchTasks() {
|
|||||||
.then(response => response.json())
|
.then(response => response.json())
|
||||||
.then(data => {
|
.then(data => {
|
||||||
allTaskData = data;
|
allTaskData = data;
|
||||||
|
categoryStats = calculateCategoryStats(data);
|
||||||
renderTasks(data);
|
renderTasks(data);
|
||||||
updateStatistics(data);
|
updateStatistics(data);
|
||||||
})
|
})
|
||||||
@@ -208,13 +215,15 @@ function updateStatistics(data) {
|
|||||||
document.getElementById('completed-tasks').textContent = completedTasks;
|
document.getElementById('completed-tasks').textContent = completedTasks;
|
||||||
document.getElementById('error-tasks').textContent = errorTasks;
|
document.getElementById('error-tasks').textContent = errorTasks;
|
||||||
|
|
||||||
// Update score display with formatted score
|
// Update score display with formatted score and accuracy percentage
|
||||||
const scoreDisplay = document.getElementById('score-display');
|
const scoreDisplay = document.getElementById('score-display');
|
||||||
if (completedTasks > 0) {
|
if (completedTasks > 0) {
|
||||||
const scoreFormatted = totalScore.toFixed(2);
|
const scoreFormatted = totalScore.toFixed(2);
|
||||||
scoreDisplay.innerHTML = `<span>${scoreFormatted}</span> / <span>${completedTasks}</span>`;
|
const averageScore = totalScore / completedTasks;
|
||||||
|
const accuracyPercentage = (averageScore * 100).toFixed(1);
|
||||||
|
scoreDisplay.innerHTML = `<span>${scoreFormatted}</span> / <span>${completedTasks}</span> <span class="accuracy-percentage">(${accuracyPercentage}%)</span>`;
|
||||||
} else {
|
} else {
|
||||||
scoreDisplay.innerHTML = '<span>0.00</span> / <span>0</span>';
|
scoreDisplay.innerHTML = '<span>0.00</span> / <span>0</span> <span class="accuracy-percentage">(0.0%)</span>';
|
||||||
}
|
}
|
||||||
|
|
||||||
// Highlight the currently selected statistics card
|
// Highlight the currently selected statistics card
|
||||||
@@ -279,6 +288,10 @@ function renderTasks(data) {
|
|||||||
// Create header with task type name and statistics
|
// Create header with task type name and statistics
|
||||||
const typeHeader = document.createElement('div');
|
const typeHeader = document.createElement('div');
|
||||||
typeHeader.className = 'task-type-header';
|
typeHeader.className = 'task-type-header';
|
||||||
|
|
||||||
|
// Get category stats for this task type
|
||||||
|
const stats = categoryStats[taskType] || {};
|
||||||
|
|
||||||
typeHeader.innerHTML = `
|
typeHeader.innerHTML = `
|
||||||
<span class="task-type-name"><i class="fas fa-layer-group"></i> ${taskType}</span>
|
<span class="task-type-name"><i class="fas fa-layer-group"></i> ${taskType}</span>
|
||||||
<div class="task-type-stats">
|
<div class="task-type-stats">
|
||||||
@@ -286,6 +299,9 @@ function renderTasks(data) {
|
|||||||
<span class="task-stat"><i class="fas fa-tasks"></i> ${tasks.length} total</span>
|
<span class="task-stat"><i class="fas fa-tasks"></i> ${tasks.length} total</span>
|
||||||
<span class="task-stat running"><i class="fas fa-running"></i> ${runningCount} active</span>
|
<span class="task-stat running"><i class="fas fa-running"></i> ${runningCount} active</span>
|
||||||
<span class="task-stat completed"><i class="fas fa-check-circle"></i> ${completedCount} completed</span>
|
<span class="task-stat completed"><i class="fas fa-check-circle"></i> ${completedCount} completed</span>
|
||||||
|
${stats.avg_score ? `<span class="task-stat score"><i class="fas fa-star"></i> ${stats.avg_score} avg score</span>` : ''}
|
||||||
|
${stats.avg_steps ? `<span class="task-stat steps"><i class="fas fa-chart-line"></i> ${stats.avg_steps} avg steps</span>` : ''}
|
||||||
|
${stats.completion_rate ? `<span class="task-stat rate"><i class="fas fa-percentage"></i> ${stats.completion_rate}% completed</span>` : ''}
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
typeSection.appendChild(typeHeader);
|
typeSection.appendChild(typeHeader);
|
||||||
@@ -453,7 +469,181 @@ function renderTasks(data) {
|
|||||||
container.appendChild(typeSection);
|
container.appendChild(typeSection);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
// add auto-refresh with time interval 10 seconds
|
|
||||||
setInterval(() => {
|
function fetchAvailableConfigs() {
|
||||||
refreshPage();
|
return fetch('/api/available-configs')
|
||||||
}, 10000); // 10 seconds interval
|
.then(response => response.json())
|
||||||
|
.then(data => {
|
||||||
|
availableConfigs = data;
|
||||||
|
populateConfigSelect();
|
||||||
|
return data;
|
||||||
|
})
|
||||||
|
.catch(error => {
|
||||||
|
console.error('Error fetching available configs:', error);
|
||||||
|
return [];
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Rebuild the options of the #config-select dropdown from `availableConfigs`.
 * Each option's value is the index of the configuration in that array; when
 * the array is empty a single placeholder option is shown instead.
 */
function populateConfigSelect() {
    const dropdown = document.getElementById('config-select');
    dropdown.innerHTML = '';

    if (!availableConfigs.length) {
        dropdown.innerHTML = '<option value="">No configurations found in results directory</option>';
        return;
    }

    // One <option> per available configuration, labelled by its three parts.
    for (const [position, entry] of availableConfigs.entries()) {
        const item = document.createElement('option');
        item.value = position;
        item.textContent = `${entry.action_space} / ${entry.observation_type} / ${entry.model_name}`;
        dropdown.appendChild(item);
    }
}
|
||||||
|
|
||||||
|
/**
 * Handler for the #config-select dropdown: POST the chosen configuration to
 * /api/set-config, then show the configuration the server reports back and
 * reload the task list.
 *
 * Does nothing when the dropdown value is not a valid index into
 * `availableConfigs` (for example, the placeholder option). Any failure,
 * including a non-2xx response, is logged and shown via displayConfigError().
 */
function changeConfiguration() {
    const select = document.getElementById('config-select');
    // select.value is a string; parse it so the range check compares numbers.
    const selectedIndex = Number.parseInt(select.value, 10);

    if (Number.isNaN(selectedIndex) || selectedIndex < 0 || selectedIndex >= availableConfigs.length) {
        return;
    }

    const selectedConfig = availableConfigs[selectedIndex];

    // Send configuration change request
    fetch('/api/set-config', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
        },
        body: JSON.stringify(selectedConfig)
    })
        .then(response => {
            // An error response carries {"error": ...}, not a configuration;
            // do not let it be stored and rendered as the current config.
            if (!response.ok) {
                throw new Error(`Server rejected configuration (HTTP ${response.status})`);
            }
            return response.json();
        })
        .then(data => {
            currentConfig = data;
            displayConfig(data);
            // Refresh tasks with new configuration
            fetchTasks();
        })
        .catch(error => {
            console.error('Error setting config:', error);
            displayConfigError();
        });
}
|
||||||
|
|
||||||
|
/**
 * Load the active configuration from /api/current-config, store it in
 * `currentConfig`, render it and sync the dropdown selection.
 *
 * @returns {Promise<Object|undefined>} the configuration, or undefined when
 *          the request failed (the failure is logged and shown in the panel).
 */
function fetchConfig() {
    const applyConfig = (config) => {
        currentConfig = config;
        displayConfig(config);
        updateConfigSelect();
        return config;
    };

    const reportFailure = (error) => {
        console.error('Error fetching config:', error);
        displayConfigError();
    };

    return fetch('/api/current-config')
        .then((response) => response.json())
        .then(applyConfig)
        .catch(reportFailure);
}
|
||||||
|
|
||||||
|
/**
 * Select the dropdown option that corresponds to `currentConfig`.
 * Falls back to the first option (with a console warning) when the current
 * configuration is not among `availableConfigs`. Does nothing when either
 * the current configuration or the list of available ones is missing.
 */
function updateConfigSelect() {
    const nothingToSync = !currentConfig || availableConfigs.length === 0;
    if (nothingToSync) return;

    const dropdown = document.getElementById('config-select');

    // A configuration is identified by these three fields.
    const identityKeys = ['action_space', 'observation_type', 'model_name'];
    const matchIndex = availableConfigs.findIndex((candidate) =>
        identityKeys.every((key) => candidate[key] === currentConfig[key])
    );

    if (matchIndex === -1) {
        // The list is known to be non-empty here, so index 0 always exists.
        dropdown.value = 0;
        console.warn('Current config not found in available configs, defaulting to first available config');
        return;
    }

    dropdown.value = matchIndex;
}
|
||||||
|
|
||||||
|
/**
 * Write the fields of `config` into the sidebar's value elements.
 * A missing or falsy field is shown as 'N/A'.
 *
 * @param {Object} config configuration object as returned by the server
 */
function displayConfig(config) {
    // [element id, config key] pairs, in display order.
    const bindings = [
        ['action-space', 'action_space'],
        ['observation-type', 'observation_type'],
        ['model-name', 'model_name'],
        ['max-steps', 'max_steps'],
    ];

    for (const [elementId, key] of bindings) {
        document.getElementById(elementId).textContent = config[key] || 'N/A';
    }
}
|
||||||
|
|
||||||
|
/**
 * Mark every `.config-value` element as failed: replace its text with
 * 'Error loading' and colour it red.
 */
function displayConfigError() {
    for (const valueElement of document.querySelectorAll('.config-value')) {
        valueElement.textContent = 'Error loading';
        valueElement.style.color = '#dc3545';
    }
}
|
||||||
|
|
||||||
|
/**
 * Aggregate per-category statistics from the task listing.
 *
 * @param {Object<string, Array>} data map of task type -> array of tasks,
 *        where each task carries a `status` object with `status`, and
 *        optionally `result` and `progress`.
 * @returns {Object<string, Object>} map of task type -> counters
 *        (total/completed/running/error), summed and average score, average
 *        steps of completed tasks that report progress, and completion rate
 *        in percent.
 */
function calculateCategoryStats(data) {
    const DONE_STATES = ['Done', 'Done (Message Exit)', 'Done (Max Steps)', 'Done (Thought Exit)'];
    const ACTIVE_STATES = ['Running', 'Preparing', 'Initializing'];

    // Round to the precision implied by `factor` (100 -> 2 decimals, ...).
    const roundBy = (value, factor) => Math.round(value * factor) / factor;

    // Score contributed by a raw result: only values within [0, 1] count.
    const scoreOf = (raw) => {
        if (!raw) return 0;
        try {
            const parsed = parseFloat(raw);
            return (!isNaN(parsed) && parsed >= 0 && parsed <= 1) ? parsed : 0;
        } catch (e) {
            // Ignore parsing errors
            return 0;
        }
    };

    const summary = {};

    for (const [taskType, tasks] of Object.entries(data)) {
        const count = tasks.length;
        let done = 0;
        let active = 0;
        let failed = 0;
        let scoreSum = 0;
        let stepSum = 0;
        let doneWithSteps = 0;

        for (const task of tasks) {
            const state = task.status.status;

            if (DONE_STATES.includes(state)) {
                done += 1;
                scoreSum += scoreOf(task.status.result);

                // Only completed tasks reporting positive progress count
                // towards the step average.
                const steps = task.status.progress;
                if (steps && steps > 0) {
                    stepSum += steps;
                    doneWithSteps += 1;
                }
                continue;
            }

            if (ACTIVE_STATES.includes(state)) {
                active += 1;
            } else if (state === 'Error') {
                failed += 1;
            }
        }

        // Guard every division against an empty denominator.
        const meanScore = done > 0 ? scoreSum / done : 0;
        const meanSteps = doneWithSteps > 0 ? stepSum / doneWithSteps : 0;
        const doneShare = count > 0 ? (done / count * 100) : 0;

        summary[taskType] = {
            total_tasks: count,
            completed_tasks: done,
            running_tasks: active,
            error_tasks: failed,
            total_score: roundBy(scoreSum, 100),
            avg_score: roundBy(meanScore, 10000),
            avg_steps: roundBy(meanSteps, 10),
            completion_rate: roundBy(doneShare, 10)
        };
    }

    return summary;
}
|
||||||
|
|||||||
@@ -12,19 +12,62 @@
|
|||||||
<link rel="stylesheet" href="/static/index.css">
|
<link rel="stylesheet" href="/static/index.css">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div class="main-container">
|
<div class="layout-container">
|
||||||
<h1>OSWorld Monitor <span class="system-status online">System Online</span></h1>
|
<!-- Floating Config Button and Sidebar -->
|
||||||
|
<div class="config-sidebar" id="config-sidebar">
|
||||||
<!-- Score Display Banner -->
|
<div class="config-toggle-btn">
|
||||||
<div class="score-banner">
|
<i class="fas fa-cogs"></i>
|
||||||
<div class="score-content">
|
</div>
|
||||||
<i class="fas fa-star"></i>
|
<div class="config-panel">
|
||||||
<span class="score-label">Score:</span>
|
<div class="config-header">
|
||||||
<span id="score-display" class="score-value">Loading...</span>
|
<i class="fas fa-cogs"></i>
|
||||||
|
<span>Configuration</span>
|
||||||
|
</div>
|
||||||
|
<div class="config-content">
|
||||||
|
<div class="config-selector">
|
||||||
|
<div class="selector-item">
|
||||||
|
<label for="config-select">Select Configuration:</label>
|
||||||
|
<select id="config-select" onchange="changeConfiguration()">
|
||||||
|
<option value="">Loading configurations...</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="config-list">
|
||||||
|
<div class="config-item">
|
||||||
|
<span class="config-label">Action Space:</span>
|
||||||
|
<span class="config-value" id="action-space">Loading...</span>
|
||||||
|
</div>
|
||||||
|
<div class="config-item">
|
||||||
|
<span class="config-label">Observation:</span>
|
||||||
|
<span class="config-value" id="observation-type">Loading...</span>
|
||||||
|
</div>
|
||||||
|
<div class="config-item">
|
||||||
|
<span class="config-label">Model:</span>
|
||||||
|
<span class="config-value" id="model-name">Loading...</span>
|
||||||
|
</div>
|
||||||
|
<div class="config-item">
|
||||||
|
<span class="config-label">Max Steps:</span>
|
||||||
|
<span class="config-value" id="max-steps">Loading...</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="dashboard-stats">
|
<!-- Main Content -->
|
||||||
|
<div class="main-content">
|
||||||
|
<h1>OSWorld Monitor <span class="system-status online">System Online</span></h1>
|
||||||
|
|
||||||
|
<!-- Score Display Banner -->
|
||||||
|
<div class="score-banner">
|
||||||
|
<div class="score-content">
|
||||||
|
<i class="fas fa-star"></i>
|
||||||
|
<span class="score-label">Score:</span>
|
||||||
|
<span id="score-display" class="score-value">Loading...</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="dashboard-stats">
|
||||||
<div class="stat-card">
|
<div class="stat-card">
|
||||||
<i class="fas fa-running"></i>
|
<i class="fas fa-running"></i>
|
||||||
<span id="active-tasks">Loading...</span>
|
<span id="active-tasks">Loading...</span>
|
||||||
@@ -46,10 +89,11 @@
|
|||||||
<div class="stat-label">Total Tasks</div>
|
<div class="stat-label">Total Tasks</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div id="task-container">
|
<div id="task-container">
|
||||||
<div class="loading-spinner">
|
<div class="loading-spinner">
|
||||||
<div class="spinner"></div>
|
<div class="spinner"></div>
|
||||||
<div>Loading task data...</div>
|
<div>Loading task data...</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
Reference in New Issue
Block a user