From cc21c3a6b195b7c9447397fb3b93574866ab4035 Mon Sep 17 00:00:00 2001 From: Timothyxxx <384084775@qq.com> Date: Sun, 28 Jan 2024 21:19:18 +0800 Subject: [PATCH] Fix some errors found in calc examples --- desktop_env/evaluators/metrics/__init__.py | 2 +- desktop_env/evaluators/metrics/pdf.py | 2 + desktop_env/evaluators/metrics/table.py | 224 ++++++++++-------- .../4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json | 5 +- .../6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json | 2 +- .../a01fbce3-2793-461f-ab86-43680ccbae25.json | 2 +- experiment_a11y_tree.py | 27 +-- experiment_screenshot.py | 34 ++- mm_agents/gpt_4v_agent.py | 13 + 9 files changed, 186 insertions(+), 125 deletions(-) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index f368699..82b889e 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -19,7 +19,7 @@ from .slides import check_presenter_console_disable, check_image_stretch_and_cen compare_pptx_files, check_strikethrough, \ check_slide_orientation_Portrait, evaluate_presentation_fill_to_rgb_distance, check_left_panel # from .table import check_sheet_list, check_xlsx_freeze, check_xlsx_zoom, check_data_validations -from .table import compare_table +from .table import compare_table, compare_csv from .thunderbird import check_thunderbird_prefs, check_thunderbird_filter from .vlc import is_vlc_playing, is_vlc_recordings_folder, is_vlc_fullscreen, compare_images, compare_audios, \ diff --git a/desktop_env/evaluators/metrics/pdf.py b/desktop_env/evaluators/metrics/pdf.py index d607733..d6644d1 100644 --- a/desktop_env/evaluators/metrics/pdf.py +++ b/desktop_env/evaluators/metrics/pdf.py @@ -6,6 +6,8 @@ from pypdf import PdfReader def check_pdf_pages(pdf_file: str, rules: Dict[str, Any]) -> float: + if pdf_file is None: + return 0.0 reader = PdfReader(pdf_file) nb_pages: int = len(reader.pages) return float(getattr(operator, rules["relation"])(nb_pages, rules["ref_value"])) diff --git a/desktop_env/evaluators/metrics/table.py b/desktop_env/evaluators/metrics/table.py index d7e4faf..4e8bac6 100644 --- a/desktop_env/evaluators/metrics/table.py +++ b/desktop_env/evaluators/metrics/table.py @@ -1,32 +1,35 @@ +import functools +import itertools import logging -#import operator +import os.path +# import operator from numbers import Number from typing import Any, Union, cast, Callable, Iterable from typing import Dict, List, Tuple -import os.path -import itertools -import functools import openpyxl import pandas as pd from openpyxl import Workbook -from openpyxl.worksheet.worksheet import Worksheet -#from openpyxl.worksheet.cell_range import MultiCellRange -from openpyxl.worksheet.datavalidation import DataValidation from openpyxl.cell.cell import Cell -#from openpyxl.utils import coordinate_to_tuple +# from openpyxl.worksheet.cell_range import MultiCellRange +from openpyxl.worksheet.datavalidation import DataValidation +from openpyxl.worksheet.worksheet import Worksheet -from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles from .utils import _match_value_to_rule, _read_cell_style, read_cell_value +from .utils import load_charts, load_sparklines, load_rows_or_cols, load_xlsx_styles + +# from openpyxl.utils import coordinate_to_tuple logger = logging.getLogger("desktopenv.metric.table") BOOK = Union[pd.ExcelFile, Workbook, str] -def _parse_sheet_idx( sheet_idx: Union[int, str] - , result: BOOK, expected: BOOK - , result_sheet_names: List[str] - , expected_sheet_names: List[str] - ) -> Tuple[BOOK, str]: + + +def _parse_sheet_idx(sheet_idx: Union[int, str] + , result: BOOK, expected: BOOK + , result_sheet_names: List[str] + , expected_sheet_names: List[str] + ) -> Tuple[BOOK, str]: # function _parse_sheet_idx {{{ # if isinstance(sheet_idx, int): index: str = result_sheet_names[sheet_idx] @@ -49,7 +52,10 @@ def _parse_sheet_idx( sheet_idx: Union[int, str] return book, index # }}} function _parse_sheet_idx # + SHEET = Union[pd.DataFrame, Worksheet, List[str]] + + def _load_sheet(book: BOOK, index: str) -> SHEET: # function _load_sheet {{{ # if isinstance(book, str): @@ -57,12 +63,12 @@ def _load_sheet(book: BOOK, index: str) -> SHEET: csv_name: str = "{:}-{:}.csv".format(os.path.splitext(book)[0], index) with open(csv_name) as f: - csv_lines: List[str] = list( itertools.dropwhile( lambda l: len(l)==0 - , map( lambda l: l.strip() - , reversed(f.read().splitlines()) - ) + csv_lines: List[str] = list(itertools.dropwhile(lambda l: len(l) == 0 + , map(lambda l: l.strip() + , reversed(f.read().splitlines()) + ) ) - ) + ) return csv_lines if isinstance(book, pd.ExcelFile): return pd.read_excel(book, index) @@ -72,7 +78,8 @@ def _load_sheet(book: BOOK, index: str) -> SHEET: raise NotImplementedError("Not supported workbook format") # }}} function _load_sheet # -def compare_table(result: str, expected: str, **options) -> float: + +def compare_table(result: str, expected: str = None, **options) -> float: # function compare_table {{{ # """ Args: @@ -99,21 +106,28 @@ def compare_table(result: str, expected: str, **options) -> float: return 0. worksheetr_names: List[str] = pdworkbookr.sheet_names - xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected) - pdworkbooke = pd.ExcelFile(expected) - worksheete_names: List[str] = pdworkbooke.sheet_names + if expected is not None: - parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] =\ - functools.partial( _parse_sheet_idx - , result_sheet_names=worksheetr_names - , expected_sheet_names=worksheete_names - ) + xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected) + pdworkbooke = pd.ExcelFile(expected) + worksheete_names: List[str] = pdworkbooke.sheet_names + else: + xlworkbooke: Workbook = None + pdworkbooke = None + worksheete_names: List[str] = None + + parse_idx: Callable[[Union[str, int], BOOK, BOOK], BOOK] = \ + functools.partial( + _parse_sheet_idx, + result_sheet_names=worksheetr_names, + expected_sheet_names=worksheete_names + ) passes = True for r in options["rules"]: if r["type"] == "sheet_name": # Compare Sheet Names {{{ # - metric: bool = worksheetr_names==worksheete_names + metric: bool = worksheetr_names == worksheete_names logger.debug("Assertion: %s.sheet_names == %s.sheet_names - %s", result, expected, metric) # }}} Compare Sheet Names # @@ -174,8 +188,8 @@ def compare_table(result: str, expected: str, **options) -> float: styles1: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke), **r) styles2: Dict[str, List[Any]] = load_xlsx_styles(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke), **r) - #number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"] - #number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"] + # number_formats1: List[str] = [c.number_format.lower() for col in sheet1.iter_cols() for c in col if c.value is not None and c.data_type=="n"] + # number_formats2: List[str] = [c.number_format.lower() for col in sheet2.iter_cols() for c in col if c.value is not None and c.data_type=="n"] metric: bool = styles1 == styles2 logger.debug("Assertion: %s.style == %s.style - %s", r["sheet_idx0"], r["sheet_idx1"], metric) # }}} Compare Style (Also Conditional Formatiing) # @@ -188,11 +202,11 @@ def compare_table(result: str, expected: str, **options) -> float: sheet1: Worksheet = _load_sheet(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke)) sheet2: Worksheet = _load_sheet(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke)) metric: bool = sheet1.freeze_panes == sheet2.freeze_panes - logger.debug( "Assertion: %s.freeze(%s) == %s.freeze(%s) - %s" - , r["sheet_idx0"], sheet1.freeze_panes - , r["sheet_idx1"], sheet2.freeze_panes - , metric - ) + logger.debug("Assertion: %s.freeze(%s) == %s.freeze(%s) - %s" + , r["sheet_idx0"], sheet1.freeze_panes + , r["sheet_idx1"], sheet2.freeze_panes + , metric + ) # }}} Compare Freezing # elif r["type"] == "zoom": @@ -204,7 +218,8 @@ def compare_table(result: str, expected: str, **options) -> float: sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke)) zoom_scale: Number = sheet.sheet_view.zoomScale or 100. metric: bool = _match_value_to_rule(zoom_scale, r) - logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"], metric) + logger.debug("Assertion: %s.zoom(%.1f) %s %.1f - %s", r["sheet_idx"], zoom_scale, r["method"], r["ref"], + metric) # }}} Check Zooming # elif r["type"] == "data_validation": @@ -231,15 +246,15 @@ def compare_table(result: str, expected: str, **options) -> float: sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke)) data_validators: List[DataValidation] = sheet.data_validations.dataValidation - total_metric = len(data_validators)>=len(r["dv_props"]) + total_metric = len(data_validators) >= len(r["dv_props"]) for dat_vldt in data_validators: metric = False for prpt in r["dv_props"]: - metric = metric or all( _match_value_to_rule( getattr(dat_vldt, attrbt) + metric = metric or all(_match_value_to_rule(getattr(dat_vldt, attrbt) , mr - )\ - for attrbt, mr in prpt.items() - ) + ) \ + for attrbt, mr in prpt.items() + ) if metric: break total_metric = total_metric and metric @@ -256,14 +271,14 @@ def compare_table(result: str, expected: str, **options) -> float: # sheet_idx1: as sheet_idx0 # props: list of str, see utils.load_rows_or_cols - rows1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke) - , obj="row" - , **r - ) - rows2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke) - , obj="row" - , **r - ) + rows1: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke) + , obj="row" + , **r + ) + rows2: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke) + , obj="row" + , **r + ) logger.debug("Rows1: %s", repr(rows1)) logger.debug("Rows2: %s", repr(rows2)) metric: bool = rows1 == rows2 @@ -276,14 +291,14 @@ def compare_table(result: str, expected: str, **options) -> float: # sheet_idx1: as sheet_idx0 # props: list of str, see utils.load_rows_or_cols - cols1: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke) - , obj="column" - , **r - ) - cols2: Dict[str, Any] = load_rows_or_cols( *parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke) - , obj="column" - , **r - ) + cols1: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx0"], xlworkbookr, xlworkbooke) + , obj="column" + , **r + ) + cols2: Dict[str, Any] = load_rows_or_cols(*parse_idx(r["sheet_idx1"], xlworkbookr, xlworkbooke) + , obj="column" + , **r + ) metric: bool = cols1 == cols2 logger.debug("Assertion: %s[cols] == %s[cols] - %s", r["sheet_idx0"], r["sheet_idx1"], metric) # }}} Check Row Properties # @@ -296,21 +311,21 @@ def compare_table(result: str, expected: str, **options) -> float: # supported attributes: value & those supported by utils._read_cell_style sheet: Worksheet = _load_sheet(*parse_idx(r["sheet_idx"], xlworkbookr, xlworkbooke)) - #data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke)) + # data_frame: pd.DataFrame = _load_sheet(*parse_idx(r["sheet_idx"], pdworkbookr, pdworkbooke)) cell: Cell = sheet[r["coordinate"]] metric: bool = True for prpt, rule in r["props"].items(): - if prpt=="value": + if prpt == "value": val = read_cell_value(*parse_idx(r["sheet_idx"], result, expected), r["coordinate"]) else: val = _read_cell_style(prpt, cell) metric = metric and _match_value_to_rule(val, rule) - logger.debug( "Assertion: %s[%s] :%s - %s" - , r["sheet_idx"], r["coordinate"] - , repr(r["props"]), metric - ) + logger.debug("Assertion: %s[%s] :%s - %s" + , r["sheet_idx"], r["coordinate"] + , repr(r["props"]), metric + ) # }}} Check Cell Properties # else: @@ -323,6 +338,7 @@ def compare_table(result: str, expected: str, **options) -> float: return float(passes) # }}} function compare_table # + def compare_csv(result: str, expected: str, **options) -> float: if result is None: return 0. @@ -338,9 +354,10 @@ def compare_csv(result: str, expected: str, **options) -> float: result_lines = map(str.lower, result_lines) expected_lines = map(str.lower, expected_lines) - metric: bool = list(result_lines)==list(expected_lines) + metric: bool = list(result_lines) == list(expected_lines) return float(metric) + if __name__ == '__main__': import datetime import sys @@ -360,7 +377,8 @@ if __name__ == '__main__': stdout_handler.setLevel(logging.INFO) sdebug_handler.setLevel(logging.DEBUG) - formatter = logging.Formatter(fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") + formatter = logging.Formatter( + fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s") file_handler.setFormatter(formatter) debug_handler.setFormatter(formatter) stdout_handler.setFormatter(formatter) @@ -376,49 +394,49 @@ if __name__ == '__main__': path1 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number.xlsx" path2 = "../../任务数据/LibreOffice Calc/Multiply_Time_Number_gold.xlsx" - rules = [ { "type": "check_cell" - , "sheet_idx": 0 - , "coordinate": "E3" - , "props": { "value": { "method": "approx:0.001" - , "ref": 191.6667 - } - } + rules = [{"type": "check_cell" + , "sheet_idx": 0 + , "coordinate": "E3" + , "props": {"value": {"method": "approx:0.001" + , "ref": 191.6667 + } + } } - ] - print( compare_table( path1, path2 + ] + print(compare_table(path1, path2 , rules=rules ) - ) - print( compare_table( path2, path2 + ) + print(compare_table(path2, path2 , rules=rules ) - ) + ) # Row Properties - #path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx" - #path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx" - #workbook: Workbook = openpyxl.load_workbook(filename=path1) - #worksheet: Worksheet = workbook.active - #for r_no, dms in worksheet.column_dimensions.items(): - #print(r_no, type(r_no), type(dms), dms.hidden) + # path1 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA.xlsx" + # path2 = "../../任务数据/LibreOffice Calc/Date_Budget_Variance_HideNA_gold.xlsx" + # workbook: Workbook = openpyxl.load_workbook(filename=path1) + # worksheet: Worksheet = workbook.active + # for r_no, dms in worksheet.column_dimensions.items(): + # print(r_no, type(r_no), type(dms), dms.hidden) # Conditional Formats - #import formulas - #path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx" - #path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx" - #path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx" - #workbook: Workbook = openpyxl.load_workbook(filename=path2) - #worksheet: Worksheet = workbook.active - #print(worksheet.conditional_formatting) - #for itm in worksheet.conditional_formatting: - #print(itm.cells) - #for r in itm.rules: - #print( r.type, r.formula, r.dxf.font.color.rgb - #, r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb - #) - #condition = formulas.Parser().ast("=" + r.formula[0])[1].compile() - ##print(r.type, r.operator, r.dxfId, r.dxf) - #for r in itm.cells: - #for c in r.cells: - #value = worksheet.cell(row=c[0], column=c[1]).value - #print(value, condition(str(value))) + # import formulas + # path1 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days.xlsx" + # path2 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold.xlsx" + # path3 = "../../任务数据/LibreOffice Calc/Calendar_Highlight_Weekend_Days_gold_test.xlsx" + # workbook: Workbook = openpyxl.load_workbook(filename=path2) + # worksheet: Worksheet = workbook.active + # print(worksheet.conditional_formatting) + # for itm in worksheet.conditional_formatting: + # print(itm.cells) + # for r in itm.rules: + # print( r.type, r.formula, r.dxf.font.color.rgb + # , r.dxf.fill.fgColor.rgb, r.dxf.fill.bgColor.rgb + # ) + # condition = formulas.Parser().ast("=" + r.formula[0])[1].compile() + ##print(r.type, r.operator, r.dxfId, r.dxf) + # for r in itm.cells: + # for c in r.cells: + # value = worksheet.cell(row=c[0], column=c[1]).value + # print(value, condition(str(value))) diff --git a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json index d0c5a47..0e8b6d8 100644 --- a/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json +++ b/evaluation_examples/examples/libreoffice_calc/4f07fbe9-70de-4927-a4d5-bb28bc12c52c.json @@ -92,8 +92,9 @@ ], "dest": [ "Padding_Decimals_In_Formular_gold.xlsx", - "Padding_Decimals_In_Formular_gold-Sheet1.xlsx" - ] + "Padding_Decimals_In_Formular_gold-Sheet1.csv" + ], + "multi": true }, "options": { "rules": [ diff --git a/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json b/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json index 980aa57..c985a2e 100644 --- a/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json +++ b/evaluation_examples/examples/libreoffice_calc/6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5.json @@ -88,7 +88,7 @@ "type": "vm_file", "path": [ "/home/user/Keep_Two_decimal_points.xlsx", - "/home/user/Keep_Two_decimal_points-Sheet1.xlsx" + "/home/user/Keep_Two_decimal_points-Sheet1.csv" ], "dest": [ "Keep_Two_decimal_points.xlsx", diff --git a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json index b19b14a..ce5b308 100644 --- a/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json +++ b/evaluation_examples/examples/libreoffice_calc/a01fbce3-2793-461f-ab86-43680ccbae25.json @@ -7,7 +7,7 @@ { "type": "download", "parameters": { - "file": [ + "files": [ { "url": "https://drive.usercontent.google.com/download?id=1uT0axjo9lwkKu6hYVnsAL2FCrdH0DLUv&export=download&authuser=0&confirm=t&uuid=e7da6304-9c7a-4862-8a30-9f2284b843da&at=APZUnTVNHThpAZJmF6IuPckFvslw:1704187618838", "path": "/home/user/Set_Decimal_Separator_Dot.xlsx" diff --git a/experiment_a11y_tree.py b/experiment_a11y_tree.py index 5a8a5ad..40836da 100644 --- a/experiment_a11y_tree.py +++ b/experiment_a11y_tree.py @@ -186,24 +186,21 @@ if __name__ == '__main__': ] calc_list = [ - # "eb03d19a-b88d-4de4-8a64-ca0ac66f426b", - # "0bf05a7d-b28b-44d2-955a-50b41e24012a", - # "7a4e4bc8-922c-4c84-865c-25ba34136be1", - # "2bd59342-0664-4ccb-ba87-79379096cc08", - # "ecb0df7a-4e8d-4a03-b162-053391d3afaf", - # "7efeb4b1-3d19-4762-b163-63328d66303b", - # "4e6fcf72-daf3-439f-a232-c434ce416af6", - # "6054afcb-5bab-4702-90a0-b259b5d3217c", - # "abed40dc-063f-4598-8ba5-9fe749c0615d", - # "01b269ae-2111-4a07-81fd-3fcd711993b0", - # "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14", - # "0cecd4f3-74de-457b-ba94-29ad6b5dafb6", + "eb03d19a-b88d-4de4-8a64-ca0ac66f426b", + "0bf05a7d-b28b-44d2-955a-50b41e24012a", + "7a4e4bc8-922c-4c84-865c-25ba34136be1", + "2bd59342-0664-4ccb-ba87-79379096cc08", + "ecb0df7a-4e8d-4a03-b162-053391d3afaf", + "7efeb4b1-3d19-4762-b163-63328d66303b", + "4e6fcf72-daf3-439f-a232-c434ce416af6", + "6054afcb-5bab-4702-90a0-b259b5d3217c", + "abed40dc-063f-4598-8ba5-9fe749c0615d", + "01b269ae-2111-4a07-81fd-3fcd711993b0", + "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14", + "0cecd4f3-74de-457b-ba94-29ad6b5dafb6", "4188d3a4-077d-46b7-9c86-23e1a036f6c1", "51b11269-2ca8-4b2a-9163-f21758420e78", "7e429b8d-a3f0-4ed0-9b58-08957d00b127", - "f5a90742-3fa2-40fc-a564-f29b054e0337", - "22df9241-f8d7-4509-b7f1-37e501a823f7", - "1434ca3e-f9e3-4db8-9ca7-b4c653be7d17", "347ef137-7eeb-4c80-a3bb-0951f26a8aff", "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5", "3aaa4e37-dc91-482e-99af-132a612d40f3", diff --git a/experiment_screenshot.py b/experiment_screenshot.py index 28b0f1e..f490d69 100644 --- a/experiment_screenshot.py +++ b/experiment_screenshot.py @@ -167,5 +167,35 @@ if __name__ == '__main__': "480bcfea-d68f-4aaa-a0a9-2589ef319381", "af630914-714e-4a24-a7bb-f9af687d3b91" ] - for example_id in chrome_list: - main("chrome", example_id) + calc_list = [ + "eb03d19a-b88d-4de4-8a64-ca0ac66f426b", + "0bf05a7d-b28b-44d2-955a-50b41e24012a", + "7a4e4bc8-922c-4c84-865c-25ba34136be1", + "2bd59342-0664-4ccb-ba87-79379096cc08", + "ecb0df7a-4e8d-4a03-b162-053391d3afaf", + "7efeb4b1-3d19-4762-b163-63328d66303b", + "4e6fcf72-daf3-439f-a232-c434ce416af6", + "6054afcb-5bab-4702-90a0-b259b5d3217c", + "abed40dc-063f-4598-8ba5-9fe749c0615d", + "01b269ae-2111-4a07-81fd-3fcd711993b0", + "8b1ce5f2-59d2-4dcc-b0b0-666a714b9a14", + "0cecd4f3-74de-457b-ba94-29ad6b5dafb6", + "4188d3a4-077d-46b7-9c86-23e1a036f6c1", + "51b11269-2ca8-4b2a-9163-f21758420e78", + "7e429b8d-a3f0-4ed0-9b58-08957d00b127", + "347ef137-7eeb-4c80-a3bb-0951f26a8aff", + "6e99a1ad-07d2-4b66-a1ce-ece6d99c20a5", + "3aaa4e37-dc91-482e-99af-132a612d40f3", + "37608790-6147-45d0-9f20-1137bb35703d", + "f9584479-3d0d-4c79-affa-9ad7afdd8850", + "d681960f-7bc3-4286-9913-a8812ba3261a", + "21df9241-f8d7-4509-b7f1-37e501a823f7", + "1334ca3e-f9e3-4db8-9ca7-b4c653be7d17", + "357ef137-7eeb-4c80-a3bb-0951f26a8aff", + "aa3a8974-2e85-438b-b29e-a64df44deb4b", + "a01fbce3-2793-461f-ab86-43680ccbae25", + "4f07fbe9-70de-4927-a4d5-bb28bc12c52c", + ] + + for example_id in calc_list: + main("libreoffice_calc", example_id) diff --git a/mm_agents/gpt_4v_agent.py b/mm_agents/gpt_4v_agent.py index 7656a0a..ff8d0d0 100644 --- a/mm_agents/gpt_4v_agent.py +++ b/mm_agents/gpt_4v_agent.py @@ -505,6 +505,7 @@ class GPT4v_Agent: @backoff.on_exception( backoff.expo, (APIError, RateLimitError, APIConnectionError, ServiceUnavailableError, InvalidRequestError), + max_tries=3 ) def call_llm(self, payload): response = requests.post( @@ -514,6 +515,18 @@ class GPT4v_Agent: ) if response.status_code != 200: + if response.json()['error']['code'] == "context_length_exceeded": + print("Context length exceeded. Retrying with a smaller context.") + payload["messages"] = payload["messages"][-1:] + retry_response = requests.post( + "https://api.openai.com/v1/chat/completions", + headers=self.headers, + json=payload + ) + if retry_response.status_code != 200: + print("Failed to call LLM: " + retry_response.text) + return "" + print("Failed to call LLM: " + response.text) return "" else: