From c7017a476de20cb15c1f7a3fd39efb4904e29e0c Mon Sep 17 00:00:00 2001 From: shenzhennan <1641225799szn@gmail.com> Date: Fri, 18 Jul 2025 07:14:35 +0000 Subject: [PATCH 1/3] fix impress instruction 0a211154 --- .../0a211154-fda0-48d0-9274-eaac4ce5486d.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluation_examples/examples/libreoffice_impress/0a211154-fda0-48d0-9274-eaac4ce5486d.json b/evaluation_examples/examples/libreoffice_impress/0a211154-fda0-48d0-9274-eaac4ce5486d.json index c98378a..5f400f5 100644 --- a/evaluation_examples/examples/libreoffice_impress/0a211154-fda0-48d0-9274-eaac4ce5486d.json +++ b/evaluation_examples/examples/libreoffice_impress/0a211154-fda0-48d0-9274-eaac4ce5486d.json @@ -1,7 +1,7 @@ { "id": "0a211154-fda0-48d0-9274-eaac4ce5486d", "snapshot": "libreoffice_impress", - "instruction": "Set the background color of slides with a real person picture as yellow and set the title of slide 2 as \"Let's start\".", + "instruction": "Set the background color to yellow for any slide that contains one or more images of real people, and set the title of slide 2 as \"Let's start\".", "source": "https://arxiv.org/pdf/2311.01767.pdf", "config": [ { From 7fb1cee5757cc57940a34f44a4e6b4892a695e27 Mon Sep 17 00:00:00 2001 From: Zilong Zhou Date: Fri, 18 Jul 2025 19:52:03 +0800 Subject: [PATCH 2/3] fix: img path error (#271) * feat&style: add task status configuration and clear cache functionality; enhance UI styles * feat&refactor: enhance current configuration API and improve cache clearing logic * refactor&style: simplify task status update logic and improve page refresh mechanism * refactor&feat: streamline default configuration retrieval and enhance cache initialization logic * feat&refactor: add caching to default configuration retrieval and streamline task status logic * feat&style: add collapsible section for additional model parameters and enhance styling for config items * refactor&style: remove floating action button and clean up related styles * fix: update video and screenshot sources to include action space, observation type, and model name parameters --- monitor/templates/task_detail.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/monitor/templates/task_detail.html b/monitor/templates/task_detail.html index 4bf6f95..ced6f52 100644 --- a/monitor/templates/task_detail.html +++ b/monitor/templates/task_detail.html @@ -73,7 +73,7 @@
Loading video...
@@ -103,7 +103,7 @@ {% endif %} {% if step.screenshot_file %}
- Step {{ step.step_num }} Screenshot
{% endif %} @@ -155,7 +155,7 @@ }, 2000); // Directly check video source availability with fetch API - fetch('/task/{{ task_type }}/{{ task_id }}/recording', {method: 'HEAD'}) + fetch('/task/{{ task_type }}/{{ task_id }}/recording?action_space={{ action_space }}&observation_type={{ observation_type }}&model_name={{ model_name }}', {method: 'HEAD'}) .then(function(response) { if (response.ok && videoStatus.textContent === 'Loading video...') { // If HEAD request succeeds but video events haven't fired From 53ffc05042a52a7eaa0b5be30a3d2132d6837cc2 Mon Sep 17 00:00:00 2001 From: Danyang Zhang Date: Fri, 18 Jul 2025 21:28:48 +0800 Subject: [PATCH 3/3] Calc eval fix (#272) * ver Jun17th updating annotations * ver Jun17th corrected annotation of 1d17 added check for cell merge * ver Jun17th updated several annotations * ver Jun20th fixed set-up config of 2bd59342-0664-4ccb-ba87-79379096cc08 * fix: Enhance instructions in LibreOffice Calc examples for clarity and specificity, including details on using Pivot Tables, column placements, and revenue calculations. * ver Jun21st updating calc evals * ver Jun22nd fixed an impress task * ver Jun22ndv2 adjusted several calc tasks * Clean scalfolds * ver Jul18th added two try-excepts to handle possible formula parsing and calculation failures --------- Co-authored-by: BowenBryanWang Co-authored-by: yuanmengqi --- desktop_env/evaluators/metrics/utils.py | 18 ++++++++++++++---- .../01b269ae-2111-4a07-81fd-3fcd711993b0.json | 2 +- .../0326d92d-d218-48a8-9ca1-981cd6d064c7.json | 2 +- .../035f41ba-6653-43ab-aa63-c86d449d62e5.json | 2 +- .../04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json | 2 +- .../0bf05a7d-b28b-44d2-955a-50b41e24012a.json | 2 +- .../0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json | 2 +- .../1d17d234-e39d-4ed7-b46f-4417922a4e7c.json | 2 +- .../26a8440e-c166-4c50-aef4-bfb77314b46b.json | 2 +- .../2bd59342-0664-4ccb-ba87-79379096cc08.json | 2 +- .../347ef137-7eeb-4c80-a3bb-0951f26a8aff.json | 2 +- .../535364ea-05bd-46ea-9937-9f55c68507e8.json | 2 +- .../aa3a8974-2e85-438b-b29e-a64df44deb4b.json | 2 +- .../eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json | 2 +- .../ecb0df7a-4e8d-4a03-b162-053391d3afaf.json | 2 +- .../f9584479-3d0d-4c79-affa-9ad7afdd8850.json | 2 +- .../af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json | 2 +- .../82e3c869-49f6-4305-a7ce-f3e64a0618e7.json | 2 +- .../869de13e-bef9-4b91-ba51-f6708c40b096.json | 2 +- 19 files changed, 32 insertions(+), 22 deletions(-) diff --git a/desktop_env/evaluators/metrics/utils.py b/desktop_env/evaluators/metrics/utils.py index 1136655..c3280eb 100644 --- a/desktop_env/evaluators/metrics/utils.py +++ b/desktop_env/evaluators/metrics/utils.py @@ -459,7 +459,11 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt for r in fmt.rules: active_cells: List[Cell] = [] if r.type == "expression": - condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile() + try: + condition: Callable[[str], bool] = formula_parser.ast("=" + r.formula[0])[1].compile() + except: + logger.exception("Formula parsing error: %s. Skipping.", repr(r.formula[0])) + continue logger.debug("Expression condition: %s", r.formula[0]) arguments: List[Any] = [] @@ -493,9 +497,15 @@ def load_xlsx_styles(xlsx_file: Workbook, sheet_name: str, book_name: str, **opt if nb_contiguous_nothings>50: break continue - elif condition(cell_value, *arguments): - logger.debug("Active Cell %s(%s) for %s", repr(cell), str(cell_value), r.formula[0]) - active_cells.append(cell) + else: + try: + satisfies_condition: bool = condition(cell_value, *arguments) + except: + logger.exception("Error in formula calculation with cell value %d", repr(cell_value)) + satisfies_condition = False + if satisfies_condition: + logger.debug("Active Cell %s(%s) for %s", repr(cell), repr(cell_value), r.formula[0]) + active_cells.append(cell) else: raise NotImplementedError("Not Implemented Condition Type: {:}".format(r.type)) diff --git a/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json b/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json index 0d169d5..b68f9a6 100644 --- a/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json +++ b/evaluation_examples/examples/libreoffice_calc/01b269ae-2111-4a07-81fd-3fcd711993b0.json @@ -82,4 +82,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/0326d92d-d218-48a8-9ca1-981cd6d064c7.json b/evaluation_examples/examples/libreoffice_calc/0326d92d-d218-48a8-9ca1-981cd6d064c7.json index 144be86..804a060 100644 --- a/evaluation_examples/examples/libreoffice_calc/0326d92d-d218-48a8-9ca1-981cd6d064c7.json +++ b/evaluation_examples/examples/libreoffice_calc/0326d92d-d218-48a8-9ca1-981cd6d064c7.json @@ -91,4 +91,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/035f41ba-6653-43ab-aa63-c86d449d62e5.json b/evaluation_examples/examples/libreoffice_calc/035f41ba-6653-43ab-aa63-c86d449d62e5.json index db50cf5..f2fe2a6 100644 --- a/evaluation_examples/examples/libreoffice_calc/035f41ba-6653-43ab-aa63-c86d449d62e5.json +++ b/evaluation_examples/examples/libreoffice_calc/035f41ba-6653-43ab-aa63-c86d449d62e5.json @@ -87,4 +87,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json b/evaluation_examples/examples/libreoffice_calc/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json index 7cd054c..c38131c 100644 --- a/evaluation_examples/examples/libreoffice_calc/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json +++ b/evaluation_examples/examples/libreoffice_calc/04d9aeaf-7bed-4024-bedb-e10e6f00eb7f.json @@ -82,4 +82,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json b/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json index 72a968f..73e3b8f 100644 --- a/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json +++ b/evaluation_examples/examples/libreoffice_calc/0bf05a7d-b28b-44d2-955a-50b41e24012a.json @@ -109,4 +109,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json b/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json index f0b43f1..106315c 100644 --- a/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json +++ b/evaluation_examples/examples/libreoffice_calc/0cecd4f3-74de-457b-ba94-29ad6b5dafb6.json @@ -95,4 +95,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json b/evaluation_examples/examples/libreoffice_calc/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json index 3c964fa..4149a06 100644 --- a/evaluation_examples/examples/libreoffice_calc/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json +++ b/evaluation_examples/examples/libreoffice_calc/1d17d234-e39d-4ed7-b46f-4417922a4e7c.json @@ -176,4 +176,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/26a8440e-c166-4c50-aef4-bfb77314b46b.json b/evaluation_examples/examples/libreoffice_calc/26a8440e-c166-4c50-aef4-bfb77314b46b.json index a150c9c..e3122f9 100644 --- a/evaluation_examples/examples/libreoffice_calc/26a8440e-c166-4c50-aef4-bfb77314b46b.json +++ b/evaluation_examples/examples/libreoffice_calc/26a8440e-c166-4c50-aef4-bfb77314b46b.json @@ -82,4 +82,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json index aa074c4..c915953 100644 --- a/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json +++ b/evaluation_examples/examples/libreoffice_calc/2bd59342-0664-4ccb-ba87-79379096cc08.json @@ -32,4 +32,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json index d87d82b..df7eeab 100644 --- a/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json +++ b/evaluation_examples/examples/libreoffice_calc/347ef137-7eeb-4c80-a3bb-0951f26a8aff.json @@ -91,4 +91,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/535364ea-05bd-46ea-9937-9f55c68507e8.json b/evaluation_examples/examples/libreoffice_calc/535364ea-05bd-46ea-9937-9f55c68507e8.json index 5a03d94..4592c67 100644 --- a/evaluation_examples/examples/libreoffice_calc/535364ea-05bd-46ea-9937-9f55c68507e8.json +++ b/evaluation_examples/examples/libreoffice_calc/535364ea-05bd-46ea-9937-9f55c68507e8.json @@ -88,4 +88,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json b/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json index 8ba693a..c27a27d 100644 --- a/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json +++ b/evaluation_examples/examples/libreoffice_calc/aa3a8974-2e85-438b-b29e-a64df44deb4b.json @@ -61,4 +61,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json b/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json index 2e616fb..e9ea661 100644 --- a/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json +++ b/evaluation_examples/examples/libreoffice_calc/eb03d19a-b88d-4de4-8a64-ca0ac66f426b.json @@ -82,4 +82,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json b/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json index bd95ca4..b254536 100644 --- a/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json +++ b/evaluation_examples/examples/libreoffice_calc/ecb0df7a-4e8d-4a03-b162-053391d3afaf.json @@ -109,4 +109,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json b/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json index 63a9c16..7cfcb77 100644 --- a/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json +++ b/evaluation_examples/examples/libreoffice_calc/f9584479-3d0d-4c79-affa-9ad7afdd8850.json @@ -82,4 +82,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/libreoffice_impress/af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json b/evaluation_examples/examples/libreoffice_impress/af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json index 04e388b..2e8f445 100644 --- a/evaluation_examples/examples/libreoffice_impress/af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json +++ b/evaluation_examples/examples/libreoffice_impress/af2d657a-e6b3-4c6a-9f67-9e3ed015974c.json @@ -83,4 +83,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json index c58e773..0171168 100644 --- a/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json +++ b/evaluation_examples/examples/multi_apps/82e3c869-49f6-4305-a7ce-f3e64a0618e7.json @@ -112,4 +112,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +} diff --git a/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json b/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json index 81411cf..0a12b5f 100644 --- a/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json +++ b/evaluation_examples/examples/multi_apps/869de13e-bef9-4b91-ba51-f6708c40b096.json @@ -168,4 +168,4 @@ "proxy": false, "fixed_ip": false, "possibility_of_env_change": "low" -} \ No newline at end of file +}