From 5778078596a703f68fdc3aae272429fa8a51f3d0 Mon Sep 17 00:00:00 2001 From: yuanmengqi Date: Tue, 8 Jul 2025 10:35:47 +0000 Subject: [PATCH] fix some multi_apps tasks --- desktop_env/evaluators/metrics/docs.py | 7 +++++++ .../multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json | 2 +- .../multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json | 7 +++++++ .../multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json | 2 +- .../multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json | 2 +- .../multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json | 6 ++++++ evaluation_examples/settings/proxy/dataimpulse.json | 4 ++-- 7 files changed, 25 insertions(+), 5 deletions(-) diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 908a387..2e78711 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -86,6 +86,7 @@ def compare_docx_files(file1, file2, **options): ignore_case = options.get('ignore_case', False) ignore_order = options.get('ignore_order', False) content_only = options.get('content_only', False) + delete_empty_lines = options.get('delete_empty_lines', False) if not file1 or not file2: return 0 @@ -119,6 +120,9 @@ def compare_docx_files(file1, file2, **options): if ignore_order: doc1_paragraphs = sorted(doc1_paragraphs) doc2_paragraphs = sorted(doc2_paragraphs) + if delete_empty_lines: + doc1_paragraphs = [p for p in doc1_paragraphs if p.strip()] + doc2_paragraphs = [p for p in doc2_paragraphs if p.strip()] elif file1.endswith('.odt') and file2.endswith('.odt'): try: doc1 = load(file1) @@ -131,6 +135,9 @@ def compare_docx_files(file1, file2, **options): if ignore_order: doc1_paragraphs = sorted(doc1_paragraphs) doc2_paragraphs = sorted(doc2_paragraphs) + if delete_empty_lines: + doc1_paragraphs = [p for p in doc1_paragraphs if p.strip()] + doc2_paragraphs = [p for p in doc2_paragraphs if p.strip()] else: # Unsupported file types or mismatch print("Unsupported file types or mismatch between file types.") diff --git a/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json index fd48796..b74ed3d 100644 --- a/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json +++ b/evaluation_examples/examples/multi_apps/778efd0a-153f-4842-9214-f05fc176b877.json @@ -28,7 +28,7 @@ { "type": "launch", "parameters": { - "command": "VLC_VERBOSE=-1 vlc --aout=dummy --repeat '/home/user/Desktop/planet.mp4'", + "command": "vlc", "shell": true } } diff --git a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json index 5e2dffa..b1ad263 100644 --- a/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json +++ b/evaluation_examples/examples/multi_apps/9f3bb592-209d-43bc-bb47-d77d9df56504.json @@ -64,6 +64,13 @@ "command": "echo password | sudo -S pip install pysrt", "shell": "true" } + }, + { + "type": "command", + "parameters": { + "command": "echo osworld-public-evaluation | sudo -S pip install pysrt", + "shell": "true" + } } ], "func": "exact_match", diff --git a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json index e2c8ca4..a3ad108 100644 --- a/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json +++ b/evaluation_examples/examples/multi_apps/a82b78bb-7fde-4cb3-94a4-035baf10bcf0.json @@ -1,7 +1,7 @@ { "id": "a82b78bb-7fde-4cb3-94a4-035baf10bcf0", "snapshot": "libreoffice_calc", - "instruction": "I'm really enjoying this paper. Could you please locate the personal webpages of the initial author and the last three authors? Please include them in a browser bookmark folder titled 'Liked Authors.'", + "instruction": "I'm really enjoying this paper. Could you please locate the personal webpages of the initial author and the last three authors? Please include them in a browser bookmark folder titled 'Liked Authors' under the 'Bookmarks bar'.", "source": "authors", "config": [ { diff --git a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json index 95a24b9..41e3f95 100644 --- a/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json +++ b/evaluation_examples/examples/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652.json @@ -76,7 +76,7 @@ }, "expected": { "type": "cloud_file", - "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652/authors-gt.xlsx", + "path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/b5062e3e-641c-4e3a-907b-ac864d2e7652/authors-ground_truth.xlsx", "dest": "authors-gt.xlsx" }, "options": { diff --git a/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json index eae5b81..8301da1 100644 --- a/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json +++ b/evaluation_examples/examples/multi_apps/bb83cab4-e5c7-42c7-a67b-e46068032b86.json @@ -55,6 +55,12 @@ } ], "func": "compare_docx_files", + "options": { + "ignore_blanks": true, + "ignore_order": true, + "content_only": true, + "delete_empty_lines": true + }, "result": { "type": "vm_file", "path": "/home/user/Desktop/script.docx", diff --git a/evaluation_examples/settings/proxy/dataimpulse.json b/evaluation_examples/settings/proxy/dataimpulse.json index d7f1cfb..5f7c0a4 100644 --- a/evaluation_examples/settings/proxy/dataimpulse.json +++ b/evaluation_examples/settings/proxy/dataimpulse.json @@ -2,8 +2,8 @@ { "host": "gw.dataimpulse.com", "port": 823, - "username": "67e5faf31654b923f06b", - "password": "26a3158d346abdfa", + "username": "e750e5abb74376d28361", + "password": "e5ec245537e1e76a", "protocol": "http", "provider": "dataimpulse", "type": "residential",