From 2d493759e3a83c21dfc653345f237036373e6d4f Mon Sep 17 00:00:00 2001 From: tsuky_chen <3107760494@qq.com> Date: Sat, 30 Dec 2023 18:21:39 +0800 Subject: [PATCH] add eval libreoffice write compare content --- desktop_env/evaluators/metrics/__init__.py | 2 +- desktop_env/evaluators/metrics/docs.py | 25 +++++++++++-- .../adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json | 36 +++++++++++++++++-- .../e528b65e-1107-4b8c-8988-490e4fece599.json | 36 +++++++++++++++++-- .../ecc2413d-8a48-416e-a3a2-d30106ca36cb.json | 20 ++++++++++- .../f178a4a9-d090-4b56-bc4c-4b72a61a035d.json | 2 +- 6 files changed, 110 insertions(+), 11 deletions(-) diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index c881b27..b137c87 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -1,3 +1,3 @@ from .table import compare_table, compare_with_sparklines, compare_with_charts from .table import check_sheet_list, check_xlsx_freeze -from .docs import find_default_font, contains_page_break \ No newline at end of file +from .docs import find_default_font, contains_page_break, compare_docx_files \ No newline at end of file diff --git a/desktop_env/evaluators/metrics/docs.py b/desktop_env/evaluators/metrics/docs.py index 6399b31..6953dd8 100644 --- a/desktop_env/evaluators/metrics/docs.py +++ b/desktop_env/evaluators/metrics/docs.py @@ -2,8 +2,6 @@ import xml.etree.ElementTree as ET from docx import Document -from lxml import etree - def find_default_font(expected, config_file_path): """Find the default font in LibreOffice Writer.""" default_font = None @@ -36,6 +34,29 @@ def contains_page_break(docx_file): if br is not None and '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type' in br.attrib and br.attrib['{http://schemas.openxmlformats.org/wordprocessingml/2006/main}type'] == 'page': return 1 return 0 + +def compare_docx_files(file1, file2): + + doc1 = Document(file1) + doc2 = Document(file2) + + doc1_paragraphs = [p.text for p in doc1.paragraphs] + doc2_paragraphs = [p.text for p in doc2.paragraphs] + + if len(doc1_paragraphs) != len(doc2_paragraphs): + return 0 + + # Compare each paragraph + for p1, p2 in zip(doc1_paragraphs, doc2_paragraphs): + if p1 != p2: + return 0 + + return 1 + +# file1 = 'path/to/file1.docx' +# file2 = 'path/to/file2.docx' + +# print(are_docx_files_same(file1, file2)) # Replace 'your_document.docx' with the path to your document # result = contains_page_break('your_document.docx') # print(result) diff --git a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json index a3bb270..8cd9da2 100644 --- a/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json +++ b/evaluation_examples/examples/libreoffice_writer/adf5e2c3-64c7-4644-b7b6-d2f0167927e7.json @@ -3,10 +3,40 @@ "snapshot": "libreoffice_writer", "instruction": "Helping me adding CITATION_TEXT to my reference list, and add a cross reference after the word \"WHERE_WE_ADD_REFERENCE\"", "source": "https://seekstar.github.io/2022/04/11/libreoffice%E5%BC%95%E7%94%A8%E6%96%87%E7%8C%AE/", - "config": [], + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://drive.google.com/uc?export=download&id=1boNZ3JuqUx2apMqExL3UX9A4Sopi77ke7yKnIO3cpbg", + "path": "Desktop/Add_Citation_Cross_Reference.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "Desktop/Add_Citation_Cross_Reference.docx" + } + } + ], "trajectory": "trajectories/", "related_apps": [ "libreoffice_writer" ], - "evaluator": "evaluation_dir" -} + "evaluator": { + "func": "compare_docx_files", + "expected": { + "type": "cloud_file", + "path": "https://doc-14-20-docstext.googleusercontent.com/export/hglu5spf1pl3kjo47q5hl516lk/erhjsu6vpod00o7ruf95hlnqkk/1703931360000/108888117743638485671/108888117743638485671/1tiN6A9-zQ2gfDPTWAYK5mv5JbUe2Y_PqPIyuvsu7PgA?format=docx&dat=AOBvIb1uRgycIK4pNup7ZrnJqwNjYOgTUlrhxAc8DnBWzUt9zDxLm3e4s0KQytzQ1qvFZaBr8-ymrVv7Mmb7ovpVk4k8sgS_2MRD1m-tMUDiUEGFtoxrECd4Xoaspuwb-BZttyU1cCdY3U12qcNWy5Cts_uys6ouKZok01Z7s1J233udfrMbXvDt_X-HeNo_7e6Bh64ZC4ohHOKZddsuayKYxPTKpgnho_8FPuWXqZDKyfYRDoTXxGWv-WrZSVqRSHP6GMtBdWc1-QBuWzH_iRTM64joeveSDppMjMeB5bjdJQ7EXf-EjA8MjSxtvQQGBmun7PoZ-W7fLmQ1E3fZKJ5BwQDOIJHDCBar83iHHoXOUJ1Q5UbkKcCS0nJ_pprCzRYXLSeVfN0_bdGuY2lSE8GhX-yGlyGIjAIZK-YulOFXwV0--4aD10rh43A5GLmSLeNZe6maUU33j1V-zUtp1qPgRk3SnPJENNOXf-sOYAvQqSgROSBvAwElqgHUMD_ROK692M7_7OtFe4sjs0eVnBzROEHy-ZznXqdSXJj6-2vloXHWfswPfE-Mq5kc7F1zX4CY6H1kQ-zgHzeLX-qQA6YmgZPJ0pLzFkAkBiMAjPigA_2dy7jk-niePSbZ9DcgYoX6iv6MkJ0y6JE_HQF7Gr6kDBiOjOyDp7gFoMj35F41Fac1wpSJmoiUEGLg0qGRBZ6BPc54m-AAFuy-2s4BUUtPgk-FlTD1jSpHDXLbJ-VQFglx1CYpfqFAnmIE8yseQPh3GqQYyCtCfD-zzO-CRTT9A-XOQVuH27npfk2gMDKtGwJr7XhNL8lL9b8540uTjt9nFnmNfDZCFK01VULdHZesSBedqM4iApgVVnjok8nmYw14e9WSgJOdjeiYAwI", + "dest": "Add_Citation_Cross_Reference_Gold.docx" + }, + "result": { + "type": "vm_file", + "path": "Desktop/Add_Citation_Cross_Reference.xlsx", + "dest": "Add_Citation_Cross_Reference.xlsx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/e528b65e-1107-4b8c-8988-490e4fece599.json b/evaluation_examples/examples/libreoffice_writer/e528b65e-1107-4b8c-8988-490e4fece599.json index e7576ab..7c3656e 100644 --- a/evaluation_examples/examples/libreoffice_writer/e528b65e-1107-4b8c-8988-490e4fece599.json +++ b/evaluation_examples/examples/libreoffice_writer/e528b65e-1107-4b8c-8988-490e4fece599.json @@ -3,10 +3,40 @@ "snapshot": "libreoffice_writer", "instruction": "Capitalize the first letter of all words.", "source": "https://www.youtube.com/watch?v=l25Evu4ohKg", - "config": [], + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://doc-08-20-docstext.googleusercontent.com/export/hglu5spf1pl3kjo47q5hl516lk/50aih0mm4k09m25qfa3qrhiuk4/1703929965000/108888117743638485671/108888117743638485671/1NcJF7vVondy_r7toxlB9hLPAZa_SE2J8Q8jwMXS8VUA?format=docx&dat=AOBvIb2I6t_SHXA1k8jP6BwfHmYlD9vYHbrSA2-mYotTRWVDp45o4YRrwJXoy21qBnq92d646pP7IQH6-gXTi7oDIcGqD0iYR6CLD1s2PyYX2F8wQ703_cw61GoVYGf8BoorXDY6Y44dfXY0j2RigWDdbimS1rSLUy3TGEmTl8jZq71zrNUKiS25zCsfuXONsexH0tGI1d8LfnVKrFLCQvIlrVF7lV9lgi4lJhuUwIIKF1JdziNoNBohbCuhv-h4iGPRyoFxC4hZAOVJEHy1wIvBA64rNsw1N_nplLxx42I7MC9-3F24Lkxi3xfJ81nEYSx8ma5D9V_AHLLRLmIrpPKYk1s47qPQxbSGrcO1362WJeMxb8lys71APnPwfWbodnxZLdJR2x2WfdYiQWpZGRzBf3-CeQmORrMQDSwWOHsEGMiCw8qTKevzhY1s4aZWBxpQO7ocCoL1gLOxxEvj4eSLvxp2S1u_dFjjr-dcMxt9-Xu210BGd-1Q6kUYzexRuI6I1vkWxDFn7GHgkVf-RhbMT52W_FFOo2Um4rXIfV62W5_nZrmJjz6KNOGdAbIJdkmTrS_lESb6GDmkOFwNarmTlZVOCDN-On7HGaYF1KvX0hobR0559-wKetJj2diqCDOlDXFemtkzvX-CDCRBnDmQxq1ZaQEsjAHhu7sE9jlZT0ywUHV3VpKBcepolqaCRAhX0gCf1cSht7LDHODeX9Hbn3tz810aYlETCJQo9QScbN87i4IV3qFbezwymMi0ZDLgWW0BtEa1gFMY89om6YGscnCHUHGCGy_GW2XscOiQq3rJngCmuu4Ivfta_7GB0e9NeflOIO3wlCpTlw6aQVh9sIB0MMTpDaZ6V1SXSnPInj15tLbCiAPXzNxfg-8", + "path": "Desktop/Capitalize_First_Letter.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "Desktop/Capitalize_First_Letter.docx" + } + } + ], "trajectory": "trajectories/", "related_apps": [ "libreoffice_writer" ], - "evaluator": "evaluation_dir" -} + "evaluator": { + "func": "compare_docx_files", + "expected": { + "type": "cloud_file", + "path": "https://doc-10-20-docstext.googleusercontent.com/export/hglu5spf1pl3kjo47q5hl516lk/tj4rm8e6bt50lht09qgev340nk/1703930035000/108888117743638485671/108888117743638485671/1vmb6LphTM8jKf2K7fkrmeWhy_wEgMtvthJx7KFtBwhE?format=docx&dat=AOBvIb1sqCYIZBDWtevTgWhVtsxSMJl1RuUy-MFBRUZszTYkE1Y_qhzC_RfgEvMx5kJz_GECf3mFQkPQq6Re7HFsKVaDU4_KXQE13ZM1cCIEtAEJ7UKZPcI55xO_dMh7ig_1wArgt_jviaEmA5RJcdLHu-omc7W23lcTfPZQpZTextkU7vtgQJGceYeC25JIdRPpsTYVXvqhD5Bjtq-ArRQO4f3huW-TfFKdiVh3MFjkuMx8fMg3l60l8JH_lUqw2BqCqzQDVeD_ajYrmzrFQMP5rFXj353S5HtCiAdSlClI1I6nRMLAELwtsgkqEIc5pwNxcOUKZU1lHkCl1wljzOkrLxRSPlQ1Hb2h0YbRVbPARBB6ywe5QooHn9HatQr_4hkzMTRug4Qv-fo39-F5Uy5bNeGPlK4tDtOUPUDUQs0Kbnn_gT9zzSUCXj4BW85tmNtCNc-Akt4_tPXGyEqlNELpFeBaK27EETF6S93N7C5OU9SfYbL8u29YgTLq1229JmJ3dcUr8yDv2oFLx9x_PNbAStSYABZaDCi1B5B2gPSUvxdQ7CtkoFodD0e7XwBWqDi3jC1N2LdBa8mUsIkFVJvI3PmixODcgzJb5MTkKBwWKHw0UqV-Zsl2whtWEEMeeu6HdgsIiuzSs56dUDsOIJXhu2PfIojjyoX91-NeffGEVQ5-w9l3_EfNpOUHLli3_Ju8w5YvjNoS9gU-g2HTdljnWydN0j0jiz1otjiE0oQxMzVqvWNMa3Qap2vPvQMVoOB_7SwBzcEVmi-SnitWvrXIXs3o585Qc6MBeDQ20D0VhJGsFJ8vVqxtDI8AOIC-t8NaYatFoKXuQLJckJ1wcqA7NmFxWa2hWU79l6dwPztsK9w0VJQyMSwJOMFPXWU", + "dest": "Capitalize_First_Letter_Gold.docx" + }, + "result": { + "type": "vm_file", + "path": "Desktop/Capitalize_First_Letter.xlsx", + "dest": "Capitalize_First_Letter.xlsx" + } + } +} \ No newline at end of file diff --git a/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json b/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json index 6a802e7..5e35598 100644 --- a/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json +++ b/evaluation_examples/examples/libreoffice_writer/ecc2413d-8a48-416e-a3a2-d30106ca36cb.json @@ -3,7 +3,25 @@ "snapshot": "libreoffice_writer", "instruction": "Insert a blank page here", "source": "https://www.quora.com/How-can-I-insert-a-blank-page-on-libreoffice", - "config": [], + "config": [ + { + "type": "download", + "parameters": { + "files": [ + { + "url": "https://doc-0s-20-docstext.googleusercontent.com/export/hglu5spf1pl3kjo47q5hl516lk/nqen29nemcpds1sk8t0ck303a8/1703926705000/108888117743638485671/108888117743638485671/10KfmCo_A5M04w4TA_qzPkd6vw04K3xzBzluIgeRCXuo?format=docx&dat=AOBvIb208vnoN2v6hNgA1FJ6jqegOR6oyVuKcoeroXXtmMiXWTao5vsteKCqxz2lnj5rioDVslkXyDQPCKYBnAJhtvKsIE8cJ7V2DtYuL9jFKYtJgClq2RsfpGE0hgvlLuoLZmeaRLaEJv10NTPGRTULPYwgN-RqLdnyG4EojXxrTKxk4TfVM7xAqob7uE24vFyGMH4uMctC2obdXNsDKnZ4dM1eao-3ABtlyPzSVpT891ziY_SMclP7l7y7cLqw4S11zbbnteVYpM5ytRlgpWKFrKvCmX8gRKpT0ENcNlL4ILJdi3KOAhU97X3vQNkS0LyqmAzKjBeuxz5tD3CuAj7LN1xu2t-DaLVvvrZPm_-XHFGvbHdy5wY66HtMqqPmaRb9_898Tl5ODZxfd5XP1CCRw-c8ohA2Jmdl86Scr8XDA7C_mAT8m_E1FLvJLJhJ_TyL74H0-TRiAPA2noaX2PUmOt4G1qFF_aIOn56iPPt3hB3eHvgthD0bVuW3TZUyr6cP4ZM_TF7g9awhXa1xWusltHItieNfNaJOPiI4Lacon_uICbbpSvEhuq5-apCsnwXpKIvK18UKP5u1Fw1Zb8AhAocJpHLxej87mInzYfFr7XAdf1kiPPxh1zRL2yW_Qe-J4YxWn0oBRrNrf_IgfQK_z9QRXgzzS3xaby2AsmA0qMNHIbMT73Uvha0TvO5UxozPc-aejeuaoi7ot27uSAwd0Cd4Yi-d4e7qfqVgNqvLl-psT9ZZ7cWq8vhU2lPiHrlmhVIwiWjf-s57gRNyXN99SY7MLG-b_JhOI43JgzZzfhjMc0EG2UrCxEEiOOGCp57BwH9FjqM1SQSenAlPmy28e8wCShBwZba_WUbwStumKQakIkwYqeoc0VoJN38", + "path": "Desktop/Insert_Blank_Page.docx" + } + ] + } + }, + { + "type": "open", + "parameters": { + "path": "Desktop/Insert_Blank_Page.docx" + } + } + ], "trajectory": "trajectories/", "related_apps": [ "libreoffice_writer" diff --git a/evaluation_examples/examples/libreoffice_writer/f178a4a9-d090-4b56-bc4c-4b72a61a035d.json b/evaluation_examples/examples/libreoffice_writer/f178a4a9-d090-4b56-bc4c-4b72a61a035d.json index 62c7dee..29a7834 100644 --- a/evaluation_examples/examples/libreoffice_writer/f178a4a9-d090-4b56-bc4c-4b72a61a035d.json +++ b/evaluation_examples/examples/libreoffice_writer/f178a4a9-d090-4b56-bc4c-4b72a61a035d.json @@ -9,7 +9,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1rwhniaClEkF8XFzdfaNUA6GmAiy4syMZ&export=download&authuser=0&confirm=t&uuid=6fdd5b04-85f4-45e1-ad74-368f8f2a82ab&at=APZUnTUP-JxPxLfNls6jXWghblQ5:1701766091851", + "url": "https://doc-10-20-docstext.googleusercontent.com/export/hglu5spf1pl3kjo47q5hl516lk/jq5b3etcrfdc25il9orjsk8jgo/1703926500000/108888117743638485671/108888117743638485671/1ufVZd-Uibt9pVClmK9BceqMR6iQNSekH5ECxysnPehY?format=docx&dat=AOBvIb3K-ByHFQ8OY7SbFlbA41gbWygryhR0tjcDhZuUWmdje6d2VxzZsK00RoorX_LOOjpnln1zFpw9-W1PLbjKMx1-cOGZfuVpqBiL3mOiYLdQPxqqPgrRKjzJzeD0SZOCK96nu8wIGoY-tDVwAoGzf98-lxjDOO1Z3slrW4YeTUPZQ17EusYw75S8FzBIMxW9UGzMPMtubUK_JVrHQOU-ghu8bz0atPRrkB44ysWeF0W063sg03ysAnb1557Ie0p3RgrcMc9aeGtKvQFCo0Tr7BkR93D2klp6M5pDMJekgtUGxurwiEmNeZ6nRhp-bYoev1uesAhGzZONVi_1DtaHvGzL6MGMIzfV5rWtMXbFI1CBwtP00AuF5qFOD6l2wkRVogas48MWOxBCX-bcUHOxezVDmxb0ohfCveIDMq0s8ebY5HggfrE9I8pMs-2GNPABUSr4S7MkRO-2yzy-j8pgTtzO3QRc146gd9Hci6aYoAnBIludK31AsLckcVba-OrEyB7Lx31sfzvdITS8nZ4Cg_JWMV9CugNgF_8w0SprvDMw9vsoEjYaJpY2Z_K445GGENY7dGRQbGmBhLeP9wJBXHsNhObWKV71BrPm2wSOJLrFU2iLa5jLY7mkz7xKhq3e9dDttus9c6A0KPj1f54YAsvZ_SEPbE1WBVzMYPD3MV-6yw2KbKgZxYQ9A0lf87KoffIbA24Y2S97FBuOWJ5ZVN2rz02PbpXyuMf1fcnUb8JpAm6ewwArKqtmIJg20hySiYOtZUgfQvjwBaDrMhQjKGKYiLXIEdGTWVQuuTGQhG8pqd4StbxUsCwdMiFOFVXV0mNNncz3QZEOPF5fgW564KuE9qFClhq620ve61mgg6_3S2kQ9RhHYaShvuI", "path": "Desktop/Set_Default_Font.docx" } ]