Merge branch 'main' of github.com:xlang-ai/OSWorld

This commit is contained in:
yuanmengqi
2025-07-03 16:57:49 +00:00
16 changed files with 68 additions and 39 deletions

View File

@@ -193,7 +193,7 @@ def structure_check_by_mse(img1, img2, threshold=0.03):
(np.array(img1, dtype=np.float32) / 255
- np.array(img2, dtype=np.float32) / 255) ** 2)
structure_same = True if mse < threshold else False
print("MSE: ", mse)
print(f"MSE: {mse}, threshold: {threshold}")
return structure_same
@@ -204,7 +204,7 @@ def structure_check_by_ssim(img1, img2, threshold=0.9):
return similarity >= threshold
def check_brightness_decrease_and_structure_sim(src_path, tgt_path):
def check_brightness_decrease_and_structure_sim(src_path, tgt_path, threshold=0.03):
"""
Check the brightness of src is lower than tgt and the structures are similar
gimp:7a4deb26-d57d-4ea9-9a73-630f66a7b568
@@ -219,13 +219,15 @@ def check_brightness_decrease_and_structure_sim(src_path, tgt_path):
brightness_src = calculate_brightness(img_src)
brightness_tgt = calculate_brightness(img_tgt)
brightness_reduced = brightness_tgt > brightness_src
# print(f"Brightness src: {brightness_src}, tgt: {brightness_tgt}, reduced: {brightness_reduced}")
# Normalize and compare images
target_brightness = 128
img_src_normalized = normalize_brightness(img_src, target_brightness)
img_tgt_normalized = normalize_brightness(img_tgt, target_brightness)
structure_same = structure_check_by_mse(img_src_normalized, img_tgt_normalized)
structure_same = structure_check_by_mse(img_src_normalized, img_tgt_normalized, threshold=threshold)
if brightness_reduced and structure_same:
return 1.
else:

View File

@@ -63,11 +63,13 @@ def compare_epub(result: str, expected: str) -> float:
result_files: List[str] = process_epub(result)
expected_files: List[str] = process_epub(expected)
metric: float = 1.
metric: float = 0.
for f1, f2 in zip(result_files, expected_files):
current_metric: float = diff_text_file(f1, f2)
logger.debug("%s vs %s: %f", f1, f2, current_metric)
metric *= current_metric
metric += current_metric
if len(result_files) > 0:
metric /= len(result_files)
return metric

View File

@@ -1,7 +1,7 @@
{
"id": "00fa164e-2612-4439-992e-157d019a8436",
"snapshot": "libreoffice_writer",
"instruction": "I need to include the experiment results from \"~/Documents/awesome-desktop/expe-results.xlsx\" in the report I am currently writing. Specifically, extract the results of LLM-based models and insert a table into the \"Main Results\" section of my report.",
"instruction": "I need to include the experiment results from \"~/Documents/awesome-desktop/expe-results.xlsx\" in the report I am currently writing. Specifically, extract the results of GPT-4 and insert a table into the \"Main Results\" section of my report.",
"source": "authors",
"config": [
{

View File

@@ -1,7 +1,7 @@
{
"id": "02ce9a50-7af2-47ed-8596-af0c230501f8",
"snapshot": "libreoffice_writer",
"instruction": "I am currently utilizing LibreOffice Writer to compose a Linux tutorial, and I intend to display the outcomes generated by executing the \"ls\" command in /home/user. Kindly execute this command and save the screenshot as 'ls.png' on the Desktop.",
"instruction": "I am currently utilizing LibreOffice Writer to compose a Linux tutorial, and I intend to display the outcomes generated by executing the \"ls\" command in /home/user. Kindly execute this command and save the screenshot of the terminal as 'ls.png' on the Desktop.",
"source": "authors",
"config": [
{
@@ -54,7 +54,7 @@
"type": "rule",
"rules": {
"type": "text",
"text": " Ls"
"text": "ls"
}
}
},

View File

@@ -1,7 +1,7 @@
{
"id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
"snapshot": "multiapps",
"instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, export the measurement results, and save them to ~/Test/Speed (if the dir does not exist, create it).",
"instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, copy the results in speedtest.net/results, and save them to ~/Test/Speed/results.txt (if the dir does not exist, create it). Each metric occupies one line, with the metric name and its value separated by a single space.",
"source": "https://www.speedtest.net/",
"config": [
{
@@ -54,16 +54,21 @@
"browser"
],
"evaluator": {
"func": "compare_time_in_speedtest_results",
"func": "file_contains",
"result": {
"type": "vm_file",
"path": "/home/user/Test/Speed/Speedtest Results Export-.csv",
"dest": "Speedtest Results Export-.csv",
"time_suffix": true
"path": "/home/user/Test/Speed/results.txt",
"dest": "results.txt"
},
"expected": {
"type": "time_diff_range",
"diff_range_in_minutes": "60"
"type": "rule",
"rules": {
"expected": [
"Ping",
"Download",
"Upload"
]
}
}
},
"proxy": true

View File

@@ -1,7 +1,7 @@
{
"id": "3680a5ee-6870-426a-a997-eba929a0d25c",
"snapshot": "libreoffice_calc",
"instruction": "I have file1.xlsx and file2.ods on the Desktop and each has one column. Help me use only the command line to merge these two columns into one LibreOffice Calc file called output.csv and open it from terminal.",
"instruction": "I have file1.xlsx and file2.ods on my Desktop, each containing a single column. Using only the command line, help me merge these two columns into a single column by concatenating the strings from both rows, save the result as ~/Desktop/output.csv, and open it in LibreOffice Calc from the terminal.",
"source": "https://unix.stackexchange.com/questions/510850/how-to-open-calc-from-terminal-and-insert-files",
"config": [
{

View File

@@ -1,7 +1,7 @@
{
"id": "3f05f3b9-29ba-4b6b-95aa-2204697ffc06",
"snapshot": "os",
"instruction": "I have a collection of MP3s with blank meta data, but already named with their artists and titles. I've heard that Picard or Kid3 may help, but I'm unfamiliar with them. Can you help me to fix the meta data?",
"instruction": "I have a collection of MP3s with blank meta data, but already named with their artists and titles. I've heard that Picard or Kid3 may help, but I'm unfamiliar with them. Can you help me to fix the meta data \"title\" and \"artist\"?",
"source": "authors",
"config": [
{

View File

@@ -1,7 +1,7 @@
{
"id": "415ef462-bed3-493a-ac36-ca8c6d23bf1b",
"snapshot": "thunderbird",
"instruction": "There's an e-mail containing the AWS invoice for December saved in local \"Bills\" folder. Extract the invoice PDF to my receipts folder. Follow the file name pattern of the old files and update a record in my tally book.",
"instruction": "There's an e-mail containing the AWS invoice for December saved in local \"Bills\" folder. Extract the invoice PDF to my receipts folder. Follow the file name pattern of the old files and append a record at the end of my tally book.",
"source": "authors",
"config": [
{
@@ -202,4 +202,4 @@
]
},
"proxy": false
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "48d05431-6cd5-4e76-82eb-12b60d823f7d",
"snapshot": "os",
"instruction": "When I ran \"conda install datasets\" in terminal, I got \"conda: command not found\". Could you help me solve it?",
"instruction": "When I ran \"conda install datasets\" in terminal, I got \"conda: command not found\". Could you help me solve it so that I can use the conda command right away?",
"source": "authors",
"config": [
{
@@ -45,17 +45,18 @@
"os",
"chrome"
],
"evaluator": {
"func": "is_in_list",
"func": "exact_match",
"result": {
"type": "vm_command_line",
"command": "conda list",
"command": "grep -q 'conda initialize' ~/.bashrc && echo 1 || echo 0",
"shell": true
},
"expected": {
"type": "rule",
"rules": {
"expected": "packages in environment at"
"expected": "1\n"
}
}
},

View File

@@ -30,15 +30,18 @@
],
"evaluator": {
"func": "check_brightness_decrease_and_structure_sim",
"result": {
"expected": {
"type": "vm_file",
"path": "/home/user/Desktop/background.png",
"dest": "background.png"
},
"expected": {
"result": {
"type": "cloud_file",
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487/back.png",
"dest": "image_original.png"
},
"options": {
"threshold": 0.15
}
},
"proxy": false

View File

@@ -1,7 +1,7 @@
{
"id": "58565672-7bfe-48ab-b828-db349231de6b",
"snapshot": "chrome",
"instruction": "Can you assist me by opening the first link in the latest email in Bills folder from Thunderbird and displaying it in a new Chrome tab?",
"instruction": "Can you assist me by opening the first link in the latest email in Bills folder and displaying it in a new Chrome tab?",
"source": "https://superuser.com/questions/1792660/open-link-from-other-application-does-not-open-the-url-in-firefox",
"config": [
{

View File

@@ -9,6 +9,7 @@
"parameters": {
"command": [
"google-chrome",
"--proxy-server=http://127.0.0.1:18888",
"--remote-debugging-port=1337"
]
}

View File

@@ -65,8 +65,15 @@
"expected": [
"Scottsdale",
"Atlanta",
"Lake Tahoe",
"Banff",
[
"Lake Tahoe",
"Stateline"
],
[
"Banff",
"Alberta's Rockies",
"Albertas Rockies"
],
"Beijing",
[
"Montreal",
@@ -87,7 +94,11 @@
"Barcelona",
"Toulon",
"Sydney",
"Long Beach",
[
"Los Angeles",
"Long Beach",
"LA"
],
"Vancouver",
"Stockholm",
[
@@ -95,7 +106,11 @@
"Montréal"
],
"New Orleans",
"Long Beach",
[
"Los Angeles",
"Long Beach",
"LA"
],
"Vancouver"
]
}

View File

@@ -1,7 +1,7 @@
{
"id": "15c3b339-88f7-4a86-ab16-e71c58dcb01e",
"snapshot": "thunderbird",
"instruction": "Help me access my outlook account with address \"anonym-x2024@outlook.com\" and password 'Wlv(z._6y|a,rrjfQuQhIi\\$;' (without ')",
"instruction": "Help me access my outlook account with address \"anonym-x2024@outlook.com\" and password 'password' (without ') in Thunderbird. It doesn't matter if Thunderbird reports a login or connection failure. Just finish the account setup and I will check things like the password manually later.",
"source": "https://www.wikihow.com/Access-Gmail-With-Mozilla-Thunderbird",
"config": [
{
@@ -85,16 +85,16 @@
{
"url": "imap://outlook.office365.com",
"user": "anonym-x2024@outlook.com",
"password": "Wlv(z._6y|a,rrjfQuQhIi\\$;"
"password": "password"
},
{
"url": "smtp://smtp.office365.com",
"user": "anonym-x2024@outlook.com",
"password": "Wlv(z._6y|a,rrjfQuQhIi\\$;"
"password": "password"
}
]
}
}
},
"proxy": false
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "3f28fe4f-5d9d-4994-a456-efd78cfae1a3",
"snapshot": "thunderbird",
"instruction": "Set up a signature using my name and affiliation. My name is Anonym and my affiliation is XYZ Lab.",
"instruction": "Set up a plain text signature for my email account in Thunderbird. The first line is my name \"Anonym\" and the second line is my affiliation \"XYZ Lab\".",
"source": "https://www.adsigner.com/user-manual/signatures/setup-email-client-thunderbird/#:~:text=is%20probably%20hidden.-,Right%20click%20on%20the%20empty%20space%20at%20the%20top%20of,signature%20from%20a%20file%20instead.",
"config": [
{
@@ -70,7 +70,7 @@
"expect": {
"mail.identity.id1.htmlSigText": {
"method": "re.S",
"ref": "Anonym.+XYZ Lab"
"ref": "Anonym\\nXYZ Lab"
}
}
}
@@ -78,4 +78,4 @@
"func": "check_thunderbird_prefs"
},
"proxy": false
}
}

View File

@@ -1,7 +1,7 @@
{
"id": "7b1e1ff9-bb85-49be-b01d-d6424be18cd0",
"snapshot": "thunderbird",
"instruction": "Could you help me open up the Thunderbird profile manager utility?",
"instruction": "Could you help me open up the profile management tabpage in Thunderbird? I want the profile management tabpage inside Thunderbird app, but not the profile chooser dialog during app launch.",
"source": "https://www.quora.com/How-do-I-open-a-Thunderbird-profile-manager-utility",
"config": [
{
@@ -58,4 +58,4 @@
"func": "check_accessibility_tree"
},
"proxy": false
}
}