fix: fix multiapp tasks (#229)
Co-authored-by: adlsdztony <zzl0712@connect.hku.hk>
This commit is contained in:
@@ -193,7 +193,7 @@ def structure_check_by_mse(img1, img2, threshold=0.03):
|
||||
(np.array(img1, dtype=np.float32) / 255
|
||||
- np.array(img2, dtype=np.float32) / 255) ** 2)
|
||||
structure_same = True if mse < threshold else False
|
||||
print("MSE: ", mse)
|
||||
print(f"MSE: {mse}, threshold: {threshold}")
|
||||
return structure_same
|
||||
|
||||
|
||||
@@ -204,7 +204,7 @@ def structure_check_by_ssim(img1, img2, threshold=0.9):
|
||||
return similarity >= threshold
|
||||
|
||||
|
||||
def check_brightness_decrease_and_structure_sim(src_path, tgt_path):
|
||||
def check_brightness_decrease_and_structure_sim(src_path, tgt_path, threshold=0.03):
|
||||
"""
|
||||
Check the brightness of src is lower than tgt and the structures are similar
|
||||
gimp:7a4deb26-d57d-4ea9-9a73-630f66a7b568
|
||||
@@ -219,13 +219,15 @@ def check_brightness_decrease_and_structure_sim(src_path, tgt_path):
|
||||
brightness_src = calculate_brightness(img_src)
|
||||
brightness_tgt = calculate_brightness(img_tgt)
|
||||
brightness_reduced = brightness_tgt > brightness_src
|
||||
|
||||
# print(f"Brightness src: {brightness_src}, tgt: {brightness_tgt}, reduced: {brightness_reduced}")
|
||||
|
||||
# Normalize and compare images
|
||||
target_brightness = 128
|
||||
img_src_normalized = normalize_brightness(img_src, target_brightness)
|
||||
img_tgt_normalized = normalize_brightness(img_tgt, target_brightness)
|
||||
|
||||
structure_same = structure_check_by_mse(img_src_normalized, img_tgt_normalized)
|
||||
structure_same = structure_check_by_mse(img_src_normalized, img_tgt_normalized, threshold=threshold)
|
||||
if brightness_reduced and structure_same:
|
||||
return 1.
|
||||
else:
|
||||
|
||||
@@ -63,11 +63,13 @@ def compare_epub(result: str, expected: str) -> float:
|
||||
result_files: List[str] = process_epub(result)
|
||||
expected_files: List[str] = process_epub(expected)
|
||||
|
||||
metric: float = 1.
|
||||
metric: float = 0.
|
||||
for f1, f2 in zip(result_files, expected_files):
|
||||
current_metric: float = diff_text_file(f1, f2)
|
||||
logger.debug("%s vs %s: %f", f1, f2, current_metric)
|
||||
metric *= current_metric
|
||||
metric += current_metric
|
||||
if len(result_files) > 0:
|
||||
metric /= len(result_files)
|
||||
return metric
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "00fa164e-2612-4439-992e-157d019a8436",
|
||||
"snapshot": "libreoffice_writer",
|
||||
"instruction": "I need to include the experiment results from \"~/Documents/awesome-desktop/expe-results.xlsx\" into the currently writing report. Specifically, extract the results of LLM-based models and insert a table into the \"Main Results\" section of my report.",
|
||||
"instruction": "I need to include the experiment results from \"~/Documents/awesome-desktop/expe-results.xlsx\" into the currently writing report. Specifically, extract the results of GPT-4 and insert a table into the \"Main Results\" section of my report.",
|
||||
"source": "authors",
|
||||
"config": [
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "02ce9a50-7af2-47ed-8596-af0c230501f8",
|
||||
"snapshot": "libreoffice_writer",
|
||||
"instruction": "I am currently utilizing LibreOffice Writer to compose a Linux tutorial, and I intend to display the outcomes generated by executing the \"ls\" command in /home/user. Kindly execute this command and save the screenshot as 'ls.png' on the Desktop.",
|
||||
"instruction": "I am currently utilizing LibreOffice Writer to compose a Linux tutorial, and I intend to display the outcomes generated by executing the \"ls\" command in /home/user. Kindly execute this command and save the screenshot of the terminal as 'ls.png' on the Desktop.",
|
||||
"source": "authors",
|
||||
"config": [
|
||||
{
|
||||
@@ -54,7 +54,7 @@
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"type": "text",
|
||||
"text": " Ls"
|
||||
"text": "ls"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "26660ad1-6ebb-4f59-8cba-a8432dfe8d38",
|
||||
"snapshot": "multiapps",
|
||||
"instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, export the measurement results, and save them to ~/Test/Speed (if the dir does not exist, create it).",
|
||||
"instruction": "I want to test the quality of the network environment my laptop is currently in. Please measure my network situation through speedtest.net, copy the results in speedtest.net/results, and save them to ~/Test/Speed/results.txt (if the dir does not exist, create it). Each metric occupies one line, with the metric name and its value separated by a single space.",
|
||||
"source": "https://www.speedtest.net/",
|
||||
"config": [
|
||||
{
|
||||
@@ -54,16 +54,21 @@
|
||||
"browser"
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "compare_time_in_speedtest_results",
|
||||
"func": "file_contains",
|
||||
"result": {
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Test/Speed/Speedtest Results Export-.csv",
|
||||
"dest": "Speedtest Results Export-.csv",
|
||||
"time_suffix": true
|
||||
"path": "/home/user/Test/Speed/results.txt",
|
||||
"dest": "results.txt"
|
||||
},
|
||||
"expected": {
|
||||
"type": "time_diff_range",
|
||||
"diff_range_in_minutes": "60"
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"expected": [
|
||||
"Ping",
|
||||
"Download",
|
||||
"Upload"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"proxy": true
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "3680a5ee-6870-426a-a997-eba929a0d25c",
|
||||
"snapshot": "libreoffice_calc",
|
||||
"instruction": "I have file1.xlsx and file2.ods on the Desktop and each has one column. Help me use only the command line to merge these two columns into one LibreOffice Calc file called output.csv and open it from terminal.",
|
||||
"instruction": "I have file1.xlsx and file2.ods on my Desktop, each containing a single column. Using only the command line, help me merge these two columns into a single column by concatenating the strings from both rows, save the result as ~/Desktop/output.csv, and open it in LibreOffice Calc from the terminal",
|
||||
"source": "https://unix.stackexchange.com/questions/510850/how-to-open-calc-from-terminal-and-insert-files",
|
||||
"config": [
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "3f05f3b9-29ba-4b6b-95aa-2204697ffc06",
|
||||
"snapshot": "os",
|
||||
"instruction": "I have a collection of MP3s with blank meta data, but already named with their artists and titles. I've heard that Picard or Kid3 may help, but I'm unfamiliar with them. Can you help me to fix the meta data?",
|
||||
"instruction": "I have a collection of MP3s with blank meta data, but already named with their artists and titles. I've heard that Picard or Kid3 may help, but I'm unfamiliar with them. Can you help me to fix the meta data \"title\" and \"artist\"?",
|
||||
"source": "authors",
|
||||
"config": [
|
||||
{
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "48d05431-6cd5-4e76-82eb-12b60d823f7d",
|
||||
"snapshot": "os",
|
||||
"instruction": "When I ran \"conda install datasets\" in terminal, I got \"conda: command not found\". Could you help me solve it?",
|
||||
"instruction": "When I ran \"conda install datasets\" in terminal, I got \"conda: command not found\". Could you help me solve it so that I can use conda command right away?",
|
||||
"source": "authors",
|
||||
"config": [
|
||||
{
|
||||
@@ -45,17 +45,18 @@
|
||||
"os",
|
||||
"chrome"
|
||||
],
|
||||
|
||||
"evaluator": {
|
||||
"func": "is_in_list",
|
||||
"func": "exact_match",
|
||||
"result": {
|
||||
"type": "vm_command_line",
|
||||
"command": "conda list",
|
||||
"command": "grep -q 'conda initialize' ~/.bashrc && echo 1 || echo 0",
|
||||
"shell": true
|
||||
},
|
||||
"expected": {
|
||||
"type": "rule",
|
||||
"rules": {
|
||||
"expected": "packages in environment at"
|
||||
"expected": "1\n"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -30,15 +30,18 @@
|
||||
],
|
||||
"evaluator": {
|
||||
"func": "check_brightness_decrease_and_structure_sim",
|
||||
"result": {
|
||||
"expected": {
|
||||
"type": "vm_file",
|
||||
"path": "/home/user/Desktop/background.png",
|
||||
"dest": "background.png"
|
||||
},
|
||||
"expected": {
|
||||
"result": {
|
||||
"type": "cloud_file",
|
||||
"path": "https://huggingface.co/datasets/xlangai/ubuntu_osworld_file_cache/resolve/main/multi_apps/4c26e3f3-3a14-4d86-b44a-d3cedebbb487/back.png",
|
||||
"dest": "image_original.png"
|
||||
},
|
||||
"options": {
|
||||
"threshold": 0.15
|
||||
}
|
||||
},
|
||||
"proxy": false
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"id": "58565672-7bfe-48ab-b828-db349231de6b",
|
||||
"snapshot": "chrome",
|
||||
"instruction": "Can you assist me by opening the first link in the latest email in Bills folder from Thunderbird and displaying it in a new Chrome tab?",
|
||||
"instruction": "Can you assist me by opening the first link in the latest email in Bills folder and displaying it in a new Chrome tab?",
|
||||
"source": "https://superuser.com/questions/1792660/open-link-from-other-application-does-not-open-the-url-in-firefox",
|
||||
"config": [
|
||||
{
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
"parameters": {
|
||||
"command": [
|
||||
"google-chrome",
|
||||
"--proxy-server=http://127.0.0.1:18888",
|
||||
"--remote-debugging-port=1337"
|
||||
]
|
||||
}
|
||||
|
||||
@@ -65,8 +65,15 @@
|
||||
"expected": [
|
||||
"Scottsdale",
|
||||
"Atlanta",
|
||||
"Lake Tahoe",
|
||||
"Banff",
|
||||
[
|
||||
"Lake Tahoe",
|
||||
"Stateline"
|
||||
],
|
||||
[
|
||||
"Banff",
|
||||
"Alberta's Rockies",
|
||||
"Alberta’s Rockies"
|
||||
],
|
||||
"Beijing",
|
||||
[
|
||||
"Montreal",
|
||||
@@ -87,7 +94,11 @@
|
||||
"Barcelona",
|
||||
"Toulon",
|
||||
"Sydney",
|
||||
"Long Beach",
|
||||
[
|
||||
"Los Angeles",
|
||||
"Long Beach",
|
||||
"LA"
|
||||
],
|
||||
"Vancouver",
|
||||
"Stockholm",
|
||||
[
|
||||
@@ -95,7 +106,11 @@
|
||||
"Montréal"
|
||||
],
|
||||
"New Orleans",
|
||||
"Long Beach",
|
||||
[
|
||||
"Los Angeles",
|
||||
"Long Beach",
|
||||
"LA"
|
||||
],
|
||||
"Vancouver"
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user