diff --git a/desktop_env/evaluators/getters/file.py b/desktop_env/evaluators/getters/file.py index d6e2087..2cb81ab 100644 --- a/desktop_env/evaluators/getters/file.py +++ b/desktop_env/evaluators/getters/file.py @@ -86,14 +86,16 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option if not config.get("multi", False): paths: List[str] = [config["path"]] dests: List[str] = [config["dest"]] - print(config) + if "time_suffix" in config.keys() and config["time_suffix"]: + if "time_format" in config.keys(): + time_format = config["time_format"] + # Insert time before . in file type suffix + paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths] + dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." in d else d for d in dests] + else: + paths: List[str] = config["path"] + dests: List[str] = config["dest"] - if "time_suffix" in config.keys() and config["time_suffix"]: - if "time_format" in config.keys(): - time_format = config["time_format"] - # Insert time before . in file type suffix - paths = [p.split(".")[0] + datetime.now().strftime(time_format) + "." + p.split(".")[1] if "." in p else p for p in paths] - dests = [d.split(".")[0] + datetime.now().strftime(time_format) + "." + d.split(".")[1] if "." 
in d else d for d in dests] cache_paths: List[str] = [] @@ -113,7 +115,6 @@ def get_vm_file(env, config: Dict[str, Any]) -> Union[Optional[str], List[Option cache_paths.append(_path) with open(_path, "wb") as f: f.write(file) - # debug return cache_paths[0] if len(cache_paths)==1 else cache_paths diff --git a/desktop_env/evaluators/metrics/__init__.py b/desktop_env/evaluators/metrics/__init__.py index 6fdcf8d..c3f7f85 100644 --- a/desktop_env/evaluators/metrics/__init__.py +++ b/desktop_env/evaluators/metrics/__init__.py @@ -68,7 +68,7 @@ from .general import ( compare_time_in_speedtest_results, is_included_all_json_objects, is_gold_text_included_in_pdf, - check_csv_line_number, + check_line_number, file_contains, compare_terminal_and_txt, fuzzy_place_math, diff --git a/desktop_env/evaluators/metrics/general.py b/desktop_env/evaluators/metrics/general.py index 7ff7bf2..a764810 100644 --- a/desktop_env/evaluators/metrics/general.py +++ b/desktop_env/evaluators/metrics/general.py @@ -1,4 +1,5 @@ import csv +import os import datetime import difflib import functools @@ -91,23 +92,24 @@ def fuzzy_match(result, rules) -> float: def fuzzy_place_math(result_file_path, rules) -> float: + if result_file_path is None: + return 0. expect = rules["expected"] # a list of possible answers # read list.docx, and get all texts out, overlook blank lines, remove blanks before and after each line doc = Document(result_file_path) words_list = [] for para in doc.paragraphs: words_list.extend(para.text.split()) - # Print out the list of extracted words - print("Your Answers: ") - print(words_list) fuzzy_score_list = [] for word in words_list: max_score = 0 for ans in expect: - score = fuzz.ratio(word, ans) + score = fuzz.ratio(word, ans)/100 max_score = max(max_score, score) fuzzy_score_list.append(max_score) - return sum(fuzzy_score_list) / len(fuzzy_score_list) + if len(fuzzy_score_list) != 3: + return 0. 
+ return sum(fuzzy_score_list) / 3 def check_csv(result: str, rules: Dict[str, List[Dict[str, str]]]) -> float: """ @@ -304,29 +306,40 @@ def check_direct_json_object(result, rules) -> float: result = result.replace("'", '"') # load json object result = json.loads(result) - print("result: ") - print(result) - print("expected: ") - print(rules["expected"]) if result is None: return 0. + try: + expect_in_result = rules.get("expect_in_result", False) + if not expect_in_result: + expected_json = rules["expected"] + for key in expected_json.keys(): + expected_value = expected_json.get(key) + if expected_value != result.get(key): + return 0. + return 1.0 + else: + expected_json = rules["expected"] - expect_in_result = rules.get("expect_in_result", False) - if not expect_in_result: - expected_json = rules["expected"] - for key in expected_json.keys(): - expected_value = expected_json.get(key) - if expected_value != result.get(key): - return 0. - return 1.0 - else: - expected_json = rules["expected"] - for key in expected_json.keys(): - expected_value = expected_json.get(key) - if expected_value not in result.get(key): - return 0. - return 1.0 - + for key in expected_json.keys(): + if isinstance(expected_json.get(key), List): + flag = 0 + expected_value_list = expected_json.get(key) + for each_expected_value in expected_value_list: + if each_expected_value in result.get(key): + flag = 1 + break + if flag == 0: + return 0. + elif isinstance(expected_json.get(key), str): + if expected_json.get(key) not in result.get(key): + return 0. + else: + logger.debug("check_direct_json_object: expected value type not supported") + return 0. + return 1.0 + except: + logger.debug("check_direct_json_object: result is not a valid json object") + return 0. 
def compare_time_in_speedtest_results(speedtest_result_path, time_diff): if not speedtest_result_path: @@ -397,24 +410,37 @@ def is_gold_text_included_in_pdf(pdf_file_path, gold_text_path): def file_contains(file_path, config): # file_path ends with .txt if not file_path: - return 1 - with open(file_path, 'r') as f: - file_text = f.read() - for text in config["expected"]: - if text not in file_text: - return 0 - return 1 + return 0. + try: + with open(file_path, 'r') as f: + file_text = f.read() + for text in config["expected"]: + if text not in file_text: + logger.debug(f"file_contains: {text} not found in {file_path}") + return 0. + except: + logger.debug("file_contains: file not found or not readable") + return 0. + return 1. -def check_csv_line_number(file_path, line_number): - # check file_path suffix - if not file_path.endswith('.csv'): - return 0 - # check line number - with open(file_path, 'r') as f: - reader = csv.reader(f) - line_count = sum(1 for row in reader) - return 1 if line_count == int(line_number["expected"]) else 0 +def check_line_number(file_path, line_number): + # check if file_path exists + if file_path is None or not os.path.isfile(file_path): + return 0. + timeRegex = "([01]\d|2[0-3]):[0-5]\d:[0-5]\d" + # check if the string that matches the timeRegex in this txt file equals to line_number["expected"] + try: + with open(file_path, 'r') as f: + line_count = 0 + for line in f: + if re.search(timeRegex, line): + line_count += 1 + # if line_count equals to line_number["expected"], return 1, else return 0 + return 1 if line_count == int(line_number["expected"]) else 0 + except: + logger.debug("check_line_number: file not found or not readable") + return 0. 
def compare_terminal_and_txt(txt_file_path, terminal_output): diff --git a/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json b/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json index 8cf2e38..4e76c1d 100644 --- a/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json +++ b/evaluation_examples/examples/multi_apps/2373b66a-092d-44cb-bfd7-82e86e7a3b4d.json @@ -1,7 +1,7 @@ { "id": "2373b66a-092d-44cb-bfd7-82e86e7a3b4d", "snapshot": "multiapps", - "instruction": "I want to understand the resource usage of my Ubuntu system under normal workloads. Please use the `sar` command in the `sysstat` toolkit to monitor the system CPU usage, evaluate the status once every second for 30 seconds, output the results to \"System_Resources_Report.txt\" under Desktop, and convert the txt to csv file with the same name.", + "instruction": "I want to understand the resource usage of my Ubuntu system under normal workloads. 
Please use the `sar` command in the `sysstat` toolkit to monitor system activity, evaluate the status once every second for 30 seconds, output the results to \"System_Resources_Report.txt\" under Desktop.", "source": "author", "config": [ { @@ -17,7 +17,7 @@ "os", "calc" ], "evaluator": { - "func": ["file_contains", "check_csv_line_number"], + "func": ["file_contains", "check_line_number"], "result": [ { @@ -27,8 +27,8 @@ }, { "type": "vm_file", - "path": "/home/user/Desktop/System_Resources_Report.csv", - "dest": "System_Resources_Report.csv" + "path": "/home/user/Desktop/System_Resources_Report.txt", + "dest": "System_Resources_Report.txt" } ], "expected": @@ -36,13 +36,13 @@ { "type": "rule", "rules" :{ - "expected": ["tps", "rtps", "wtps", "dtps", "bread/s", "bwrtn/s", "bdscd/s", "Average", "Linux"] + "expected": ["CPU", "%user","%nice","%system", "%iowait", "%steal", "%idle"] } }, { "type": "rule", "rules": { - "expected": "33" + "expected": "31" } }] } diff --git a/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json b/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json index 07d4c0d..4ce5836 100644 --- a/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json +++ b/evaluation_examples/examples/multi_apps/7ff48d5b-2df2-49da-b500-a5150ffc7f18.json @@ -33,6 +33,24 @@ "chrome", "os", "writer" ], "evaluator": { + "postconfig":[ + { + "type": "activate_window", + "parameters": { + "window_name": "AllLocations.docx - LibreOffice Writer" + } + }, + { + "type": "execute", + "parameters": { + "command": [ + "python", + "-c", + "import pyautogui; import time; pyautogui.hotkey(\"ctrl\", \"s\"); time.sleep(0.5); pyautogui.press(\"enter\");" + ] + } + } + ], "func": "fuzzy_place_math", "result": { "type": "vm_file", diff --git a/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json 
b/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json index 9628868..95a068e 100644 --- a/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json +++ b/evaluation_examples/examples/multi_apps/ce2b64a2-ddc1-4f91-8c7d-a88be7121aac.json @@ -20,7 +20,7 @@ "parameters": { "files": [ { - "url": "https://drive.usercontent.google.com/download?id=1iquZNI4ktx2915srQ1MMlhcwCuXnmq5T&export=download&authuser=0&confirm=t&uuid=e0bfd86f-a0f1-4a9e-8344-0f645eb379dd&at=APZUnTWQnCd7bNtf8ZN4KfkQUaJ9:1709972645590", + "url": "https://drive.usercontent.google.com/download?id=1XLBhnopqQrTw3bxRWOyfTCslfawFyOaZ&export=download&authuser=0&confirm=t&uuid=f41a09c1-8319-4a50-b8c3-4fc58ba51c00&at=APZUnTV3uiv7wKiSvoCekJG_aVo3:1710051227101", "path": "/home/user/Pictures/picture2.jpg" } @@ -68,9 +68,9 @@ "type": "rule", "rules":{ "expected":{ - "ec076282f61ba74642e94b5a6a1250c6988204d59d9b02936606b6b8ef1e4433": "Kilimanjaro", - "999957c8de835bfa420d198270e7a6b079ee20ff53a3f214491e8134768a7c0b": "Himalayas", - "79f45d40d8413d4e81f1b9734ea39e58622cafd79e12bab32959643fc245147c": "Hua" + "ec076282f61ba74642e94b5a6a1250c6988204d59d9b02936606b6b8ef1e4433": ["Kilimanjaro"], + "6ed4239ecc2be3ec15ad65a78c5c823b9004d640b8cc83a6a7af5930f354de91": ["Himalayas", "Everest", "Sagarmatha"], + "79f45d40d8413d4e81f1b9734ea39e58622cafd79e12bab32959643fc245147c": ["Hua", "hua"] }, "expect_in_result": true } diff --git a/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json b/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json index c751d7b..2ec27c1 100644 --- a/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json +++ b/evaluation_examples/examples/multi_apps/da922383-bfa4-4cd3-bbad-6bebab3d7742.json @@ -1,7 +1,7 @@ { "id": "da922383-bfa4-4cd3-bbad-6bebab3d7742", "snapshot": "multiapps", - "instruction": "I browsed a lot of interesting blog articles today. 
I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs you are opening now in pdf format to /home/user/Documents/Blogs.", + "instruction": "I browsed a lot of interesting blog articles today. I hope to store these articles in my local designated folder just like zotero stores papers. Please download the blogs opening now in pdf format and save them in their title to /home/user/Documents/Blogs.", "source": "authors", "config": [ {