Add one-click download and setup script for OSWorld virtual machine; Make up show_result.py
This commit is contained in:
71
show_result.py
Normal file
71
show_result.py
Normal file
@@ -0,0 +1,71 @@
|
||||
import json
import os
|
||||
|
||||
|
||||
def get_result(action_space, use_model, observation_type, result_dir):
    """Collect and print the success rates of one experiment.

    Results are read from
    ``<result_dir>/<action_space>/<observation_type>/<use_model>/<domain>/<example_id>/result.txt``.
    Examples without a ``result.txt`` file are skipped.

    Side effects: prints a per-domain summary and the Office / Daily /
    Professional group summaries, and writes the per-example scores to
    ``all_result.json`` inside the experiment directory.

    Args:
        action_space: Name of the action-space directory level.
        use_model: Name of the model directory level.
        observation_type: Name of the observation-type directory level.
        result_dir: Root directory that holds all experiment results.

    Returns:
        A list with one float score per finished example (domains and
        examples are visited in sorted name order), or ``None`` when the
        experiment directory does not exist or holds no results yet.
    """
    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
    if not os.path.exists(target_dir):
        print("New experiment, no result yet.")
        return None

    all_result = []
    domain_result = {}
    all_result_for_analysis = {}

    # Sorted traversal keeps the printed report and returned list stable
    # across runs; os.listdir() makes no ordering guarantee.
    for domain in sorted(os.listdir(target_dir)):
        domain_path = os.path.join(target_dir, domain)
        if not os.path.isdir(domain_path):
            continue
        for example_id in sorted(os.listdir(domain_path)):
            result_path = os.path.join(domain_path, example_id, "result.txt")
            if not os.path.isfile(result_path):
                # Example has not finished (or is not an example directory).
                continue
            # Read the file once and close it deterministically.
            with open(result_path, "r", encoding="utf-8") as f:
                score = _parse_score(f.read())

            domain_result.setdefault(domain, []).append(score)
            all_result_for_analysis.setdefault(domain, {})[example_id] = score
            all_result.append(score)

    for domain, scores in domain_result.items():
        print("Domain:", domain, "Runned:", len(scores), "Success Rate:",
              sum(scores) / len(scores) * 100, "%")

    print(">>>>>>>>>>>>>")
    domain_groups = (
        ("Office", ("libreoffice_calc", "libreoffice_impress", "libreoffice_writer")),
        ("Daily", ("vlc", "thunderbird", "chrome")),
        ("Professional", ("gimp", "vs_code")),
    )
    for label, domains in domain_groups:
        _print_group_rate(label, domain_result, domains)

    # Emit real JSON so the ".json" file can be loaded with json.load().
    with open(os.path.join(target_dir, "all_result.json"), "w", encoding="utf-8") as f:
        json.dump(all_result_for_analysis, f)

    if not all_result:
        print("New experiment, no result yet.")
        return None

    print("Runned:", len(all_result), "Current Success Rate:", sum(all_result) / len(all_result) * 100, "%")
    return all_result


def _parse_score(raw):
    """Convert the text of a ``result.txt`` file into a float score.

    Numeric text is parsed with ``float``. The words ``true`` / ``false``
    (any case) map to 1.0 / 0.0. Any other text falls back to its
    truthiness after stripping whitespace: empty -> 0.0, non-empty -> 1.0.
    """
    text = raw.strip()
    try:
        return float(text)
    except ValueError:
        pass
    lowered = text.lower()
    if lowered == "true":
        return 1.0
    if lowered == "false":
        # A plain bool() on the non-empty string "False" would give 1.0.
        return 0.0
    return float(bool(text))


def _print_group_rate(label, domain_result, domains):
    """Print the pooled success rate of *domains*, tolerating missing ones."""
    pooled = [score for name in domains for score in domain_result.get(name, [])]
    if pooled:
        print(label, "Success Rate:", sum(pooled) / len(pooled) * 100, "%")
    else:
        # None of the group's domains has been run yet.
        print(label, "Success Rate:", "N/A")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: report on one fixed experiment configuration
    # stored under ./results.
    get_result(
        action_space="pyautogui",
        use_model="gpt-4-vision-preview",
        observation_type="screenshot",
        result_dir="./results",
    )
|
||||
Reference in New Issue
Block a user