ver Feb1st
human evaluation and SoM experiments on Thunderbird
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"id": "06fe7178-4491-4589-810f-2e2bc9502122",
|
"id": "06fe7178-4491-4589-810f-2e2bc9502122",
|
||||||
"snapshot": "thunderbird",
|
"snapshot": "thunderbird",
|
||||||
"instruction": "Could you help me back up all the email files in my profile to ~/emails.bak? Please save them separately in eml format.",
|
"instruction": "Could you help me back up all the email files in my inbox to ~/emails.bak? Please save them separately in eml format.",
|
||||||
"source": "https://www.quora.com/How-do-I-backup-email-files-in-Mozilla-Thunderbird",
|
"source": "https://www.quora.com/How-do-I-backup-email-files-in-Mozilla-Thunderbird",
|
||||||
"config": [
|
"config": [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"id": "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
|
"id": "2ad9387a-65d8-4e33-ad5b-7580065a27ca",
|
||||||
"snapshot": "thunderbird",
|
"snapshot": "thunderbird",
|
||||||
"instruction": "Create two local folders for me: COMPANY and UNIVERSITY.",
|
"instruction": "Create two local folders in Thunderbird for me: COMPANY and UNIVERSITY.",
|
||||||
"source": "https://support.mozilla.org/bm/questions/1027435",
|
"source": "https://support.mozilla.org/bm/questions/1027435",
|
||||||
"config": [
|
"config": [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -46,7 +46,7 @@ logger = logging.getLogger("desktopenv.experiment")
|
|||||||
#PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
|
#PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
|
||||||
|
|
||||||
|
|
||||||
PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"
|
PATH_TO_VM = "/mnt/data1/david/os-images/Ubuntu-1218/Ubuntu.vmx"
|
||||||
|
|
||||||
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
|
def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
|
||||||
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
|
trajectory_recording_path = os.path.join(example_trajectory_dir, "trajectory.json")
|
||||||
@@ -198,6 +198,22 @@ if __name__ == '__main__':
|
|||||||
, "aa3a8974-2e85-438b-b29e-a64df44deb4b"
|
, "aa3a8974-2e85-438b-b29e-a64df44deb4b"
|
||||||
, "a01fbce3-2793-461f-ab86-43680ccbae25"
|
, "a01fbce3-2793-461f-ab86-43680ccbae25"
|
||||||
, "4f07fbe9-70de-4927-a4d5-bb28bc12c52c"
|
, "4f07fbe9-70de-4927-a4d5-bb28bc12c52c"
|
||||||
|
# list index 42: the IDs above this marker are libreoffice_calc examples, the IDs below are thunderbird examples
|
||||||
|
, "bb5e4c0d-f964-439c-97b6-bdb9747de3f4"
|
||||||
|
, "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
|
||||||
|
, "12086550-11c0-466b-b367-1d9e75b3910e"
|
||||||
|
, "06fe7178-4491-4589-810f-2e2bc9502122"
|
||||||
|
, "6766f2b8-8a72-417f-a9e5-56fcaa735837"
|
||||||
|
, "e1e75309-3ddb-4d09-92ec-de869c928143"
|
||||||
|
, "3d1682a7-0fb0-49ae-a4dc-a73afd2d06d5"
|
||||||
|
, "35253b65-1c19-4304-8aa4-6884b8218fc0"
|
||||||
|
, "d088f539-cab4-4f9a-ac92-9999fc3a656e"
|
||||||
|
, "2ad9387a-65d8-4e33-ad5b-7580065a27ca"
|
||||||
|
, "480bcfea-d68f-4aaa-a0a9-2589ef319381"
|
||||||
|
, "030eeff7-b492-4218-b312-701ec99ee0cc"
|
||||||
|
, "94760984-3ff5-41ee-8347-cf1af709fea0"
|
||||||
|
, "99146c54-4f37-4ab8-9327-5f3291665e1e"
|
||||||
|
, "c9e7eaf2-b1a1-4efc-a982-721972fa9f02"
|
||||||
]
|
]
|
||||||
for example_id in xx_list[18:]:
|
for example_id in xx_list[42:]:
|
||||||
main("libreoffice_calc", example_id)
|
main("thunderbird", example_id)
|
||||||
|
|||||||
2
main.py
2
main.py
@@ -47,7 +47,7 @@ def human_agent():
|
|||||||
Runs the Gym environment with human input.
|
Runs the Gym environment with human input.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
with open("evaluation_examples/examples/thunderbird/030eeff7-b492-4218-b312-701ec99ee0cc.json", "r") as f:
|
with open("evaluation_examples/examples/thunderbird/c9e7eaf2-b1a1-4efc-a982-721972fa9f02.json", "r") as f:
|
||||||
example = json.load(f)
|
example = json.load(f)
|
||||||
example["snapshot"] = "Snapshot 30"
|
example["snapshot"] = "Snapshot 30"
|
||||||
|
|
||||||
|
|||||||
@@ -460,14 +460,18 @@ class GPT4v_Agent:
|
|||||||
with open("messages.json", "w") as f:
|
with open("messages.json", "w") as f:
|
||||||
f.write(json.dumps(messages, indent=4))
|
f.write(json.dumps(messages, indent=4))
|
||||||
|
|
||||||
response = self.call_llm({
|
try:
|
||||||
"model": self.model,
|
response = self.call_llm({
|
||||||
"messages": messages,
|
"model": self.model,
|
||||||
"max_tokens": self.max_tokens
|
"messages": messages,
|
||||||
})
|
"max_tokens": self.max_tokens
|
||||||
|
})
|
||||||
|
except:
|
||||||
|
response = ""
|
||||||
|
|
||||||
logger.debug("RESPONSE: %s", response)
|
logger.debug("RESPONSE: %s", response)
|
||||||
|
|
||||||
|
# {{{
|
||||||
if self.exp == "seeact":
|
if self.exp == "seeact":
|
||||||
messages.append({
|
messages.append({
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
@@ -503,7 +507,7 @@ class GPT4v_Agent:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Failed to parse action from response", e)
|
print("Failed to parse action from response", e)
|
||||||
actions = None
|
actions = None
|
||||||
self.thoughts.append("")
|
self.thoughts.append("") # }}}
|
||||||
|
|
||||||
return actions
|
return actions
|
||||||
|
|
||||||
@@ -516,7 +520,8 @@ class GPT4v_Agent:
|
|||||||
response = requests.post(
|
response = requests.post(
|
||||||
"https://api.openai.com/v1/chat/completions",
|
"https://api.openai.com/v1/chat/completions",
|
||||||
headers=self.headers,
|
headers=self.headers,
|
||||||
json=payload
|
json=payload,
|
||||||
|
timeout=20
|
||||||
)
|
)
|
||||||
|
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
|
|||||||
Reference in New Issue
Block a user