Merge branch 'zdy'

David Chang
2024-01-27 23:18:33 +08:00
6 changed files with 57 additions and 32 deletions

View File

@@ -76,6 +76,8 @@ class DesktopEnv(gym.Env):
        self.cache_dir_base: str = cache_dir
        self.vm_screen_size = screen_size
        os.makedirs(self.tmp_dir_base, exist_ok=True)

        # task-aware stuffs
        # todo: handling the logic of snapshot directory
        self._set_task_info(task_config)

View File

@@ -92,8 +92,11 @@ def compare_table(result: str, expected: str, **options) -> float:
     if result is None:
         return 0.
-    xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
-    pdworkbookr = pd.ExcelFile(result)
+    try:
+        xlworkbookr: Workbook = openpyxl.load_workbook(filename=result)
+        pdworkbookr = pd.ExcelFile(result)
+    except:
+        return 0.
     worksheetr_names: List[str] = pdworkbookr.sheet_names
     xlworkbooke: Workbook = openpyxl.load_workbook(filename=expected)
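
The change above wraps workbook loading in a try/except so that a missing or unreadable result file is scored 0.0 instead of crashing the evaluator. A minimal sketch of the same guard in isolation (the helper name and the broad exception are illustrative, not part of this commit):

# Illustrative only: load a result workbook defensively, as compare_table now does.
import openpyxl
import pandas as pd

def try_load_result(result_path):
    try:
        xl_workbook = openpyxl.load_workbook(filename=result_path)
        pd_workbook = pd.ExcelFile(result_path)
    except Exception:
        # Missing, corrupted, or non-xlsx input all counts as a failed task.
        return None
    return xl_workbook, pd_workbook

# Caller: score 0.0 when try_load_result(result) returns None, otherwise compare sheets.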

View File

@@ -52,14 +52,17 @@ def load_sparklines(xlsx_file: str, sheet_name: str) -> Dict[str, str]:
     """
     # read xlsx
-    with zipfile.ZipFile(xlsx_file, "r") as z_f:
-        with z_f.open("xl/workbook.xml") as f:
-            workbook_database: _Element = lxml.etree.fromstring(f.read())
-        sheets: List[_Element] = _sheet_name_selector(workbook_database)
-        sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
-        with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
-            sheet: _Element = lxml.etree.fromstring(f.read())
-        sparklines: List[_Element] = _sparklines_selector(sheet)
+    try:
+        with zipfile.ZipFile(xlsx_file, "r") as z_f:
+            with z_f.open("xl/workbook.xml") as f:
+                workbook_database: _Element = lxml.etree.fromstring(f.read())
+            sheets: List[_Element] = _sheet_name_selector(workbook_database)
+            sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
+            with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
+                sheet: _Element = lxml.etree.fromstring(f.read())
+            sparklines: List[_Element] = _sparklines_selector(sheet)
+    except zipfile.BadZipFile:
+        return {}

     sparklines_dict: Dict[str, str] = {}
     for sp_l in sparklines:
@@ -158,29 +161,32 @@ def load_charts(xlsx_file: Workbook, sheet_name: str, **options) -> Dict[str, An
 _shared_str_selector = lxml.cssselect.CSSSelector("oo|sst>oo|si>oo|t", namespaces=_xlsx_ns_mapping)
 def read_cell_value(xlsx_file: str, sheet_name: str, coordinate: str) -> Any:
     # read_cell_value {{{ #
-    with zipfile.ZipFile(xlsx_file, "r") as z_f:
-        try:
-            with z_f.open("xl/sharedStrings.xml") as f:
-                shared_str_xml: _Element = lxml.etree.fromstring(f.read())
-            str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
-            shared_strs: List[str] = [elm.text for elm in str_elements]
-        except:
-            logger.debug("Read shared strings error: %s", xlsx_file)
+    try:
+        with zipfile.ZipFile(xlsx_file, "r") as z_f:
+            try:
+                with z_f.open("xl/sharedStrings.xml") as f:
+                    shared_str_xml: _Element = lxml.etree.fromstring(f.read())
+                str_elements: List[_Element] = _shared_str_selector(shared_str_xml)
+                shared_strs: List[str] = [elm.text for elm in str_elements]
+            except:
+                logger.debug("Read shared strings error: %s", xlsx_file)
-        with z_f.open("xl/workbook.xml") as f:
-            workbook_database: _Element = lxml.etree.fromstring(f.read())
-        sheets: List[_Element] = _sheet_name_selector(workbook_database)
-        sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
+            with z_f.open("xl/workbook.xml") as f:
+                workbook_database: _Element = lxml.etree.fromstring(f.read())
+            sheets: List[_Element] = _sheet_name_selector(workbook_database)
+            sheet_names: Dict[str, str] = {sh.get("name"): sh.get("sheetId") for sh in sheets}
-        with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
-            sheet: _Element = lxml.etree.fromstring(f.read())
-        cells: List[_Element] =\
-                lxml.cssselect.CSSSelector( 'oo|row>oo|c[r="{:}"]'.format(coordinate)
-                                          , namespaces=_xlsx_ns_mapping
-                                          )(sheet)
-        if len(cells)==0:
-            return None
-        cell: _Element = cells[0]
+            with z_f.open("xl/worksheets/sheet{:}.xml".format(sheet_names[sheet_name])) as f:
+                sheet: _Element = lxml.etree.fromstring(f.read())
+            cells: List[_Element] =\
+                    lxml.cssselect.CSSSelector( 'oo|row>oo|c[r="{:}"]'.format(coordinate)
+                                              , namespaces=_xlsx_ns_mapping
+                                              )(sheet)
+            if len(cells)==0:
+                return None
+            cell: _Element = cells[0]
+    except zipfile.BadZipFile:
+        return None

     cell: Dict[str, str] = xmltodict.parse( lxml.etree.tostring(cell, encoding="unicode")
                                           , process_namespaces=True
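
Both utils.py hunks apply the same idea: a file that is not a valid zip archive (an empty or half-written .xlsx, for example) raises zipfile.BadZipFile, which is now mapped to an empty or None result instead of an unhandled exception. A stripped-down sketch of the pattern (the helper name is hypothetical):

# Illustrative only: read one XML part from an .xlsx, tolerating a corrupt archive.
import zipfile
import lxml.etree

def read_workbook_xml(xlsx_file):
    try:
        with zipfile.ZipFile(xlsx_file, "r") as z_f:
            with z_f.open("xl/workbook.xml") as f:
                return lxml.etree.fromstring(f.read())
    except zipfile.BadZipFile:
        # Not a real xlsx/zip archive; callers treat this as "no data".
        return None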

View File

@@ -44,6 +44,7 @@ logger.addHandler(sdebug_handler)
 logger = logging.getLogger("desktopenv.experiment")

 PATH_TO_VM = r"C:\Users\tianbaox\Documents\Virtual Machines\Ubuntu\Ubuntu.vmx"
+#PATH_TO_VM = "../../../../大文件/镜像/Ubuntu-1218/Ubuntu/Ubuntu.vmx"

 def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_trajectory", recording=True):
@@ -142,6 +143,9 @@ def run_one_example(example, agent, max_steps=10, example_trajectory_dir="exp_tr
 def main(example_class, example_id):
     action_space = "pyautogui"

+    #example_class = "libreoffice_calc"
+    #example_id = "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3"
+    #example_id = "01b269ae-2111-4a07-81fd-3fcd711993b0"

     gpt4_model = "gpt-4-vision-preview"
     gemini_model = "gemini-pro-vision"
@@ -152,6 +156,8 @@ def main(example_class, example_id):
     with open(f"evaluation_examples/examples/{example_class}/{example_id}.json", "r", encoding="utf-8") as f:
         example = json.load(f)
     example["snapshot"] = "exp_chrome"
+    #example["snapshot"] = "exp_setup4"
+    #example["snapshot"] = "Snapshot 30"

     api_key = os.environ.get("OPENAI_API_KEY")
     agent = GPT4v_Agent(api_key=api_key, model=gpt4_model, instruction=example['instruction'],
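
For reference, main() is now parameterised by task class and ID; a hypothetical invocation using the pair visible in the commented lines above:

# Hypothetical driver call; values taken from the commented examples in this hunk.
if __name__ == "__main__":
    main("libreoffice_calc", "7b6c7e24-c58a-49fc-a5bb-d57b80e5b4c3")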

View File

@@ -22,6 +22,10 @@ from mm_agents.prompts import SYS_PROMPT_IN_SCREENSHOT_OUT_CODE, SYS_PROMPT_IN_S
     SYS_PROMPT_IN_SOM_A11Y_OUT_TAG, \
     SYS_PROMPT_SEEACT, ACTION_DESCRIPTION_PROMPT_SEEACT, ACTION_GROUNDING_PROMPT_SEEACT

+import logging
+
+logger = logging.getLogger("desktopenv.agent")

 # Function to encode the image
 def encode_image(image_path):
@@ -252,6 +256,7 @@ class GPT4v_Agent:
         if self.exp == "both":
             _screenshot = previous_obs["screenshot"]
             _linearized_accessibility_tree = previous_obs["accessibility_tree"]
+            logger.debug("LINEAR AT: %s", _linearized_accessibility_tree)

             messages.append({
                 "role": "user",
@@ -456,7 +461,7 @@ class GPT4v_Agent:
             "max_tokens": self.max_tokens
         })

-        print(response)
+        logger.debug("RESPONSE: %s", response)

         if self.exp == "seeact":
             messages.append({
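
The agent-side edits replace stdout prints with the "desktopenv.agent" logger registered at import time; a minimal sketch of the assumed pattern (handler and level configuration live elsewhere in the repo and are not shown):

# Illustrative only: route agent responses through logging instead of print().
import logging

logger = logging.getLogger("desktopenv.agent")

def log_response(response):
    # %s-style lazy formatting defers string building until a DEBUG handler is attached.
    logger.debug("RESPONSE: %s", response)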

View File

@@ -35,3 +35,6 @@ playwright
 backoff
 formulas
 pydrive
+fastdtw
+openai
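
The two visible additions are fastdtw and openai. As a quick smoke test for the former (toy data, following the usual fastdtw README usage; numpy and scipy are assumed to be available):

# Illustrative only: dynamic time warping distance between two toy sequences.
import numpy as np
from scipy.spatial.distance import euclidean
from fastdtw import fastdtw

x = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
y = np.array([[2, 2], [3, 3], [4, 4]])
distance, path = fastdtw(x, y, dist=euclidean)
print(distance)  # total alignment cost; path lists the matched index pairs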