diff --git a/desktop_env/controllers/setup.py b/desktop_env/controllers/setup.py
index fa4f4f8..c637ec4 100644
--- a/desktop_env/controllers/setup.py
+++ b/desktop_env/controllers/setup.py
@@ -779,27 +779,27 @@ class SetupController:
cache_path = os.path.join(self.cache_dir, "history_new.sqlite")
db_url = "https://drive.usercontent.google.com/u/0/uc?id=1Lv74QkJYDWVX0RIgg0Co-DUcoYpVL0oX&export=download" # google drive
if not os.path.exists(cache_path):
- max_retries = 3
- downloaded = False
- e = None
- for i in range(max_retries):
- try:
- response = requests.get(db_url, stream=True)
- response.raise_for_status()
+ max_retries = 3
+ downloaded = False
+ e = None
+ for i in range(max_retries):
+ try:
+ response = requests.get(db_url, stream=True)
+ response.raise_for_status()
- with open(cache_path, 'wb') as f:
- for chunk in response.iter_content(chunk_size=8192):
- if chunk:
- f.write(chunk)
- logger.info("File downloaded successfully")
- downloaded = True
- break
+ with open(cache_path, 'wb') as f:
+ for chunk in response.iter_content(chunk_size=8192):
+ if chunk:
+ f.write(chunk)
+ logger.info("File downloaded successfully")
+ downloaded = True
+ break
- except requests.RequestException as e:
- logger.error(
- f"Failed to download {db_url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
- if not downloaded:
- raise requests.RequestException(f"Failed to download {db_url}. No retries left. Error: {e}")
+ except requests.RequestException as e:
+ logger.error(
+ f"Failed to download {db_url} caused by {e}. Retrying... ({max_retries - i - 1} attempts left)")
+ if not downloaded:
+ raise requests.RequestException(f"Failed to download {db_url}. No retries left. Error: {e}")
else:
logger.info("File already exists in cache directory")
# copy a new history file in the tmp folder
diff --git a/desktop_env/desktop_env.py b/desktop_env/desktop_env.py
index 52ab1c0..9f621fc 100644
--- a/desktop_env/desktop_env.py
+++ b/desktop_env/desktop_env.py
@@ -186,7 +186,7 @@ class DesktopEnv(gym.Env):
# mode: human or machine
self.instruction = None
- assert action_space in ["computer_13", "pyautogui", "claude_computer_use"]
+ assert action_space in ["computer_13", "pyautogui", "claude_computer_use", "autoglm_computer_use"]
self.action_space = action_space # todo: refactor it to the ActType
# episodic stuffs, like counters, will be updated or reset
@@ -430,7 +430,7 @@ class DesktopEnv(gym.Env):
"""
postconfig = self.evaluator.get("postconfig", [])
- self.setup_controller.setup(postconfig)
+ self.setup_controller.setup(postconfig, self.enable_proxy)
# Mark environment as used if there were postconfig setup operations
if postconfig:
self.is_environment_used = True
diff --git a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
index d6b95fd..66ee4ac 100644
--- a/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
+++ b/evaluation_examples/examples/multi_apps/5990457f-2adb-467b-a4af-5c857c92d762.json
@@ -9,7 +9,6 @@
"parameters": {
"command": [
"google-chrome",
- "--proxy-server=http://127.0.0.1:18888",
"--remote-debugging-port=1337"
]
}
diff --git a/lib_run_single.py b/lib_run_single.py
index 2c21ad0..0d3fefb 100644
--- a/lib_run_single.py
+++ b/lib_run_single.py
@@ -208,4 +208,62 @@ def run_single_example_opencua(agent, env, example, max_steps, instruction, args
scores.append(result)
with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
f.write(f"{result}\n")
- env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
\ No newline at end of file
+ env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+
+def run_single_example_autoglm(agent, env, example, max_steps, instruction, args, example_result_dir, scores):
+    """Run one AutoGLM episode and write its artifacts to ``example_result_dir``.
+
+    The agent is reset, the environment is reset with ``example`` as task
+    config, and the agent is queried until the environment reports ``done`` or
+    ``max_steps`` predictions have been made. Every executed action produces a
+    screenshot file and one line in ``traj.jsonl``. The evaluation result is
+    appended to ``scores`` and written to ``result.txt``; the screen recording
+    is saved as ``recording.mp4``.
+    """
+    runtime_logger = setup_logger(example, example_result_dir)
+    try:
+        agent.reset(runtime_logger)
+    except Exception:
+        # Best-effort fallback: retry the reset without passing the logger.
+        agent.reset()
+
+    env.reset(task_config=example)
+
+    time.sleep(60)  # Wait for the environment to be ready
+    obs = env._get_obs()  # Get the initial observation
+    done = False
+    step_idx = 0
+    env.controller.start_recording()
+    while not done and step_idx < max_steps:
+        response, actions = agent.predict(
+            instruction,
+            obs
+        )
+        for action in actions:
+            # Capture the timestamp before executing the action
+            action_timestamp = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+            logger.info("Step %d: %s", step_idx + 1, action)
+            obs, reward, done, info = env.step(action, args.sleep_after_execution)
+
+            logger.info("Reward: %.2f", reward)
+            logger.info("Done: %s", done)
+            # Save screenshot and trajectory information
+            with open(os.path.join(example_result_dir, f"step_{step_idx + 1}_{action_timestamp}.png"),
+                      "wb") as _f:
+                _f.write(obs['screenshot'])
+            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+                f.write(json.dumps({
+                    "step_num": step_idx + 1,
+                    "action_timestamp": action_timestamp,
+                    "action": action,
+                    "response": response,
+                    "reward": reward,
+                    "done": done,
+                    "info": info,
+                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
+                }))
+                f.write("\n")
+            if done:
+                logger.info("The episode is done.")
+                break
+
+        step_idx += 1
+
+    # Record the failure marker once, after the loop, instead of after every
+    # unfinished step; the history then holds a single trailing 'FAIL'.
+    if not done:  # not completed the task yet
+        env.action_history.append('FAIL')
+
+    result = env.evaluate()
+    logger.info("Result: %.2f", result)
+    scores.append(result)
+    with open(os.path.join(example_result_dir, "result.txt"), "w", encoding="utf-8") as f:
+        f.write(f"{result}\n")
+    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
diff --git a/mm_agents/autoglm/__init__.py b/mm_agents/autoglm/__init__.py
new file mode 100644
index 0000000..68226a1
--- /dev/null
+++ b/mm_agents/autoglm/__init__.py
@@ -0,0 +1,7 @@
+"""
+AutoGLM agent implementation
+"""
+
+from .main import AutoGLMAgent
+
+__all__ = ["AutoGLMAgent"]
diff --git a/mm_agents/autoglm/main.py b/mm_agents/autoglm/main.py
new file mode 100644
index 0000000..1c296f5
--- /dev/null
+++ b/mm_agents/autoglm/main.py
@@ -0,0 +1,241 @@
+import logging
+import re
+from base64 import b64encode
+from typing import Dict, List
+
+from .prompt.accessibility_tree_handle import linearize_accessibility_tree, trim_accessibility_tree
+from .prompt.grounding_agent import GroundingAgent as Agent
+from .tools.package.google_chrome import BrowserTools
+from .prompt.procedural_memory import Prompt
+
+logger = logging.getLogger("desktopenv.agent")
+
+pure_text_settings = ["a11y_tree"]
+
+
+def parse_code_from_string(input_string):
+    """Extract code snippets and control commands from a model response.
+
+    A response that is exactly ``WAIT``, ``DONE`` or ``FAIL`` (ignoring
+    surrounding whitespace) is returned as a one-element list. Otherwise every
+    triple-backtick block is collected; an optional leading word followed by
+    whitespace (e.g. ``python``) is not part of the captured code. When the
+    last line of a block is a control command, the preceding code and the
+    command become two separate entries.
+    """
+    # fixme: updates this part when we have more commands
+    control_commands = ("WAIT", "DONE", "FAIL")
+
+    whole = input_string.strip()
+    if whole in control_commands:
+        return [whole]
+
+    # Non-greedy capture; DOTALL lets a snippet span several lines.
+    snippets = re.findall(r"```(?:\w+\s+)?(.*?)```", input_string, re.DOTALL)
+
+    codes = []
+    for snippet in snippets:
+        snippet = snippet.strip()
+        *leading_lines, final_line = snippet.split("\n")
+        if leading_lines and final_line in control_commands:
+            # Code followed by a trailing command: emit them separately.
+            codes.append("\n".join(leading_lines))
+            codes.append(final_line)
+        else:
+            # A bare command or plain code is kept as a single entry.
+            codes.append(snippet)
+
+    return codes
+
+
+class AutoGLMAgent:
+    """Agent that maps accessibility-tree observations to grounded actions.
+
+    Each turn, ``predict`` builds chat messages from the observation and the
+    truncated turn history, calls ``gen_func`` on them, and converts the
+    response into executable actions. Finished turns are kept in
+    ``self.contents``.
+    """
+
+    def __init__(
+        self,
+        action_space="autoglm_computer_use",
+        observation_type="a11y_tree",
+        max_trajectory_length=3,
+        a11y_tree_max_items=300,
+        with_image: bool = False,
+        client_password="password",
+        gen_func=None,
+        tool_in_sys_msg: bool = True,
+    ):
+        self.action_space = action_space
+        self.observation_type = observation_type
+        assert action_space in ["autoglm_computer_use"], "Invalid action space"
+        assert observation_type in ["a11y_tree"], "Invalid observation type"
+        self.max_trajectory_length = max_trajectory_length
+        self.a11y_tree_max_items = a11y_tree_max_items
+        self.with_image = with_image
+        self.client_password = client_password
+        self.gen_func = gen_func
+        self.tool_in_sys_msg = tool_in_sys_msg
+
+        # Normalised application name -> name of its tool class.
+        self.tool_list = {
+            "libreoffice_calc": "CalcTools",
+            "libreoffice_impress": "ImpressTools",
+            "libreoffice_writer": "WriterTools",
+            "code": "CodeTools",
+            "vlc": "VLCTools",
+            "google_chrome": "BrowserTools",
+        }
+        self.contents = []
+
+    @property
+    def turn_number(self):
+        """Number of turns recorded so far."""
+        return len(self.contents)
+
+    def prepare(self, instruction: str, obs: Dict, history: List, last_result: str = "") -> List:
+        """Build the chat messages for the next model call.
+
+        The result is a system message (procedural-memory prompts plus the task
+        instruction), followed by ``history``, followed by one user message
+        describing the open apps, the current window, the linearized
+        accessibility tree, the app info and the previous action result. A
+        screenshot is attached when ``with_image`` is set and ``obs`` has one.
+
+        Side effect: when ``obs`` carries an ``exe_result`` and no
+        ``last_result`` was passed, that result is stored on the latest entry
+        of ``self.contents``.
+        """
+        if "exe_result" in obs and not last_result:
+            last_result = obs["exe_result"]
+            if self.contents:
+                self.contents[-1]["exe_result"] = last_result
+
+        cur_app = obs["cur_app"]
+        logger.info(f"current app is {cur_app}")
+
+        if cur_app:
+            tool_name = cur_app.strip().lower().replace("-", "_")
+            tool_name = tool_name if tool_name in self.tool_list.keys() else None
+        else:
+            tool_name = None
+
+        setup_prompt, func_def_prompt, note_prompt = Prompt.construct_procedural_memory(
+            Agent, app_name=tool_name, client_password=self.client_password
+        )
+        if self.tool_in_sys_msg:
+            system_message = setup_prompt + "\n\n" + func_def_prompt + "\n\n" + note_prompt
+        else:
+            system_message = setup_prompt + "\n\n" + note_prompt
+        system_message += "\n\n**IMPORTANT** You are asked to complete the following task: {}".format(instruction)
+
+        messages = [
+            {
+                "role": "system",
+                "content": system_message,
+            }
+        ]
+        messages.extend(history)
+
+        if obs["apps"]:
+            app_str = "Window ID App Name Title\n"
+            for window_id, app in obs["apps"].items():
+                app_str += f"{window_id} {app['app_name']} {app['title']}\n"
+        else:
+            app_str = "None"
+
+        last_result = last_result.strip() if last_result else "None"
+        last_result = last_result[:2000] + "..." if len(last_result) > 2000 else last_result
+
+        tree = linearize_accessibility_tree(obs["accessibility_tree"], "Ubuntu")
+        # Honour the configured limit instead of a hard-coded 300 (the default).
+        tree = trim_accessibility_tree(tree, self.a11y_tree_max_items)
+
+        app_info = obs["app_info"].strip() if obs["app_info"] else "None"
+        app_info = app_info[:5000] + "..." if len(app_info) > 5000 else app_info
+
+        prompt = "* Apps: {}\n\n* Current App: {}\n\n* A11y Tree: {}\n\n* App Info: {}\n\n* Previous Action Result: {}".format(
+            app_str.strip(),
+            obs["cur_window_id"].strip() if obs["cur_window_id"] in app_str else "None",
+            tree.strip(),
+            app_info,
+            last_result if last_result else "None",
+        ) + (
+            "\n\n" + func_def_prompt if not self.tool_in_sys_msg else ""
+        )
+
+        content = [{"type": "text", "text": prompt}]
+        if self.with_image and obs.get('screenshot'):
+            content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/png;base64,{b64encode(obs['screenshot']).decode('utf-8')}",
+                        "detail": "high",
+                    },
+                }
+            )
+
+        messages.append({"role": "user", "content": content})
+
+        return messages
+
+    def execute(self, response, obs):
+        """Turn a model response into a list of grounded actions.
+
+        Only the first snippet parsed from ``response`` is used. Snippets that
+        mention ``Agent.`` or ``BrowserTools.`` are evaluated as Python
+        expressions; anything else is wrapped with ``Agent.tool_commands`` for
+        the current app. Returns an empty list when parsing or grounding fails.
+        """
+        try:
+            actions = parse_code_from_string(response)
+            action = actions[0]
+            logger.info(f"The pesudo action is {action}")
+
+            # TODO: special check for BrowserTools
+            if "Agent." in action or "BrowserTools." in action:
+                # SECURITY: eval() executes model-generated text in this process.
+                # It relies on the module-level names `Agent` and `BrowserTools`;
+                # restrict or sandbox this if responses are not trusted.
+                actions = [
+                    eval(action),
+                ]
+            else:
+                actions = Agent.tool_commands(action, obs["cur_app"].strip().replace("-", "_").lower())
+            logger.info(f"The grounded action is {actions[0]}")
+        except Exception as e:
+            # Deliberate best effort: an unusable response yields no action.
+            logger.error("Failed to parse action from response: %s", e)
+            actions = []
+
+        return actions
+
+    def format_history(self, max_turns=30):
+        """Render recorded turns as alternating user/assistant messages.
+
+        The environment state of past turns is omitted; from the second turn on
+        the previous turn's ``exe_result`` is included. User text is cut at
+        2000 characters and responses at 1500. Only the last ``max_turns``
+        turns are returned.
+        """
+        history = []
+        for ix in range(self.turn_number):
+            if ix == 0:
+                env_input = "**Environment State (Omitted)**"
+            else:
+                env_input = (
+                    f"**Environment State (Omitted)**\nPrevious Action Result: {self.contents[ix - 1]['exe_result']}"
+                )
+
+            env_input = env_input[:2000] + "..." if len(env_input) > 2000 else env_input
+            response = (
+                self.contents[ix]["response"][:1500] + "..."
+                if len(self.contents[ix]["response"]) > 1500
+                else self.contents[ix]["response"]
+            )
+            history.append({"role": "user", "content": [{"type": "text", "text": env_input}]})
+            history.append({"role": "assistant", "content": [{"type": "text", "text": response}]})
+
+        return history[-max_turns * 2:]
+
+    def predict(self, instruction: str, obs: Dict) -> List:
+        """Query the model for the next step and record the turn.
+
+        Returns the pair ``(response, actions)``. A failing ``gen_func`` call is
+        logged and treated as an empty response.
+        """
+        history = self.format_history()
+        messages = self.prepare(instruction, obs, history)
+
+        assert self.gen_func is not None, "gen_func is not set"
+        try:
+            response = self.gen_func(messages)
+        except Exception as e:
+            logger.error("Failed to call gen_func, Error: " + str(e))
+            response = ""
+
+        logger.info("RESPONSE: %s", response)
+
+        actions = self.execute(response, obs)
+
+        # update the contents
+        self.contents.append(
+            {
+                "instruction": instruction,
+                "index": len(self.contents),
+                "response": response,
+                "action": "Parse error" if not actions else actions[0],
+                "exe_result": "Invalid action" if not actions else "",
+                **obs,
+            }
+        )
+        return response, actions
+
+    def reset(self, _logger=None):
+        """Clear the turn history and rebind the module-level logger."""
+        global logger
+        # Fall back to this module's own logger name, not another agent's.
+        logger = _logger if _logger is not None else logging.getLogger("desktopenv.agent")
+
+        self.contents = []
diff --git a/mm_agents/autoglm/prompt/accessibility_tree_handle.py b/mm_agents/autoglm/prompt/accessibility_tree_handle.py
new file mode 100644
index 0000000..a9a392d
--- /dev/null
+++ b/mm_agents/autoglm/prompt/accessibility_tree_handle.py
@@ -0,0 +1,329 @@
+import io
+import re
+import xml.etree.ElementTree as ET
+from typing import List, Tuple
+
+from PIL import Image, ImageDraw, ImageFont
+
+from .deduplicate_node import filter_similar_nodes
+
+attributes_ns_ubuntu = "https://accessibility.windows.example.org/ns/attributes"
+attributes_ns_windows = "https://accessibility.windows.example.org/ns/attributes"
+state_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/state"
+state_ns_windows = "https://accessibility.windows.example.org/ns/state"
+component_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/component"
+component_ns_windows = "https://accessibility.windows.example.org/ns/component"
+value_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/value"
+value_ns_windows = "https://accessibility.windows.example.org/ns/value"
+class_ns_windows = "https://accessibility.windows.example.org/ns/class"
+
+
+def find_leaf_nodes(xlm_file_str):
+    """Return every childless element of the given XML string, in document order.
+
+    An empty or missing string yields an empty list.
+    """
+    if not xlm_file_str:
+        return []
+
+    document_root = ET.fromstring(xlm_file_str)
+    # iter() walks the tree depth-first in document order, root included.
+    return [element for element in document_root.iter() if len(element) == 0]
+
+
+def judge_node(node: ET.Element, platform="Ubuntu", check_image=False) -> bool:
+    """Decide whether an accessibility-tree node should be kept.
+
+    A node is kept when all of the following hold:
+
+    * its tag is one of the selected kinds (by prefix, suffix or exact name);
+    * it is marked visible (and, on Ubuntu, also showing);
+    * it has a non-empty name, non-empty text, or, with ``check_image``, an
+      ``image`` attribute equal to ``"true"``;
+    * its screen coordinates are non-negative and its size is positive.
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+            Malformed coordinate/size attributes on an otherwise kept node
+            also raise (``ValueError`` or ``SyntaxError``).
+    """
+    # Local import keeps this block self-contained.
+    from ast import literal_eval
+
+    if platform == "Ubuntu":
+        _state_ns = state_ns_ubuntu
+        _component_ns = component_ns_ubuntu
+    elif platform == "Windows":
+        _state_ns = state_ns_windows
+        _component_ns = component_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    kept_suffixes = (
+        "item",
+        "button",
+        "heading",
+        "label",
+        "scrollbar",
+        "searchbox",
+        "textbox",
+        "link",
+        "tabelement",
+        "textfield",
+        "textarea",
+        "menu",
+    )
+    kept_tags = {
+        "alert",
+        "canvas",
+        "check-box",
+        "combo-box",
+        "entry",
+        "icon",
+        "image",
+        "paragraph",
+        "scroll-bar",
+        "section",
+        "slider",
+        "static",
+        "table-cell",
+        "terminal",
+        "text",
+        "netuiribbontab",
+        "start",
+        "trayclockwclass",
+        "traydummysearchcontrol",
+        "uiimage",
+        "uiproperty",
+        "uiribboncommandbar",
+    }
+    tag_ok = node.tag.startswith("document") or node.tag.endswith(kept_suffixes) or node.tag in kept_tags
+    if not tag_ok:
+        return False
+
+    visible = node.get("{{{:}}}visible".format(_state_ns), "false") == "true"
+    if platform == "Ubuntu":
+        # Ubuntu nodes must additionally be reported as showing.
+        visible = visible and node.get("{{{:}}}showing".format(_state_ns), "false") == "true"
+    if not visible:
+        return False
+
+    has_content = (
+        node.get("name", "") != ""
+        or (node.text is not None and len(node.text) > 0)
+        or (check_image and node.get("image", "false") == "true")
+    )
+    if not has_content:
+        return False
+
+    # The attributes hold tuple literals such as "(12, 34)". literal_eval parses
+    # them without executing arbitrary code, unlike eval.
+    coordinates: Tuple[int, int] = literal_eval(node.get("{{{:}}}screencoord".format(_component_ns), "(-1, -1)"))
+    sizes: Tuple[int, int] = literal_eval(node.get("{{{:}}}size".format(_component_ns), "(-1, -1)"))
+    return coordinates[0] >= 0 and coordinates[1] >= 0 and sizes[0] > 0 and sizes[1] > 0
+
+
+def filter_nodes(root: ET, platform="Ubuntu", check_image=False):
+    """Return all nodes under ``root`` (itself included) accepted by ``judge_node``."""
+    return [candidate for candidate in root.iter() if judge_node(candidate, platform, check_image)]
+
+
+def draw_bounding_boxes(nodes, image_file_content, down_sampling_ratio=1.0, platform="Ubuntu"):
+    """Draw numbered bounding boxes for ``nodes`` onto a screenshot.
+
+    Args:
+        nodes: elements carrying ``screencoord`` and ``size`` attributes.
+        image_file_content: encoded screenshot bytes.
+        down_sampling_ratio: scale applied to the image and the drawn boxes.
+        platform: ``"Ubuntu"`` or ``"Windows"``; selects the attribute namespaces.
+
+    Returns:
+        ``(marks, drew_nodes, text, image_content)``: ``marks`` holds one
+        ``[x, y, w, h]`` per drawn node in original (unscaled) coordinates,
+        ``drew_nodes`` the matching nodes, ``text`` a tab-separated table of
+        index, tag, name and text, and ``image_content`` the annotated image
+        as PNG bytes.
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+    """
+    if platform == "Ubuntu":
+        _component_ns = component_ns_ubuntu
+        _value_ns = value_ns_ubuntu
+    elif platform == "Windows":
+        _component_ns = component_ns_windows
+        _value_ns = value_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    # Attribute keys are loop-invariant; build them once.
+    coord_key = "{{{:}}}screencoord".format(_component_ns)
+    size_key = "{{{:}}}size".format(_component_ns)
+    value_key = "{{{:}}}value".format(_value_ns)
+    class_key = "{{{:}}}class".format(class_ns_windows)
+    scaled = float(down_sampling_ratio) != 1.0
+
+    # Load the screenshot image
+    image_stream = io.BytesIO(image_file_content)
+    image = Image.open(image_stream)
+    if scaled:
+        image = image.resize((int(image.size[0] * down_sampling_ratio), int(image.size[1] * down_sampling_ratio)))
+    draw = ImageDraw.Draw(image)
+    marks = []
+    drew_nodes = []
+    text_informations: List[str] = ["index\ttag\tname\ttext"]
+
+    try:
+        # Adjust the path to the font file you have or use a default one
+        font = ImageFont.truetype("arial.ttf", 15)
+    except IOError:
+        # Fallback to a basic font if the specified font can't be loaded
+        font = ImageFont.load_default()
+
+    index = 1
+
+    # Loop over all the visible nodes and draw their bounding boxes
+    for _node in nodes:
+        coords_str = _node.attrib.get(coord_key)
+        size_str = _node.attrib.get(size_key)
+        if not (coords_str and size_str):
+            continue
+
+        try:
+            # Parse the coordinates and size from the strings. Tuples of ints
+            # are immutable, so the originals need no defensive copy.
+            original_coords = tuple(map(int, coords_str.strip("()").split(", ")))
+            original_size = tuple(map(int, size_str.strip("()").split(", ")))
+
+            if scaled:
+                # Downsample the coordinates and size
+                coords = tuple(int(coord * down_sampling_ratio) for coord in original_coords)
+                size = tuple(int(s * down_sampling_ratio) for s in original_size)
+            else:
+                coords = original_coords
+                size = original_size
+
+            # Check for negative sizes
+            if size[0] <= 0 or size[1] <= 0:
+                raise ValueError(f"Size must be positive, got: {size}")
+
+            # Calculate the bottom-right corner of the bounding box
+            bottom_right = (coords[0] + size[0], coords[1] + size[1])
+
+            # Check that bottom_right > coords (x1 >= x0, y1 >= y0)
+            if bottom_right[0] < coords[0] or bottom_right[1] < coords[1]:
+                raise ValueError(f"Invalid coordinates or size, coords: {coords}, size: {size}")
+
+            # Skip areas that contain a single colour only; the index is not advanced.
+            cropped_image = image.crop((*coords, *bottom_right))
+            if len(set(list(cropped_image.getdata()))) == 1:
+                continue
+
+            # Draw rectangle on image
+            draw.rectangle([coords, bottom_right], outline="red", width=1)
+
+            # Draw the index number at the bottom left of the box on a black background
+            text_position = (coords[0], bottom_right[1])
+            text_bbox: Tuple[int, int, int, int] = draw.textbbox(text_position, str(index), font=font, anchor="lb")
+            draw.rectangle(text_bbox, fill="black")
+            draw.text(text_position, str(index), font=font, anchor="lb", fill="white")
+
+            # each mark is an x, y, w, h tuple
+            marks.append([original_coords[0], original_coords[1], original_size[0], original_size[1]])
+            drew_nodes.append(_node)
+
+            # Text containing a double quote is wrapped in quotes with the inner quotes doubled.
+            if _node.text:
+                node_text = _node.text if '"' not in _node.text else '"{:}"'.format(_node.text.replace('"', '""'))
+            elif _node.get(class_key, "").endswith("EditWrapper") and _node.get(value_key):
+                node_text = _node.get(value_key, "")
+                node_text = node_text if '"' not in node_text else '"{:}"'.format(node_text.replace('"', '""'))
+            else:
+                node_text = '""'
+            text_information: str = "{:d}\t{:}\t{:}\t{:}".format(index, _node.tag, _node.get("name", ""), node_text)
+            text_informations.append(text_information)
+
+            index += 1
+
+        except ValueError:
+            # Best effort: a node raising ValueError (malformed or degenerate
+            # geometry) is skipped.
+            continue
+
+    output_image_stream = io.BytesIO()
+    image.save(output_image_stream, format="PNG")
+    image_content = output_image_stream.getvalue()
+
+    return marks, drew_nodes, "\n".join(text_informations), image_content
+
+
+def print_nodes_with_indent(nodes, indent=0):
+    """Print each node's tag and attributes, recursing into children with two extra spaces of indent."""
+    padding = " " * indent
+    child_indent = indent + 2
+    for element in nodes:
+        print(padding, element.tag, element.attrib)
+        print_nodes_with_indent(element, child_indent)
+
+
+def find_active_applications(tree, state_ns):
+    """Return the application names whose subtrees should be kept.
+
+    An application is listed once per direct child frame whose ``active``
+    state attribute is ``"true"``. ``"gnome-shell"`` is always appended; when
+    no frame is active the result is ``["gjs", "gnome-shell"]``.
+    """
+    active_attr = "{{{:}}}active".format(state_ns)
+    active_apps = [
+        app.attrib.get("name")
+        for app in list(tree.getroot())
+        for window in app
+        if window.attrib.get(active_attr, "false") == "true"
+    ]
+    if not active_apps:
+        return ["gjs", "gnome-shell"]
+    return active_apps + ["gnome-shell"]
+
+
+def linearize_accessibility_tree(accessibility_tree, platform="Ubuntu"):
+    """Flatten an accessibility-tree XML string into a tab-separated table.
+
+    Applications that are not active are removed (see
+    ``find_active_applications``), the remaining nodes are filtered with
+    ``filter_nodes``, and each node becomes one row of tag, text, centre
+    position and size. Near-duplicate rows are removed by
+    ``filter_similar_nodes``.
+
+    Returns:
+        The table as a string, or ``""`` when the tree cannot be processed.
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+    """
+    if platform == "Ubuntu":
+        _state_ns = state_ns_ubuntu
+        _component_ns = component_ns_ubuntu
+    elif platform == "Windows":
+        _state_ns = state_ns_windows
+        _component_ns = component_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    # Raw string: "\(" and "\d" are regex escapes, not string escapes.
+    pair_pattern = re.compile(r"\((\d+), (\d+)\)")
+    coord_key = "{{{:}}}screencoord".format(_component_ns)
+    size_key = "{{{:}}}size".format(_component_ns)
+
+    try:
+        tree = ET.ElementTree(ET.fromstring(accessibility_tree))
+        keep_apps = find_active_applications(tree, _state_ns)
+
+        # Remove inactive applications
+        for application in list(tree.getroot()):
+            if application.get("name") not in keep_apps:
+                tree.getroot().remove(application)
+
+        filtered_nodes = filter_nodes(tree.getroot(), platform, check_image=True)
+        linearized_accessibility_tree = ["tag\ttext\tposition (center x & y)\tsize (w & h)"]
+
+        # Linearize the accessibility tree nodes into a table format
+        for node in filtered_nodes:
+            text = (node.text or "").strip()
+            name = node.get("name", "").strip()
+            if text == "":
+                text = name
+            elif name != "" and text != name:
+                text = f"{name} ({text})"
+            text = text.replace("\n", "\\n")
+
+            size = node.get(size_key, "")
+            pos_match = pair_pattern.match(node.get(coord_key, ""))
+            size_match = pair_pattern.match(size)
+            if pos_match is None or size_match is None:
+                # Nodes without well-formed geometry are left out of the table.
+                continue
+
+            x, y = pos_match.groups()
+            w, h = size_match.groups()
+            x_mid, y_mid = int(x) + int(w) // 2, int(y) + int(h) // 2
+
+            linearized_accessibility_tree.append(
+                "{:}\t{:}\t{:}\t{:}".format(node.tag, text, f"({x_mid}, {y_mid})", size)
+            )
+
+        # Filter out similar nodes
+        linearized_accessibility_tree = filter_similar_nodes("\n".join(linearized_accessibility_tree))
+    except Exception as e:
+        # Best effort: an unparsable tree yields an empty table rather than a crash.
+        print(f"Error in linearize_accessibility_tree: {e}")
+        linearized_accessibility_tree = ""
+
+    return linearized_accessibility_tree
+
+
+def trim_accessibility_tree(linearized_accessibility_tree, max_items):
+    """Cut the table to its first ``max_items`` lines and append ``...``.
+
+    Input that already fits is returned unchanged.
+    """
+    rows = linearized_accessibility_tree.strip().split("\n")
+    if len(rows) <= max_items:
+        return linearized_accessibility_tree
+    return "\n".join(rows[:max_items]) + "\n..."
diff --git a/mm_agents/autoglm/prompt/deduplicate_node.py b/mm_agents/autoglm/prompt/deduplicate_node.py
new file mode 100644
index 0000000..824a2e1
--- /dev/null
+++ b/mm_agents/autoglm/prompt/deduplicate_node.py
@@ -0,0 +1,100 @@
+import re
+
+
+def parse_line(line):
+    """Parse one table row into a node dict, or return ``None`` if it does not match.
+
+    Expected format, e.g.: label Google Chrome (191, 13) (104, 17)
+    where the first pair is the centre and the second pair is the size.
+    """
+    matched = re.match(r"^(\S+)\s+(.+?)\s+\((\d+), (\d+)\)\s+\((\d+), (\d+)\)", line)
+    if matched is None:
+        return None
+
+    node_type, text = matched.group(1), matched.group(2)
+    cx, cy, w, h = (int(value) for value in matched.groups()[2:])
+
+    # bounding box as (x1, y1, x2, y2), derived from centre and size
+    left = cx - w // 2
+    top = cy - h // 2
+    return {
+        "type": node_type,
+        "text": text.strip(),
+        "bbox": (left, top, left + w, top + h),
+        "center": (cx, cy),
+        "size": (w, h),
+        "raw": line,
+    }
+
+
+def iou(box1, box2):
+    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.
+
+    Returns 0 when the union has zero area.
+    """
+    ax1, ay1, ax2, ay2 = box1[0], box1[1], box1[2], box1[3]
+    bx1, by1, bx2, by2 = box2[0], box2[1], box2[2], box2[3]
+
+    # Overlap extents are clamped at zero for disjoint boxes.
+    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
+    overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
+    inter_area = overlap_w * overlap_h
+
+    union = (ax2 - ax1) * (ay2 - ay1) + (bx2 - bx1) * (by2 - by1) - inter_area
+    if union == 0:
+        return 0
+    return inter_area / union
+
+
+def norm_text(s):
+    """Normalise text for comparison: lower-case it and remove all whitespace."""
+    lowered = s.lower()
+    return re.sub(r"\s+", "", lowered)
+
+
+def text_similarity(a, b):
+    """Return 1.0 when the normalised texts are identical, otherwise 0."""
+    return 1.0 if norm_text(a) == norm_text(b) else 0
+
+
+def filter_similar_nodes(linearized_accessibility_tree):
+    """Drop rows that duplicate an earlier row in both position and text.
+
+    Two parsed rows count as duplicates when their boxes overlap with an IoU
+    above ``IOU_THRESH`` and their text similarity exceeds ``TEXT_THRESH``;
+    the later row is removed. Rows that cannot be parsed are always kept.
+    Blank lines are discarded.
+    """
+    # Thresholds may be tuned as needed.
+    IOU_THRESH = 0.2
+    TEXT_THRESH = 0.9
+
+    # Parse every non-blank line; unparsable lines are kept verbatim.
+    nodes = []
+    for raw_line in linearized_accessibility_tree.split("\n"):
+        if not raw_line.strip():
+            continue
+        parsed = parse_line(raw_line)
+        nodes.append(parsed if parsed else {"raw": raw_line, "invalid": True})
+
+    dropped = set()
+    kept_lines = []
+    for i, current in enumerate(nodes):
+        if current.get("invalid"):
+            kept_lines.append(current["raw"])
+            continue
+        if i in dropped:
+            continue
+        for j in range(i + 1, len(nodes)):
+            later = nodes[j]
+            if later.get("invalid"):
+                continue
+            overlap = iou(current["bbox"], later["bbox"])
+            same_text = text_similarity(current["text"], later["text"])
+            if overlap > IOU_THRESH and same_text > TEXT_THRESH:
+                # The two are near-identical: drop the later one.
+                dropped.add(j)
+        # The current node survived every earlier comparison, so keep it.
+        kept_lines.append(current["raw"])
+    return "\n".join(kept_lines)
+
+
+# Example usage
+if __name__ == "__main__":
+ linearized_accessibility_tree = "tag\ttext\tposition (center x & y)\tsize (w & h)\nicon\t\t(1853, 1001)\t(64, 64)\nlabel\tHome\t(1853, 1045)\t(40, 17)\nlabel\tActivities\t(49, 13)\t(63, 17)\ntext\tActivities\t(49, 13)\t(63, 17)\nlabel\tApr 17 17∶04\t(995, 13)\t(117, 27)\ntext\tApr 17 17∶04\t(995, 13)\t(87, 18)\nmenu\tSystem\t(1867, 13)\t(106, 27)\npush-button\tGoogle Chrome\t(35, 65)\t(70, 64)\npush-button\tThunderbird Mail\t(35, 133)\t(70, 64)\npush-button\tVisual Studio Code\t(35, 201)\t(70, 64)\npush-button\tVLC media player\t(35, 269)\t(70, 64)\npush-button\tLibreOffice Writer\t(35, 337)\t(70, 64)\npush-button\tLibreOffice Calc\t(35, 405)\t(70, 64)\npush-button\tLibreOffice Impress\t(35, 473)\t(70, 64)\npush-button\tGNU Image Manipulation Program\t(35, 541)\t(70, 64)\npush-button\tFiles\t(35, 609)\t(70, 64)\npush-button\tUbuntu Software\t(35, 677)\t(70, 64)\npush-button\tHelp\t(35, 745)\t(70, 64)\npush-button\tTrash\t(35, 816)\t(70, 64)\ntoggle-button\tShow Applications\t(35, 1045)\t(70, 70)"
+ result = filter_similar_nodes(linearized_accessibility_tree)
+ print(result)
diff --git a/mm_agents/autoglm/prompt/grounding_agent.py b/mm_agents/autoglm/prompt/grounding_agent.py
new file mode 100644
index 0000000..cd63d5b
--- /dev/null
+++ b/mm_agents/autoglm/prompt/grounding_agent.py
@@ -0,0 +1,259 @@
+import base64
+import json
+import logging
+import os
+import xml.etree.ElementTree as ET
+from typing import Dict, List, Optional, Tuple
+
+# Module-level logger, obtained under the "desktopenv.agent" name.
+logger = logging.getLogger("desktopenv.agent")
+
+
+def agent_action(func):
+    """Tag ``func`` as an agent action and return the same object.
+
+    The tag is an ``is_agent_action`` attribute set to ``True`` on the
+    callable itself; the callable is not wrapped or otherwise changed.
+    """
+    setattr(func, "is_agent_action", True)
+    return func
+
+
+# Python snippet template that dismisses any open popup, then activates and
+# maximizes a window through wmctrl. Every literal WINDOW_ID is a placeholder
+# to be substituted with the real window id before the snippet is executed.
+# The snippet imports everything it uses (including ``time`` for the sleep),
+# so it does not depend on what the executing wrapper pre-imports.
+switch_window_code = """import subprocess;
+import time;
+import pyautogui;
+pyautogui.press('escape');
+time.sleep(0.5);
+subprocess.run(['wmctrl', '-ia', 'WINDOW_ID'])
+subprocess.run(['wmctrl', '-ir', 'WINDOW_ID', '-b', 'add,maximized_vert,maximized_horz'])
+print('Switch to WINDOW_ID')"""
+
+# Shell prefix shared by the GNOME apps below that are launched with an
+# explicit session-bus address.
+_DBUS_SESSION_EXPORT = 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" && '
+
+# Maps a lower-case application name to the shell command that launches it.
+launch_app_commands = {
+    # Web Browser
+    "chrome": "google-chrome --remote-debugging-port=1337",
+    # File Manager
+    "files": "nautilus",
+    # Terminal
+    "terminal": _DBUS_SESSION_EXPORT + "gnome-terminal",
+    # Utilities
+    "gedit": "gedit",
+    # Office
+    "libreoffice writer": "libreoffice --writer",
+    "libreoffice calc": "libreoffice --calc",
+    "libreoffice impress": "libreoffice --impress",
+    # System
+    "settings": _DBUS_SESSION_EXPORT + "gnome-control-center",
+    # Multimedia
+    "vlc": "vlc",
+    "gimp": "gimp",
+    # IDE
+    "vs code": "code",
+    # Email
+    "thunderbird": "thunderbird",
+}
+
+
+class GroundingAgent:
+    """Action space that turns high-level agent actions into executable commands.
+
+    Every method returns either a Python snippet (as a string), a sentinel
+    string ("WAIT", "DONE", "FAIL"), or, for ``open_app``, an action dict.
+
+    Methods decorated with ``@agent_action`` have their signatures and
+    docstrings rendered into the model prompt (see
+    ``Prompt.construct_procedural_memory``), so those are kept verbatim and
+    implementation notes live in ``#`` comments instead.
+    """
+
+    # Maps a tool module name to the tool class exported by that module.
+    tool_list = {
+        "libreoffice_calc": "CalcTools",
+        "libreoffice_impress": "ImpressTools",
+        "libreoffice_writer": "WriterTools",
+        "code": "CodeTools",
+        "vlc": "VLCTools",
+        "google_chrome": "BrowserTools",
+    }
+
+    @classmethod
+    def tool_commands(cls, code: str, tool_name: str):
+        """Wrap ``code`` so it runs against the tool module ``tool_name``.
+
+        Args:
+            code: Python source to run after the tool module's star import.
+            tool_name: Key of ``tool_list`` naming the module to import from.
+
+        Returns:
+            A one-element list holding the full command string, which ends by
+            calling ``print_result()`` on the matching tool class.
+
+        Raises:
+            KeyError: If ``tool_name`` is not a key of ``tool_list``.
+        """
+        tool_class = cls.tool_list[tool_name]
+        command = f"from {tool_name} import *; {code}; {tool_class}.print_result()"
+        return [command]
+
+    @classmethod
+    @agent_action
+    def click(
+        cls,
+        coordinates: List,
+        num_clicks: int = 1,
+        button_type: str = "left",
+    ):
+        """
+        Click on the element.
+
+        Args:
+            coordinates (List): [x, y], Coordinates of the element to click on
+            num_clicks (int): number of times to click the element
+            button_type (str): which mouse button to press can be "left", "middle", or "right"
+        """
+        x, y = coordinates
+        # TODO: maximizing a window needs a single call
+        return f"""pyautogui.click({x}, {y}, clicks={num_clicks}, button={repr(button_type)}); print("Click Success")"""
+
+    @classmethod
+    @agent_action
+    def type(
+        cls,
+        coordinates: Optional[List] = None,
+        text: str = "",
+        overwrite: bool = False,
+        enter: bool = False,
+    ):
+        """
+        Type text into the element.
+
+        Args:
+            coordinates (List): [x, y] Coordinates of the element to type into. If not provided, typing will start at the current cursor location.
+            text (str): the text to type
+            overwrite (bool): Assign it to True if the text should overwrite the existing text, otherwise assign it to False. Using this argument clears all text in an element.
+            enter (bool): Assign it to True if the enter key should be pressed after typing the text, otherwise assign it to False.
+        """
+        command = ""
+
+        if coordinates is not None:
+            # Focus the target by clicking it before typing.
+            x, y = coordinates
+            command += f"pyautogui.click({x}, {y}); "
+
+        if overwrite:
+            # Select everything in the focused element and delete it.
+            command += "pyautogui.hotkey('ctrl', 'a'); pyautogui.press('backspace'); "
+
+        # repr() yields a correctly escaped Python literal for arbitrary text.
+        command += f"pyautogui.write({repr(text)}); "
+
+        if enter:
+            command += "pyautogui.press('enter'); "
+
+        command += "print('Type Success')"
+
+        return command
+
+    @classmethod
+    @agent_action
+    def drag_and_drop(cls, drag_from_coordinates: List, drop_on_coordinates: List):
+        """
+        Drag element1 and drop it on element2.
+
+        Args:
+            drag_from_coordinates (List): [x, y] Coordinates of element to drag
+            drop_on_coordinates (List): [x, y] Coordinates of element to drop on
+        """
+        x1, y1 = drag_from_coordinates
+        x2, y2 = drop_on_coordinates
+
+        command = f"pyautogui.moveTo({x1}, {y1}); "
+        # TODO: specified duration?
+        command += f"pyautogui.dragTo({x2}, {y2}, duration=1.); pyautogui.mouseUp(); "
+
+        command += "print('Drag and Drop Success')"
+
+        return command
+
+    @classmethod
+    @agent_action
+    def scroll(cls, coordinates: List, direction: str):
+        """
+        Scroll the element in the specified direction.
+
+        Args:
+            coordinates (List): [x, y] Coordinates of the element to scroll in
+            direction (str): the direction to scroll can be "up" or "down".
+        """
+        x, y = coordinates
+        # Any direction other than "up" scrolls down.
+        amount = 100 if direction == "up" else -100
+        return f"import pyautogui; pyautogui.moveTo({x}, {y}); pyautogui.scroll({amount}); print('Scroll Success')"
+
+    @classmethod
+    @agent_action
+    def open_app(cls, app_name: str):
+        """
+        Open a specified application.
+
+        App List:
+        - chrome
+        - files
+        - terminal
+        - gedit
+        - libreoffice writer
+        - libreoffice calc
+        - libreoffice impress
+        - vs code
+        - vlc
+        - gimp
+        - settings
+        - thunderbird
+
+        Args:
+            app_name (str): Name of the application to open
+        """
+        app_name = app_name.lower().strip()
+
+        if app_name not in launch_app_commands:
+            # Emit the message as an escaped literal: the name comes from the
+            # model and may contain quotes or braces that would otherwise
+            # break the generated print statement.
+            message = f"{app_name} is not supported or recognized"
+            return f"print({message!r})"
+
+        return {
+            "action_type": "OPEN_APP",
+            "parameters": {"launch_app_command": launch_app_commands[app_name], "app_name": app_name},
+        }
+
+    @classmethod
+    @agent_action
+    def switch_window(cls, window_id: str):
+        """
+        Switch to the window with the given window id.
+
+        Args:
+            window_id (str): the window id to switch to from the provided list of open windows
+        """
+        return switch_window_code.replace("WINDOW_ID", window_id)
+
+    @classmethod
+    @agent_action
+    def hotkey(cls, keys: List):
+        """
+        Press a hotkey combination.
+
+        Args:
+            keys (List): the keys to press in combination in a list format (e.g. ['ctrl', 'c'] for copy, ['prtsc'] for screenshot)
+        """
+        # Quote each key with repr() so a key containing a quote, backslash or
+        # brace still produces valid code; the printed text is unchanged for
+        # ordinary key names.
+        quoted_keys = ", ".join(repr(str(key)) for key in keys)
+        message = f"Press Hotkey: {quoted_keys}"
+        return f"import pyautogui; pyautogui.hotkey({quoted_keys}); print({message!r})"
+
+    @classmethod
+    @agent_action
+    def quote(cls, content: str):
+        """
+        Quoting information from the current page for memory. Only you can see the quoted content.
+
+        Args:
+            content (str): text summarized or copied from the page for later operation.
+        """
+        # A repr() literal prints the content exactly, even when it contains
+        # triple quotes, a trailing double quote or backslashes, all of which
+        # would corrupt a hand-built triple-quoted literal.
+        text = content if isinstance(content, str) else str(content)
+        return f"print({text!r})"
+
+    @classmethod
+    @agent_action
+    def wait(cls):
+        """
+        Wait for a while.
+
+        """
+        return "WAIT"
+
+    @classmethod
+    @agent_action
+    def exit(cls, success: bool):
+        """
+        End the current task.
+
+        Args:
+            success (bool): True if successfully finish a task, otherwise set it False
+        """
+        return "DONE" if success else "FAIL"
diff --git a/mm_agents/autoglm/prompt/procedural_memory.py b/mm_agents/autoglm/prompt/procedural_memory.py
new file mode 100644
index 0000000..9de00aa
--- /dev/null
+++ b/mm_agents/autoglm/prompt/procedural_memory.py
@@ -0,0 +1,202 @@
+import inspect
+import json
+import os
+import textwrap
+
+# Parent of the directory that contains this file; tool schema paths are built from it.
+current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def _render_stub(func, func_name, indent, leading_params=()):
+    """Render one ``def`` line plus docstring for a tool-function schema entry.
+
+    Args:
+        func: The ``"function"`` mapping of a schema item (name, description, parameters).
+        func_name: Name to emit after ``def``.
+        indent: Whitespace prefix of the ``def`` line; the docstring sits one level deeper.
+        leading_params: Parameter names emitted before the schema's own (e.g. ``("cls",)``).
+
+    Returns:
+        The stub text, terminated by one blank line.
+    """
+    schema = func.get("parameters") or {}
+    params = schema.get("properties") or {}
+    required = schema.get("required") or []
+    optional = [name for name in params if name not in required]
+
+    body = indent + "    "
+    arg = body + "    "
+    # Required parameters come first, then optional ones; no defaults are emitted.
+    signature = ", ".join([*leading_params, *required, *optional])
+    lines = [
+        f"{indent}def {func_name}({signature}):",
+        f'{body}"""',
+        f"{body}{func['description']}",
+        "",
+        f"{body}Args:",
+    ]
+    if not required and not optional:
+        lines.append(f"{arg}None")
+    for name in required:
+        # A required name missing from "properties" is still documented.
+        spec = params.get(name, {})
+        lines.append(f"{arg}{name} ({spec.get('type', 'any')}): {spec.get('description', '')}")
+    for name in optional:
+        spec = params[name]
+        lines.append(f"{arg}{name} ({spec.get('type', 'any')}, optional): {spec.get('description', '')}")
+    lines.append(f'{body}"""')
+    return "\n".join(lines) + "\n\n"
+
+
+def generate_func(json_data):
+    """Render tool-function schemas as Python class and function stubs.
+
+    Items whose ``"type"`` is not ``"function"`` are ignored. A name of the
+    form ``Class.method`` becomes a method stub (first parameter ``cls``)
+    under ``class Class:``; any other name becomes a module-level stub,
+    emitted after all classes.
+
+    Args:
+        json_data: Iterable of schema items, each with ``"type"`` and ``"function"`` keys.
+
+    Returns:
+        A ``(code, cls_name)`` tuple: the stripped stub source, and the name of
+        the last class emitted (``""`` when there is none).
+    """
+    # Group the function items by class name, preserving first-seen order.
+    class_funcs = {}
+    no_class_funcs = []
+    cls_name = ""
+
+    for item in json_data:
+        if item["type"] != "function":
+            continue
+        name_parts = item["function"]["name"].split(".")
+        if len(name_parts) == 2:
+            class_funcs.setdefault(name_parts[0], []).append(item)
+        else:
+            no_class_funcs.append(item)
+
+    chunks = []
+
+    # Stubs that belong to a class.
+    for class_name, funcs in class_funcs.items():
+        cls_name = class_name
+        chunks.append(f"class {class_name}:\n")
+        for item in funcs:
+            func = item["function"]
+            chunks.append(_render_stub(func, func["name"].split(".")[-1], "    ", ("cls",)))
+        chunks.append("\n")
+
+    # Stubs without a class.
+    for item in no_class_funcs:
+        func = item["function"]
+        chunks.append(_render_stub(func, func["name"], ""))
+
+    return "".join(chunks).strip(), cls_name
+
+
+# Opening instructions for the agent. Prompt.construct_procedural_memory returns
+# this string as-is (it is not passed through str.format), so the literal
+# braces around the placeholders below are emitted unchanged.
+setup_prompt = """You are an agent which follow my instruction and perform desktop computer tasks as instructed.
+You have good knowledge of computer and good internet connection and assume your code will run on a computer for controlling the mouse and keyboard.
+For each step, you will get an observation of the desktop by 1) screenshot; 2) current application name; 3) accessibility tree, which is based on AT-SPI library; 4) application info; 5) last action result.
+You should first generate a plan for completing the task, confirm the previous results, reflect on the current status, then generate operations to complete the task in python-style pseudo code using the predefined functions.
+
+Your output should STRICTLY follow the format:
+
+{**YOUR-PLAN-AND-THINKING**}
+
+```python
+{**ONE-LINE-OF-CODE**}
+```"""
+
+# Function-definition section used when an app-specific tool class is present.
+# str.format fields: {tool_class_name}, {app_name}, {class_content}.
+func_def_tool_template = """You will be provided access to the following methods to interact with the UI:
+ 1. class Agent, a grounding agent which provides basic action space to interact with desktop.
+ 2. class {tool_class_name}, which provides tools to interact with the current application {app_name}.
+
+Here are the defination of the classes:
+```python
+{class_content}
+```"""
+
+# Function-definition section used when only the Agent class is described.
+# str.format field: {class_content}.
+func_def_template = """You will be provided access to the following methods to interact with the UI:
+
+```python
+{class_content}
+```"""
+
+# Closing rules for the agent. str.format field: {client_password}.
+# NOTE(review): the first bullet ends with "should be wrapped in ." - the
+# wrapper it refers to appears to be missing; confirm the intended text.
+note_prompt = """* Note:
+- Your code should be wrapped in ```python```, and your plan and thinking should be wrapped in .
+- Only **ONE-LINE-OF-CODE** at a time.
+- Each code block is context independent, and variables from the previous round cannot be used in the next round.
+- Do not put anything other than python code in ```python```.
+- You **can only use the above methods to interact with the UI**, do not invent new methods.
+- Return with `Agent.exit(success=True)` immediately after the task is completed.
+- If you think cannot complete the task, **DO NOT keep repeating actions, just return with `Agent.exit(success=False)`.**
+- The computer's environment is Linux, e.g., Desktop path is '/home/user/Desktop'
+- My computer's password is '{client_password}', feel free to use it when you need sudo rights"""
+
+
+class Prompt:
+    """Assembles the prompt sections that describe the agent's action space."""
+
+    @staticmethod
+    def construct_procedural_memory(agent_class, app_name=None, client_password="password"):
+        """Build the three prompt sections for ``agent_class``.
+
+        Args:
+            agent_class: Class whose callables carrying an ``is_agent_action``
+                attribute are rendered as method stubs (signature plus docstring).
+            app_name: When given, ``tools/apis/<app_name lower-cased>.json`` under
+                ``current_dir`` is loaded and its tool stubs are appended.
+            client_password: Value substituted into the note section.
+
+        Returns:
+            A ``(setup_prompt, func_def_prompt, note_prompt)`` tuple of strings.
+
+        Raises:
+            OSError: If the tool schema file for ``app_name`` cannot be opened.
+        """
+        agent_class_content = "Class Agent:"
+        for attr_name in dir(agent_class):
+            attr = getattr(agent_class, attr_name)
+            if callable(attr) and hasattr(attr, "is_agent_action"):
+                # inspect.signature gives the full parameter list with defaults.
+                signature = inspect.signature(attr)
+                # Emit the stub as indented Python: def one level in, docstring two.
+                agent_class_content += (
+                    f"\n    def {attr_name}{signature}:\n"
+                    f"        '''{attr.__doc__}'''\n"
+                    "        "
+                )
+
+        if app_name is not None:
+            tool_path = os.path.join(current_dir, "tools", "apis", f"{app_name.lower()}.json")
+            # Read as UTF-8 explicitly so parsing does not depend on the locale.
+            with open(tool_path, "r", encoding="utf-8") as f:
+                json_data = json.load(f)
+
+            tool_class_content, tool_class_name = generate_func(json_data)
+
+            agent_class_content += "\n\n{}".format(tool_class_content)
+            func_def_prompt = func_def_tool_template.format(
+                class_content=agent_class_content.strip(),
+                tool_class_name=tool_class_name,
+                app_name=app_name,
+            )
+        else:
+            func_def_prompt = func_def_template.format(class_content=agent_class_content.strip())
+        note_prompt_formatted = note_prompt.format(client_password=client_password)
+
+        return setup_prompt, func_def_prompt, note_prompt_formatted
+
+
+if __name__ == "__main__":
+    # Manual check: print the prompt sections built for the VLC tool set.
+    from grounding_agent import GroundingAgent
+
+    prompt_sections = Prompt.construct_procedural_memory(GroundingAgent, "vlc")
+    print(prompt_sections)
diff --git a/mm_agents/autoglm/tools/apis/__init__.py b/mm_agents/autoglm/tools/apis/__init__.py
new file mode 100644
index 0000000..a43137a
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/__init__.py
@@ -0,0 +1,3 @@
+from .func import generate_func
+
+__all__ = ["generate_func"]
diff --git a/mm_agents/autoglm/tools/apis/code.json b/mm_agents/autoglm/tools/apis/code.json
new file mode 100644
index 0000000..082df5c
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/code.json
@@ -0,0 +1,260 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.launch_vscode",
+ "description": "Launches Visual Studio Code with the specified file path or directory",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "The file path or directory to open in VS Code"
+ }
+ },
+ "required": [
+ "path"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.compare_files",
+ "description": "Compares two files in VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file1": {
+ "type": "string",
+ "description": "The path to the first file"
+ },
+ "file2": {
+ "type": "string",
+ "description": "The path to the second file"
+ }
+ },
+ "required": [
+ "file1",
+ "file2"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.add_folder",
+ "description": "Adds a folder to the last active window in VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "folder": {
+ "type": "string",
+ "description": "The folder path to add"
+ }
+ },
+ "required": [
+ "folder"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.goto_file",
+ "description": "Opens a file at a specific line and character position",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "The file path to open"
+ },
+ "line": {
+ "type": "integer",
+ "description": "The line number to navigate to",
+ "default": 1
+ },
+ "character": {
+ "type": "integer",
+ "description": "The character position to navigate to",
+ "default": 1
+ }
+ },
+ "required": [
+ "file_path"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.perform_merge",
+ "description": "Perform a three-way merge",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path1": {
+ "type": "string",
+ "description": "The path to the first version file"
+ },
+ "path2": {
+ "type": "string",
+ "description": "The path to the second version file"
+ },
+ "base": {
+ "type": "string",
+ "description": "The path to the base version file"
+ },
+ "result": {
+ "type": "string",
+ "description": "The path to save the merged result"
+ }
+ },
+ "required": [
+ "path1",
+ "path2",
+ "base",
+ "result"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.remove_folder",
+ "description": "Removes a folder from the last active window in VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "folder": {
+ "type": "string",
+ "description": "The folder path to remove"
+ }
+ },
+ "required": [
+ "folder"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.install_extension",
+ "description": "Installs an extension or updates it in VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "extension_id": {
+ "type": "string",
+ "description": "The identifier of the extension"
+ },
+ "pre_release": {
+ "type": "boolean",
+ "description": "Whether to install the pre-release version",
+ "default": false
+ }
+ },
+ "required": [
+ "extension_id"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.uninstall_extension",
+ "description": "Uninstalls an extension from VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "extension_id": {
+ "type": "string",
+ "description": "The identifier of the extension"
+ }
+ },
+ "required": [
+ "extension_id"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.list_extensions",
+ "description": "Lists installed extensions in VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "show_versions": {
+ "type": "boolean",
+ "description": "Whether to show extension versions",
+ "default": false
+ },
+ "category": {
+ "type": "string",
+ "description": "The category to filter extensions by"
+ }
+ }
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.update_extensions",
+ "description": "Updates all installed extensions in VSCode to the latest version",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.disable_extension",
+ "description": "Disables a specific extension for the next instance of VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "extension_id": {
+ "type": "string",
+ "description": "The identifier of the extension"
+ }
+ },
+ "required": [
+ "extension_id"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.toggle_sync",
+ "description": "Toggles synchronization on or off in VSCode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "state": {
+ "type": "string",
+ "description": "The state to set ('on' or 'off')",
+ "enum": ["on", "off"]
+ }
+ },
+ "required": [
+ "state"
+ ]
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm/tools/apis/func.py b/mm_agents/autoglm/tools/apis/func.py
new file mode 100644
index 0000000..84ee548
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/func.py
@@ -0,0 +1,117 @@
+def _render_stub(func, func_name, indent, leading_params=()):
+    """Render one ``def`` line plus docstring for a tool-function schema entry.
+
+    Args:
+        func: The ``"function"`` mapping of a schema item (name, description, parameters).
+        func_name: Name to emit after ``def``.
+        indent: Whitespace prefix of the ``def`` line; the docstring sits one level deeper.
+        leading_params: Parameter names emitted before the schema's own (e.g. ``("cls",)``).
+
+    Returns:
+        The stub text, terminated by one blank line.
+    """
+    schema = func.get("parameters") or {}
+    params = schema.get("properties") or {}
+    required = schema.get("required") or []
+    optional = [name for name in params if name not in required]
+
+    body = indent + "    "
+    arg = body + "    "
+    # Required parameters come first, then optional ones; no defaults are emitted.
+    signature = ", ".join([*leading_params, *required, *optional])
+    lines = [
+        f"{indent}def {func_name}({signature}):",
+        f'{body}"""',
+        f"{body}{func['description']}",
+        "",
+        f"{body}Args:",
+    ]
+    if not required and not optional:
+        lines.append(f"{arg}None")
+    for name in required:
+        # A required name missing from "properties" is still documented.
+        spec = params.get(name, {})
+        lines.append(f"{arg}{name} ({spec.get('type', 'any')}): {spec.get('description', '')}")
+    for name in optional:
+        spec = params[name]
+        lines.append(f"{arg}{name} ({spec.get('type', 'any')}, optional): {spec.get('description', '')}")
+    lines.append(f'{body}"""')
+    return "\n".join(lines) + "\n\n"
+
+
+def generate_func(json_data):
+    """Render tool-function schemas as Python class and function stubs.
+
+    Items whose ``"type"`` is not ``"function"`` are ignored. A name of the
+    form ``Class.method`` becomes a method stub (first parameter ``cls``)
+    under ``class Class:``; any other name becomes a module-level stub,
+    emitted after all classes.
+
+    Args:
+        json_data: Iterable of schema items, each with ``"type"`` and ``"function"`` keys.
+
+    Returns:
+        The stripped stub source as a single string.
+    """
+    # Group the function items by class name, preserving first-seen order.
+    class_funcs = {}
+    no_class_funcs = []
+
+    for item in json_data:
+        if item["type"] != "function":
+            continue
+        name_parts = item["function"]["name"].split(".")
+        if len(name_parts) == 2:
+            class_funcs.setdefault(name_parts[0], []).append(item)
+        else:
+            no_class_funcs.append(item)
+
+    chunks = []
+
+    # Stubs that belong to a class.
+    for class_name, funcs in class_funcs.items():
+        chunks.append(f"class {class_name}:\n")
+        for item in funcs:
+            func = item["function"]
+            chunks.append(_render_stub(func, func["name"].split(".")[-1], "    ", ("cls",)))
+        chunks.append("\n")
+
+    # Stubs without a class.
+    for item in no_class_funcs:
+        func = item["function"]
+        chunks.append(_render_stub(func, func["name"], ""))
+
+    return "".join(chunks).strip()
+
+
+if __name__ == "__main__":
+    import json
+    from pathlib import Path
+
+    # Demo: render the Calc tool schema that sits next to this module. The path
+    # is resolved from __file__ so the demo works from any working directory.
+    sample_path = Path(__file__).with_name("libreoffice_calc.json")
+    with sample_path.open("r", encoding="utf-8") as f:
+        json_data = json.load(f)
+    print(generate_func(json_data))
diff --git a/mm_agents/autoglm/tools/apis/google_chrome.json b/mm_agents/autoglm/tools/apis/google_chrome.json
new file mode 100644
index 0000000..38c6abc
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/google_chrome.json
@@ -0,0 +1,134 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_profile_settings",
+ "description": "Opens the profile settings page in the browser.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_password_settings",
+ "description": "Opens the password/autofill settings page in the browser.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_privacy_settings",
+ "description": "Opens the privacy settings page in the browser.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_appearance_settings",
+ "description": "Opens the appearance settings page in the browser.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_search_engine_settings",
+ "description": "Opens the search engine settings page in the browser.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.bring_back_last_tab",
+ "description": "Restores the last-closed tab in the browser (equivalent to Ctrl+Shift+T).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.print",
+ "description": "Opens the print dialog for the current browser page (equivalent to Ctrl+P).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.delete_browsing_data",
+ "description": "Opens the 'Clear browsing data' dialog in the browser (equivalent to Ctrl+Shift+Del).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_extensions",
+ "description": "Opens the extensions management page in the browser.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.bookmark_page",
+ "description": "Bookmarks the current page in the browser (equivalent to Ctrl+D).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_bookmarks",
+ "description": "Opens the bookmarks page in the browser.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm/tools/apis/libreoffice_calc.json b/mm_agents/autoglm/tools/apis/libreoffice_calc.json
new file mode 100644
index 0000000..c195c8f
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/libreoffice_calc.json
@@ -0,0 +1,634 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.get_workbook_info",
+ "description": "Get workbook information, including file path, file name, sheets and active sheet.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.save",
+ "description": "Save the current workbook to its current location",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.get_column_data",
+ "description": "Get all data from the specified column.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "Name of the column to read (e.g. 'A', 'B', etc.)"
+ }
+ },
+ "required": [
+ "column_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.switch_active_sheet",
+ "description": "Switch to the specified sheet and make it active. Creates new sheet if it doesn't exist.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "sheet_name": {
+ "type": "string",
+ "description": "Name of the sheet to switch to or create"
+ }
+ },
+ "required": [
+ "sheet_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_column_values",
+ "description": "Set values to the specified column, cannot be used to set formulas.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "Name of the column (e.g. 'A', 'B', etc.) to write to"
+ },
+ "data": {
+ "type": "array",
+ "description": "List of values to write to the column"
+ },
+ "start_index": {
+ "type": "integer",
+ "description": "The index of the first row to write to, default is 2 (skip the first row)"
+ }
+ },
+ "required": [
+ "column_name",
+ "data"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.highlight_range",
+ "description": "Highlight the specified range with the specified color.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Range to highlight, in the format of 'A1:B10'"
+ },
+ "color": {
+ "type": "integer",
+ "description": "Color to highlight with, default is 0xFF0000 (red)"
+ }
+ },
+ "required": [
+ "range_str"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.transpose_range",
+ "description": "Transpose the specified range and paste it to the target cell.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "source_range": {
+ "type": "string",
+ "description": "Range to transpose, in the format of 'A1:B10'"
+ },
+ "target_cell": {
+ "type": "string",
+ "description": "Target cell to paste the transposed data, in the format of 'A1'"
+ }
+ },
+ "required": [
+ "source_range",
+ "target_cell"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.export_to_csv",
+ "description": "Export the current document to a CSV file with the same path and name as the original file.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.sort_column",
+ "description": "Sort the data in the specified column in ascending or descending order.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "The name of the column to sort (e.g. 'A', 'B', etc.)"
+ },
+ "ascending": {
+ "type": "boolean",
+ "description": "Whether to sort in ascending order (default True)"
+ },
+ "start_index": {
+ "type": "integer",
+ "description": "The index of the first row to sort, default is 2 (skip the first row)"
+ }
+ },
+ "required": [
+ "column_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_validation_list",
+ "description": "Set a validation list for the specified column.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "The name of the column (e.g. 'A', 'B', etc.) to set the validation list for"
+ },
+ "values": {
+ "type": "array",
+ "description": "The list of values to use for the validation list"
+ }
+ },
+ "required": [
+ "column_name",
+ "values"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.hide_row_data",
+ "description": "Hide rows that contain the specified value.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": "string",
+ "description": "The value to hide rows for, default is 'N/A'"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.reorder_columns",
+ "description": "Reorder the columns in the sheet according to the specified order.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_order": {
+ "type": "array",
+ "description": "A list of column names in the desired order (e.g. ['A', 'B', 'C'])"
+ }
+ },
+ "required": [
+ "column_order"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.create_pivot_table",
+ "description": "Create a pivot table in the active worksheet based on data from the source sheet.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "source_sheet": {
+ "type": "string",
+ "description": "Name of the source sheet containing the data"
+ },
+ "table_name": {
+ "type": "string",
+ "description": "Name for the new pivot table"
+ },
+ "row_fields": {
+ "type": "array",
+ "description": "List of fields to use as row labels (e.g. ['A', 'B', 'C'])"
+ },
+ "col_fields": {
+ "type": "array",
+ "description": "List of fields to use as column labels (e.g. ['A', 'B', 'C'])"
+ },
+ "value_fields": {
+ "type": "array",
+ "description": "List of fields to use as values (e.g. ['A', 'B', 'C'])"
+ },
+ "aggregation_function": {
+ "type": "string",
+ "description": "Aggregation function to use (sum, count, average, min, max), default is 'sum'"
+ },
+ "target_cell": {
+ "type": "string",
+ "description": "Target cell for the pivot table, default is 'A1'"
+ }
+ },
+ "required": [
+ "source_sheet",
+ "table_name",
+ "value_fields"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.merge_cells",
+ "description": "Merge cells in the specified range.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Range of cells to merge, in format 'A1:B10'"
+ }
+ },
+ "required": [
+ "range_str"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_cell_value",
+ "description": "Set a value to a specific cell in the active worksheet.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "cell": {
+ "type": "string",
+ "description": "Cell reference (e.g., 'A1')"
+ },
+ "value": {
+ "type": "string",
+ "description": "Value to set in the cell"
+ }
+ },
+ "required": [
+ "cell",
+ "value"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.format_range",
+ "description": "Apply formatting to the specified range in the active worksheet.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Range to format, in the format of 'A1:B10'"
+ },
+ "background_color": {
+ "type": "string",
+ "description": "Background color in hex format (e.g., '#0000ff')"
+ },
+ "font_color": {
+ "type": "string",
+ "description": "Font color in hex format (e.g., '#ffffff')"
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Whether to make the text bold"
+ },
+ "alignment": {
+ "type": "string",
+ "description": "Text alignment (left, center, right)"
+ }
+ },
+ "required": [
+ "range_str"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.create_chart",
+ "description": "Create a chart in the active worksheet based on the specified data range.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "chart_type": {
+ "type": "string",
+ "description": "Type of chart (bar, column, line, pie, scatter, area)"
+ },
+ "data_range": {
+ "type": "string",
+ "description": "Range containing the data for the chart, in the format of 'A1:B10'"
+ },
+ "title": {
+ "type": "string",
+ "description": "Title for the chart"
+ },
+ "x_axis_title": {
+ "type": "string",
+ "description": "Title for the X axis"
+ },
+ "y_axis_title": {
+ "type": "string",
+ "description": "Title for the Y axis"
+ }
+ },
+ "required": [
+ "chart_type",
+ "data_range"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.freeze_panes",
+ "description": "Freeze rows and/or columns in the active worksheet.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "rows": {
+ "type": "integer",
+ "description": "Number of rows to freeze from the top"
+ },
+ "columns": {
+ "type": "integer",
+ "description": "Number of columns to freeze from the left"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.rename_sheet",
+ "description": "Rename a worksheet in the workbook.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "old_name": {
+ "type": "string",
+ "description": "Current name of the worksheet"
+ },
+ "new_name": {
+ "type": "string",
+ "description": "New name for the worksheet"
+ }
+ },
+ "required": [
+ "old_name",
+ "new_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.copy_sheet",
+ "description": "Create a copy of an existing worksheet in the workbook.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "source_sheet": {
+ "type": "string",
+ "description": "Name of the worksheet to copy"
+ },
+ "new_sheet_name": {
+ "type": "string",
+ "description": "Name for the new worksheet copy (optional)"
+ }
+ },
+ "required": [
+ "source_sheet"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.reorder_sheets",
+ "description": "Change the order of worksheets in the workbook.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "sheet_name": {
+ "type": "string",
+ "description": "Name of the worksheet to move"
+ },
+ "position": {
+ "type": "integer",
+ "description": "New position index (0-based) for the worksheet"
+ }
+ },
+ "required": [
+ "sheet_name",
+ "position"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_chart_legend_position",
+ "description": "Set the position of the legend in a chart.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "position": {
+ "type": "string",
+ "description": "Position of the legend (top, bottom, left, right, none)"
+ }
+ },
+ "required": [
+ "position"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_number_format",
+ "description": "Apply a specific number format to a range of cells.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Range to format, in the format of 'A1:B10'"
+ },
+ "format_type": {
+ "type": "string",
+ "description": "Type of number format (general, number, currency, accounting, date, time, percentage, fraction, scientific, text)"
+ },
+ "decimal_places": {
+ "type": "integer",
+ "description": "Number of decimal places to display (optional)"
+ }
+ },
+ "required": [
+ "range_str",
+ "format_type"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.adjust_column_width",
+ "description": "Adjust the width of specified columns.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "columns": {
+ "type": "string",
+ "description": "Column range to adjust (e.g., 'A:C')"
+ },
+ "width": {
+ "type": "number",
+ "description": "Width to set (in characters)"
+ },
+ "autofit": {
+ "type": "boolean",
+ "description": "Whether to autofit columns to content"
+ }
+ },
+ "required": [
+ "columns"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.adjust_row_height",
+ "description": "Adjust the height of specified rows.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "rows": {
+ "type": "string",
+ "description": "Row range to adjust (e.g., '1:10')"
+ },
+ "height": {
+ "type": "number",
+ "description": "Height to set (in points)"
+ },
+ "autofit": {
+ "type": "boolean",
+ "description": "Whether to autofit rows to content"
+ }
+ },
+ "required": [
+ "rows"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.export_to_pdf",
+ "description": "Export the current document or specified sheets to PDF.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "Path where to save the PDF file, default is the same path as the original file"
+ },
+ "sheets": {
+ "type": "array",
+ "description": "List of sheet names to include in PDF, default is all sheets"
+ },
+ "open_after_export": {
+ "type": "boolean",
+ "description": "Whether to open the PDF after export, default is False"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_zoom_level",
+ "description": "Adjust the zoom level of the current worksheet.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "zoom_percentage": {
+ "type": "integer",
+ "description": "Zoom level as a percentage (e.g., 75 for 75%, 100 for normal size, 150 for zoomed in). Valid range is typically 10-400."
+ }
+ },
+ "required": [
+ "zoom_percentage"
+ ]
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm/tools/apis/libreoffice_impress.json b/mm_agents/autoglm/tools/apis/libreoffice_impress.json
new file mode 100644
index 0000000..e40efd5
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/libreoffice_impress.json
@@ -0,0 +1,569 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.save",
+ "description": "Save the current presentation to its current location",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.go_to_slide",
+ "description": "Navigates to a specific slide in the presentation based on its index.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to navigate to (1-based indexing)"
+ }
+ },
+ "required": ["slide_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.get_slide_count",
+ "description": "Gets the total number of slides in the current presentation.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.duplicate_slide",
+ "description": "Creates a duplicate of a specific slide and places it at the end of the presentation.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to duplicate (1-based indexing)"
+ }
+ },
+ "required": ["slide_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_font",
+ "description": "Sets the font style for all text elements in a specific slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to modify (1-based indexing)"
+ },
+ "font_name": {
+ "type": "string",
+ "description": "The name of the font to apply (e.g., 'Arial', 'Times New Roman', 'Calibri')"
+ }
+ },
+ "required": ["slide_index", "font_name"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.write_text",
+ "description": "Writes text to a specific textbox on a slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string",
+ "description": "The text content to write into the textbox"
+ },
+ "page_index": {
+ "type": "integer",
+ "description": "The index of the slide containing the textbox (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the textbox to modify (0-based indexing)"
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Whether to make the text bold, default is false"
+ },
+ "italic": {
+ "type": "boolean",
+ "description": "Whether to make the text italic, default is false"
+ },
+ "size": {
+ "type": "integer",
+ "description": "The size of the text. If None, uses the box's current font size."
+ },
+ "append": {
+ "type": "boolean",
+ "description": "Whether to append the text, default is False. Set this to true if you want to preserve existing formatting (like a bullet at the beginning) or keep the original text."
+ }
+ },
+ "required": ["content", "page_index", "box_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_style",
+ "description": "Sets the style properties for the specified textbox on a slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to modify (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the textbox to modify (0-based indexing)"
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Whether to make the textbox text bold"
+ },
+ "italic": {
+ "type": "boolean",
+ "description": "Whether to make the textbox text italic"
+ },
+ "underline": {
+ "type": "boolean",
+ "description": "Whether to underline the textbox text"
+ }
+ },
+ "required": ["slide_index", "box_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.configure_auto_save",
+ "description": "Enables or disables auto-save functionality for the current document and sets the auto-save interval.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "enabled": {
+ "type": "boolean",
+ "description": "Whether to enable (true) or disable (false) auto-save"
+ },
+ "interval_minutes": {
+ "type": "number",
+ "description": "The interval in minutes between auto-saves (minimum 1 minute)"
+ }
+ },
+ "required": ["enabled", "interval_minutes"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_background_color",
+ "description": "Sets the background color for the specified textbox on a slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide containing the textbox (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the textbox to modify (0-based indexing)"
+ },
+ "color": {
+ "type": "string",
+ "description": "The color to apply to the textbox (e.g., 'red', 'green', 'blue', 'yellow', or hex color code)"
+ }
+ },
+ "required": ["slide_index", "box_index", "color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_text_color",
+ "description": "Sets the text color for the specified textbox on a slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to modify (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the textbox to modify (0-based indexing)"
+ },
+ "color": {
+ "type": "string",
+ "description": "The color to apply to the textbox text (e.g., 'red', 'green', 'blue', 'black', or hex color code)"
+ }
+ },
+ "required": ["slide_index", "box_index", "color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.delete_content",
+ "description": "Deletes the specified textbox from a slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to modify (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the textbox to modify (0-based indexing)"
+ }
+ },
+ "required": ["slide_index", "box_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_orientation",
+ "description": "Changes the orientation of slides in the presentation between portrait (upright) and landscape (sideways).",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "orientation": {
+ "type": "string",
+ "description": "The desired orientation for the slides",
+ "enum": ["portrait", "landscape"]
+ }
+ },
+ "required": ["orientation"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.position_box",
+ "description": "Positions a textbox or image on a slide at a specific location or predefined position.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide containing the box (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the box to position (0-based indexing)"
+ },
+ "position": {
+ "type": "string",
+ "description": "Predefined position on the slide (left, right, center, top, bottom, top-left, top-right, bottom-left, bottom-right)",
+ "enum": [
+ "left",
+ "right",
+ "center",
+ "top",
+ "bottom",
+ "top-left",
+ "top-right",
+ "bottom-left",
+ "bottom-right"
+ ]
+ }
+ },
+ "required": ["slide_index", "box_index", "position"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.insert_file",
+ "description": "Inserts a video or audio file into the current or specified slide in the presentation.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "The full path to the file to be inserted"
+ },
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to insert the file into (1-based indexing). If not provided, inserts into the current slide."
+ },
+ "position": {
+ "type": "object",
+ "description": "The position coordinates for the file",
+ "properties": {
+ "x": {
+ "type": "number",
+ "description": "The x-coordinate (horizontal position) as a percentage of slide width"
+ },
+ "y": {
+ "type": "number",
+ "description": "The y-coordinate (vertical position) as a percentage of slide height"
+ }
+ }
+ },
+ "size": {
+ "type": "object",
+ "description": "The size dimensions for the file",
+ "properties": {
+ "width": {
+ "type": "number",
+ "description": "The width as a percentage of slide width"
+ },
+ "height": {
+ "type": "number",
+ "description": "The height as a percentage of slide height"
+ }
+ }
+ },
+ "autoplay": {
+ "type": "boolean",
+ "description": "Whether the video or audio should automatically play when the slide is shown"
+ }
+ },
+ "required": ["file_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_background",
+ "description": "Sets the background color or image for a specific slide or all slides.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to modify (1-based indexing). If not provided, applies to all slides."
+ },
+ "color": {
+ "type": "string",
+ "description": "The background color to apply (e.g., 'red', 'green', 'blue', or hex color code)"
+ },
+ "image_path": {
+ "type": "string",
+ "description": "Path to an image file to use as background. If provided, overrides color."
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.save_as",
+ "description": "Saves the current document to a specified location with a given filename.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "The full path where the file should be saved, including the filename and extension"
+ },
+ "overwrite": {
+ "type": "boolean",
+ "description": "Whether to overwrite the file if it already exists (default: false)"
+ }
+ },
+ "required": ["file_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.insert_image",
+ "description": "Inserts an image to a specific slide in the presentation.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to add the image to (1-based indexing)"
+ },
+ "image_path": {
+ "type": "string",
+ "description": "The full path to the image file to be added"
+ },
+ "width": {
+ "type": "number",
+ "description": "The width of the image in centimeters"
+ },
+ "height": {
+ "type": "number",
+ "description": "The height of the image in centimeters"
+ },
+ "position": {
+ "type": "object",
+ "description": "The position coordinates for the image",
+ "properties": {
+ "x": {
+ "type": "number",
+ "description": "The x-coordinate (horizontal position) as a percentage of slide width"
+ },
+ "y": {
+ "type": "number",
+ "description": "The y-coordinate (vertical position) as a percentage of slide height"
+ }
+ }
+ }
+ },
+ "required": ["slide_index", "image_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.configure_display_settings",
+ "description": "Configures the display settings for LibreOffice Impress presentations, including monitor usage and presenter view options.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "use_presenter_view": {
+ "type": "boolean",
+ "description": "Whether to use presenter view (showing current and next slide on one screen). Set to false to disable presenter view."
+ },
+ "primary_monitor_only": {
+ "type": "boolean",
+ "description": "Whether to use only the primary monitor for the presentation. Set to true to use only one screen."
+ },
+ "monitor_for_presentation": {
+ "type": "integer",
+ "description": "Specify which monitor to use for the presentation (1 for primary monitor, 2 for secondary monitor, etc.)"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_number_color",
+ "description": "Sets the color of the slide number in the presentation.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "color": {
+ "type": "string",
+ "description": "The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code)"
+ }
+ },
+ "required": ["color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_text_strikethrough",
+ "description": "Applies or removes strike-through formatting to specific text content in a slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide containing the text (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the textbox containing the text (0-based indexing)"
+ },
+ "line_numbers": {
+ "type": "array",
+ "items": {
+ "type": "integer"
+ },
+ "description": "The line numbers to apply strike-through formatting to (1-based indexing)"
+ },
+ "apply": {
+ "type": "boolean",
+ "description": "Whether to apply (true) or remove (false) strike-through formatting"
+ }
+ },
+ "required": ["slide_index", "box_index", "line_numbers", "apply"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_textbox_alignment",
+ "description": "Sets the text alignment for the specified textbox on a slide.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the slide to modify (1-based indexing)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "The index of the textbox to modify (0-based indexing)"
+ },
+ "alignment": {
+ "type": "string",
+ "description": "The text alignment to apply to the textbox",
+ "enum": ["left", "center", "right", "justify"]
+ }
+ },
+ "required": ["slide_index", "box_index", "alignment"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.export_to_image",
+ "description": "Exports the current presentation or a specific slide to an image file format (PNG, JPEG, etc.).",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "The full path where the image file should be saved, including the filename and extension"
+ },
+ "format": {
+ "type": "string",
+ "description": "The image format to export to (e.g., 'png', 'jpeg', 'gif')",
+ "enum": ["png", "jpeg", "jpg", "gif", "bmp", "tiff"]
+ },
+ "slide_index": {
+ "type": "integer",
+ "description": "The index of the specific slide to export (1-based indexing). If not provided, exports the entire presentation as a series of images."
+ }
+ },
+ "required": ["file_path", "format"]
+ }
+ }
+ }
+]
diff --git a/mm_agents/autoglm/tools/apis/libreoffice_writer.json b/mm_agents/autoglm/tools/apis/libreoffice_writer.json
new file mode 100644
index 0000000..f511c12
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/libreoffice_writer.json
@@ -0,0 +1,412 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.save",
+ "description": "Save the current document to its current location",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.write_text",
+ "description": "Write text at the current cursor position in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "text": {
+ "type": "string",
+ "description": "The text to write at the current cursor position."
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Optional. Whether to write the text in bold."
+ },
+ "italic": {
+ "type": "boolean",
+ "description": "Optional. Whether to write the text in italic."
+ },
+ "size": {
+ "type": "number",
+ "description": "Optional. The size of the text."
+ }
+ },
+ "required": ["text"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_color",
+ "description": "Changes the color of matched text in the document for specified paragraphs.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "pattern": {
+ "type": "string",
+ "description": "The pattern to match in the document, should be a regular expression"
+ },
+ "color": {
+ "type": "number",
+ "description": "The color to apply, should be a hex color code, like 0x000000 for black"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["pattern", "color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.find_and_replace",
+ "description": "Finds all occurrences of a specified text pattern and replaces them with another text in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "pattern": {
+ "type": "string",
+ "description": "The pattern to match in the document, should be a regular expression"
+ },
+ "replacement": {
+ "type": "string",
+ "description": "The text to replace the found text with."
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["pattern", "replacement"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_font",
+ "description": "Changes the font of text in the document or specified paragraphs.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "font_name": {
+ "type": "string",
+ "description": "The name of the font to apply (e.g., 'Times New Roman', 'Arial', 'Calibri')"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["font_name"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_line_spacing",
+ "description": "Sets the line spacing for specified paragraphs in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "spacing_value": {
+ "type": "number",
+ "description": "The line spacing value to apply (1.0 for single spacing, 2.0 for double spacing, etc.)."
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["spacing_value"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.remove_highlighting",
+ "description": "Removes highlighting from text in the document for specified paragraphs.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.find_highlighted_text",
+ "description": "Finds all text in the document that has a specific highlight color applied to it.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "highlight_color": {
+ "type": "string",
+ "description": "The highlight color to search for. Can be a color name (e.g., 'yellow', 'green') or hex code."
+ }
+ },
+ "required": ["highlight_color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.insert_formula_at_cursor",
+ "description": "Inserts a formula at the current cursor position in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "formula": {
+ "type": "string",
+ "description": "The formula to insert at the current cursor position."
+ }
+ },
+ "required": ["formula"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.insert_image_at_cursor",
+ "description": "Inserts an image at the current cursor position in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "image_path": {
+ "type": "string",
+ "description": "Full path to the image file to insert"
+ },
+ "width": {
+ "type": "integer",
+ "description": "Optional. Width to display the image in pixels. If not specified, uses the original image width."
+ },
+ "height": {
+ "type": "integer",
+ "description": "Optional. Height to display the image in pixels. If not specified, uses the original image height."
+ }
+ },
+ "required": ["image_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_strikethrough",
+ "description": "Sets the strikethrough formatting for specified text in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "pattern": {
+ "type": "string",
+ "description": "The pattern to match in the document, should be a regular expression"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["pattern"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_font_size",
+ "description": "Changes the font size of specified text in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "font_size": {
+ "type": "number",
+ "description": "The font size to apply (in points)."
+ },
+ "pattern": {
+ "type": "string",
+ "description": "The pattern to match in the document, should be a regular expression"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["font_size", "pattern"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.export_to_pdf",
+ "description": "Exports the current document to PDF format.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "output_path": {
+ "type": "string",
+ "description": "Optional. The full path where the PDF should be saved. If not provided, uses the same location as the original document with .pdf extension."
+ },
+ "output_filename": {
+ "type": "string",
+ "description": "Optional. The filename to use for the PDF. If not provided, uses the original document's filename with .pdf extension."
+ },
+ "include_comments": {
+ "type": "boolean",
+ "description": "Optional. Whether to include comments in the exported PDF. Defaults to false."
+ },
+ "quality": {
+ "type": "string",
+ "description": "Optional. The quality of the PDF export ('standard', 'high', 'print'). Defaults to 'standard'."
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_paragraph_alignment",
+ "description": "Sets the text alignment for specified paragraphs in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "alignment": {
+ "type": "string",
+ "description": "The alignment to apply ('left', 'center', 'right', 'justify')."
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["alignment"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.capitalize_words",
+ "description": "Capitalizes the first letter of each word for specified paragraphs in the document.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_default_font",
+ "description": "Sets the default font for new text in the document without changing existing text.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "font_name": {
+ "type": "string",
+ "description": "The name of the font to set as default (e.g., 'Times New Roman', 'Arial', 'Calibri')"
+ },
+ "font_size": {
+ "type": "number",
+ "description": "Optional. The default font size in points."
+ }
+ },
+ "required": ["font_name"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.add_page_numbers",
+ "description": "Adds page numbers to the document at the specified position.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "position": {
+ "type": "string",
+ "description": "Position of the page numbers ('bottom_left', 'bottom_center', 'bottom_right', 'top_left', 'top_center', 'top_right')"
+ },
+ "start_number": {
+ "type": "integer",
+ "description": "Optional. The starting page number. Defaults to 1."
+ },
+ "format": {
+ "type": "string",
+ "description": "Optional. Format of the page numbers (e.g., '1', 'Page 1', '1 of N'). Defaults to simple number format."
+ }
+ },
+ "required": ["position"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.insert_page_break",
+ "description": "Inserts a page break at the current cursor position, creating a new blank page after the current one.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "position": {
+ "type": "string",
+ "description": "Optional. Specifies where to insert the page break: 'at_cursor' for current cursor position, 'end_of_document' for end of document. Defaults to 'at_cursor'."
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.change_text_case",
+ "description": "Changes the case of text in the document or a specified selection.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "case_type": {
+ "type": "string",
+ "description": "The type of case conversion to apply ('lowercase', 'uppercase')."
+ },
+ "pattern": {
+ "type": "string",
+ "description": "The pattern to match in the document, should be a regular expression"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Optional. Indices of paragraphs to modify (0-based indexing). If not provided, applies to all paragraphs."
+ }
+ },
+ "required": ["case_type", "pattern"]
+ }
+ }
+ }
+]
diff --git a/mm_agents/autoglm/tools/apis/vlc.json b/mm_agents/autoglm/tools/apis/vlc.json
new file mode 100644
index 0000000..ca9fc58
--- /dev/null
+++ b/mm_agents/autoglm/tools/apis/vlc.json
@@ -0,0 +1,171 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_playlist",
+ "description": "Gets the current VLC playlist with track information including name, URI and duration.",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.play",
+ "description": "Starts playing the current media in VLC player.",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.pause",
+ "description": "Pauses the currently playing media in VLC player.",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.next",
+ "description": "Switches to the next media item in the VLC playlist.",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.previous",
+ "description": "Switches to the previous media item in the VLC playlist.",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+  {
+    "type": "function",
+    "function": {
+      "name": "VLCTools.add_to_playlist",
+      "description": "Adds a media file to the VLC playlist.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "uri": {
+            "type": "string",
+            "description": "The URI of the media file to add to the playlist; it must start with 'file://' or 'https://'"
+          }
+        },
+        "required": ["uri"]
+      }
+    }
+  },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_current_time",
+ "description": "Gets the current playback time position of the playing media in seconds.",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_media_duration",
+ "description": "Gets the total duration of the currently playing media file in seconds.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.toggle_fullscreen",
+ "description": "Toggles fullscreen mode for the currently playing video in the media player. If the video is not in fullscreen mode, it will be expanded to fill the entire screen. If it's already in fullscreen mode, it will return to windowed mode.",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "enable": {
+ "type": "boolean",
+ "description": "Optional parameter to explicitly set fullscreen mode. If true, forces fullscreen mode. If false, exits fullscreen mode. If not provided, the current state is toggled."
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_settings",
+ "description": "Gets the current settings of the VLC player.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+  {
+    "type": "function",
+    "function": {
+      "name": "VLCTools.set_settings",
+      "description": "Sets the settings for the VLC player.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "field": {
+            "type": "string",
+            "description": "The name of the setting to set, e.g. input-record-path: the path to the recording folder, qt-bgcone: disable/enable splash cone icon (in 0/1), qt-max-volume: set max volume (in number), qt-minimal-view: hide/show bottom toolbar (in 0/1), global-key-play-pause: disable/enable play&pause key (in 0/1)"
+          },
+          "value": {
+            "type": "string",
+            "description": "The value to set for the specified setting, set 0/1 for boolean values"
+          }
+        },
+        "required": [
+          "field",
+          "value"
+        ]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "VLCTools.get_media_files",
+      "description": "Gets the media files for the specified path.",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "path": {
+            "type": "string",
+            "description": "The path to the media files"
+          },
+          "suffix": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "The suffix of the media files, default is ['mp4', 'avi', 'mkv', 'mov', 'mp3', 'm4a', 'wav']"
+          }
+        },
+        "required": ["path"]
+      }
+    }
+  }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm/tools/package/code.py b/mm_agents/autoglm/tools/package/code.py
new file mode 100644
index 0000000..e580d55
--- /dev/null
+++ b/mm_agents/autoglm/tools/package/code.py
@@ -0,0 +1,260 @@
+import json
+import os
+import subprocess
+from pathlib import Path
+
+
+class CodeTools:
+    """Wrappers around the VS Code ``code`` command-line interface.
+
+    Every action stores a human-readable outcome in ``ret`` and returns it;
+    ``print_result`` prints the stored outcome.
+    """
+
+    ret = ""
+
+    @classmethod
+    def _run(cls, command, success, failure, capture=False):
+        """
+        Run *command* and record the outcome in ``cls.ret``.
+
+        Args:
+            command (list): Argument vector passed to ``subprocess.run``.
+            success (str): Text stored when the command exits with status 0.
+            failure (str): Prefix used when the command exits with a non-zero status.
+            capture (bool): If True, store the command's stdout instead of *success*.
+
+        Returns:
+            str: The text stored in ``cls.ret``.
+        """
+        try:
+            if capture:
+                cls.ret = subprocess.run(command, check=True, capture_output=True, text=True).stdout
+            else:
+                subprocess.run(command, check=True)
+                cls.ret = success
+        except subprocess.CalledProcessError as err:
+            cls.ret = f"{failure}: {err}"
+        except Exception as err:
+            # Anything else, e.g. the executable could not be started at all.
+            cls.ret = f"Unexpected error: {err}"
+        return cls.ret
+
+    @classmethod
+    def print_result(cls):
+        """Print the outcome of the most recent action."""
+        print(cls.ret)
+
+    @classmethod
+    def launch_vscode(cls, path):
+        """
+        Open a file or directory in an existing VS Code window (``code -r``).
+
+        Args:
+            path (str): File path or directory to open.
+        """
+        return cls._run(["code", "-r", path], "Successfully launched VS Code", "Error launching VS Code")
+
+    @classmethod
+    def env_info(cls):
+        """Store the string "None" as the result; no environment details are collected."""
+        cls.ret = "None"
+
+    @classmethod
+    def compare_files(cls, file1, file2):
+        """
+        Open two files side by side in the VS Code diff view (``code -d``).
+
+        Args:
+            file1 (str): Path of the first file.
+            file2 (str): Path of the second file.
+        """
+        return cls._run(
+            ["code", "-d", file1, file2],
+            "The compared files are opened in VSCode",
+            "Error comparing files",
+        )
+
+    @classmethod
+    def add_folder(cls, folder):
+        """
+        Add a folder to the last active VS Code window (``code -a``).
+
+        Args:
+            folder (str): Folder path.
+        """
+        return cls._run(["code", "-a", folder], "Successfully added folder", "Error adding folder")
+
+    @classmethod
+    def goto_file(cls, file_path, line=1, character=1):
+        """
+        Open a file at a given line and character position (``code -g``).
+
+        Args:
+            file_path (str): Path of the file.
+            line (int): Line number.
+            character (int): Character position within the line.
+        """
+        return cls._run(
+            ["code", "-g", f"{file_path}:{line}:{character}"],
+            f"Successfully opened file, line: {line}, character: {character}",
+            "Error going to file",
+        )
+
+    @classmethod
+    def perform_merge(cls, path1, path2, base, result):
+        """
+        Start a three-way merge (``code -m``).
+
+        Args:
+            path1 (str): Path of the first modified version.
+            path2 (str): Path of the second modified version.
+            base (str): Path of the common base version.
+            result (str): Path where the merge result is saved.
+        """
+        return cls._run(
+            ["code", "-m", path1, path2, base, result],
+            "Successfully performed merge",
+            "Error performing merge",
+        )
+
+    @classmethod
+    def remove_folder(cls, folder):
+        """
+        Remove a folder from the last active VS Code window (``code --remove``).
+
+        Args:
+            folder (str): Folder path.
+        """
+        return cls._run(["code", "--remove", folder], "Successfully removed folder", "Error removing folder")
+
+    @classmethod
+    def install_extension(cls, extension_id, pre_release=False):
+        """
+        Install or update a VS Code extension.
+
+        Args:
+            extension_id (str): Identifier of the extension.
+            pre_release (bool): Whether to request the pre-release version.
+        """
+        command = ["code", "--install-extension", extension_id]
+        if pre_release:
+            command.append("--pre-release")
+        return cls._run(command, "Successfully installed extension", "Error installing extension")
+
+    @classmethod
+    def uninstall_extension(cls, extension_id):
+        """
+        Uninstall a VS Code extension.
+
+        Args:
+            extension_id (str): Identifier of the extension.
+        """
+        return cls._run(
+            ["code", "--uninstall-extension", extension_id],
+            "Successfully uninstalled extension",
+            "Error uninstalling extension",
+        )
+
+    @classmethod
+    def list_extensions(cls, show_versions=False, category=None):
+        """
+        List the installed VS Code extensions; the result is the command's stdout.
+
+        Args:
+            show_versions (bool): Whether to include extension versions.
+            category (str): Optional category to filter by.
+        """
+        command = ["code", "--list-extensions"]
+        if show_versions:
+            command.append("--show-versions")
+        if category:
+            command.extend(["--category", category])
+        return cls._run(command, "", "Error listing extensions", capture=True)
+
+    @classmethod
+    def update_extensions(cls):
+        """Update all installed VS Code extensions to their latest version."""
+        return cls._run(
+            ["code", "--update-extensions"],
+            "Successfully updated extensions",
+            "Error updating extensions",
+        )
+
+    @classmethod
+    def disable_extension(cls, extension_id):
+        """
+        Disable an extension for the next VS Code instance.
+
+        Args:
+            extension_id (str): Identifier of the extension.
+        """
+        return cls._run(
+            ["code", "--disable-extension", extension_id],
+            "Successfully disabled extension",
+            "Error disabling extension",
+        )
+
+    @classmethod
+    def toggle_sync(cls, state):
+        """
+        Turn settings sync on or off (``code --sync``).
+
+        Args:
+            state (str): 'on' or 'off'.
+        """
+        return cls._run(["code", "--sync", state], "Successfully toggled sync", "Error toggling sync")
diff --git a/mm_agents/autoglm/tools/package/google_chrome.py b/mm_agents/autoglm/tools/package/google_chrome.py
new file mode 100644
index 0000000..68afa4c
--- /dev/null
+++ b/mm_agents/autoglm/tools/package/google_chrome.py
@@ -0,0 +1,107 @@
+class BrowserTools:
+    """Browser shortcuts expressed as action dicts or pyautogui code snippets.
+
+    The ``open_*`` methods return an ``OPEN_CHROME_TAB`` action; the hotkey
+    methods return a one-line Python snippet (as a string) that presses the keys.
+    """
+
+    ret = ""
+
+    # NOTE: a Playwright-based ``show_all_tabs`` helper used to be sketched here
+    # as commented-out code; it is not part of this class.
+
+    @staticmethod
+    def _open_tab(url):
+        """Build the action dict that opens *url* in a new Chrome tab."""
+        return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": [url]}}
+
+    @staticmethod
+    def _hotkey(message, *keys):
+        """Build the pyautogui snippet that presses *keys* and then prints *message*."""
+        pressed = ", ".join(f"'{key}'" for key in keys)
+        return f"import pyautogui; pyautogui.hotkey({pressed}); print('{message}')"
+
+    @classmethod
+    def print_result(cls):
+        """Print the stored result string."""
+        print(cls.ret)
+
+    @classmethod
+    def env_info(cls):
+        """Store the string "None" as the result; no environment details are collected."""
+        cls.ret = "None"
+
+    @classmethod
+    def open_profile_settings(cls):
+        """
+        Open the profile settings page in the browser.
+        """
+        return cls._open_tab("chrome://settings/people")
+
+    @classmethod
+    def open_password_settings(cls):
+        """
+        Open the password settings page in the browser.
+        """
+        return cls._open_tab("chrome://settings/autofill")
+
+    @classmethod
+    def open_privacy_settings(cls):
+        """
+        Open the privacy settings page in the browser.
+        """
+        return cls._open_tab("chrome://settings/privacy")
+
+    @classmethod
+    def open_appearance_settings(cls):
+        """
+        Open the appearance settings page in the browser.
+        """
+        return cls._open_tab("chrome://settings/appearance")
+
+    @classmethod
+    def open_search_engine_settings(cls):
+        """
+        Open the search engine settings page in the browser.
+        """
+        return cls._open_tab("chrome://settings/search")
+
+    @classmethod
+    def bring_back_last_tab(cls):
+        """
+        Bring back the last closed tab (Ctrl+Shift+T).
+        """
+        return cls._hotkey("Brought back last tab", "ctrl", "shift", "t")
+
+    @classmethod
+    def print(cls):
+        """
+        Open the print option for the current page (Ctrl+P).
+        """
+        return cls._hotkey("Opened print option", "ctrl", "p")
+
+    @classmethod
+    def delete_browsing_data(cls):
+        """
+        Press Ctrl+Shift+Del, the browsing-data shortcut.
+        """
+        return cls._hotkey("Deleted browsing data", "ctrl", "shift", "del")
+
+    @classmethod
+    def open_extensions(cls):
+        """
+        Open the extensions page in the browser.
+        """
+        return cls._open_tab("chrome://extensions")
+
+    @classmethod
+    def bookmark_page(cls):
+        """
+        Bookmark the current page (Ctrl+D).
+        """
+        return cls._hotkey("Bookmarked page", "ctrl", "d")
+
+    @classmethod
+    def open_bookmarks(cls):
+        """
+        Open the bookmarks page in the browser.
+        """
+        return cls._open_tab("chrome://bookmarks")
diff --git a/mm_agents/autoglm/tools/package/libreoffice_calc.py b/mm_agents/autoglm/tools/package/libreoffice_calc.py
new file mode 100644
index 0000000..540da7f
--- /dev/null
+++ b/mm_agents/autoglm/tools/package/libreoffice_calc.py
@@ -0,0 +1,1322 @@
+import json
+import os
+import subprocess
+import sys
+
+import uno
+from com.sun.star.beans import PropertyValue
+
+
+class CalcTools:
+ localContext = uno.getComponentContext()
+ resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+ ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext")
+ desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
+ doc = desktop.getCurrentComponent()
+ sheet = doc.CurrentController.ActiveSheet
+ ret = ""
+
+ @classmethod
+    def close_other_window(cls):
+        """Close every open component whose URL differs from the current document's."""
+        active_url = cls.doc.getURL()
+        opened = cls.desktop.getComponents().createEnumeration()
+
+        while opened.hasMoreElements():
+            component = opened.nextElement()
+            if component.getURL() == active_url:
+                continue  # keep the current document open
+            # NOTE(review): an earlier comment claimed True means "save changes";
+            # in UNO's XCloseable the argument is the deliver-ownership flag.
+            # Confirm unsaved edits are handled as intended.
+            component.close(True)
+
+ @classmethod
+    def maximize_window(cls):
+        """
+        Resize the document window to the toolkit's work area.
+
+        The window is moved to the work area's origin and given its width and
+        height (presumably the screen minus task bars and similar - TODO confirm).
+        """
+        window = cls.doc.getCurrentController().getFrame().getContainerWindow()
+
+        # The screen-compatible device created here previously was never used,
+        # so it is no longer created.
+        workarea = window.getToolkit().getWorkArea()
+
+        # 15 = X | Y | WIDTH | HEIGHT, i.e. apply position and size together.
+        window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15)
+
+ @classmethod
+    def print_result(cls):
+        """Print the result string stored by the most recent tool call."""
+        print(cls.ret)
+
+ @classmethod
+    def save(cls):
+        """
+        Store the current workbook at its existing location.
+
+        Returns:
+            bool: True if the document was stored, False if storing raised
+                (the message is kept in cls.ret).
+        """
+        try:
+            cls.doc.store()
+        except Exception as err:
+            cls.ret = f"Error: {err}"
+            return False
+        cls.ret = "Success"
+        return True
+
+ @classmethod
+    def _get_column_index(cls, column_name, sheet=None):
+        """
+        Convert a column letter name (A, B, ..., Z, AA, AB, ...) to a 0-based index.
+
+        The previous implementation looked only at the first character, so 'AA'
+        mapped to 0 and lowercase names were offset by 32; its ValueError handler
+        could never trigger.
+
+        Args:
+            column_name (str): Column letters, case-insensitive.
+            sheet: Unused; kept for backward compatibility.
+
+        Returns:
+            int | None: Index of the column, or None if column_name is not a
+                string of one to three ASCII letters.
+        """
+        if not isinstance(column_name, str):
+            return None
+        letters = column_name.strip().upper()
+        # Spreadsheet column names are at most three letters (e.g. AMJ, XFD).
+        if not 1 <= len(letters) <= 3 or not all("A" <= ch <= "Z" for ch in letters):
+            return None
+        index = 0
+        for ch in letters:
+            index = index * 26 + (ord(ch) - ord("A") + 1)
+        return index - 1
+
+ @classmethod
+    def _get_last_used_column(cls):
+        """
+        Return the end column of the active sheet's used area.
+
+        Returns:
+            int: EndColumn of the cursor's range address after moving it to the
+                end of the used area.
+        """
+        used_area = cls.sheet.createCursor()
+        used_area.gotoEndOfUsedArea(False)
+        address = used_area.RangeAddress
+        return address.EndColumn
+
+ @classmethod
+    def _get_last_used_row(cls):
+        """
+        Return the end row of the active sheet's used area.
+
+        Returns:
+            int: EndRow of the cursor's range address after moving it to the
+                end of the used area.
+        """
+        used_area = cls.sheet.createCursor()
+        used_area.gotoEndOfUsedArea(False)
+        address = used_area.RangeAddress
+        return address.EndRow
+
+ @classmethod
+    def _column_name_to_index(cls, column_name):
+        """
+        Convert a column name to a column index.
+
+        Args:
+            column_name (str): Column name such as 'A' or 'AB' (case-insensitive).
+
+        Returns:
+            int: 0-based column index.
+        """
+        base = ord("A") - 1  # so that 'A' contributes 1, 'B' 2, ...
+        total = 0
+        for letter in column_name.upper():
+            total = 26 * total + (ord(letter) - base)
+        return total - 1
+
+ @classmethod
+    def get_workbook_info(cls):
+        """
+        Collect information about the workbook and its sheets.
+
+        Returns:
+            dict: file_path, file_title, sheets (one entry per sheet with name,
+                index, visible, row_count, column_count), active_sheet and
+                sheet_count. The same data is stored in cls.ret as JSON.
+                Returns None if an error occurred (the message is kept in cls.ret).
+        """
+        try:
+            all_sheets = cls.doc.getSheets()
+            total = all_sheets.getCount()
+            info = {
+                "file_path": cls.doc.getLocation(),
+                "file_title": cls.doc.getTitle(),
+                "sheets": [],
+                "active_sheet": cls.sheet.Name,
+                "sheet_count": total,
+            }
+
+            for position in range(total):
+                current = all_sheets.getByIndex(position)
+                cursor = current.createCursor()
+                cursor.gotoEndOfUsedArea(False)
+                address = cursor.getRangeAddress()
+
+                entry = {
+                    "name": current.getName(),
+                    "index": position,
+                    "visible": current.IsVisible,
+                    "row_count": address.EndRow + 1,
+                    "column_count": address.EndColumn + 1,
+                }
+                info["sheets"].append(entry)
+
+                # The active sheet's name is replaced by its full entry.
+                if current == cls.sheet:
+                    info["active_sheet"] = entry
+
+            cls.ret = json.dumps(info, ensure_ascii=False)
+            return info
+
+        except Exception as err:
+            cls.ret = f"Error: {err}"
+
+ @classmethod
+    def env_info(cls, sheet_name=None):
+        """
+        Render the used area of a sheet as a text table.
+
+        Args:
+            sheet_name (str, optional): Name of the sheet to read. If None, uses the active sheet.
+
+        Returns:
+            dict: name, data (the formatted table), row_count and column_count.
+                The same data is stored in cls.ret as JSON. Returns None if an
+                error occurred (the message is kept in cls.ret).
+        """
+
+        def column_label(index):
+            # Bijective base-26 label: 0 -> A, 25 -> Z, 26 -> AA.
+            # chr(65 + index) alone produced '[', '\\', ... after column Z.
+            label = ""
+            index += 1
+            while index:
+                index, remainder = divmod(index - 1, 26)
+                label = chr(65 + remainder) + label
+            return label
+
+        try:
+            # Get the target sheet
+            if sheet_name is not None:
+                sheet = cls.doc.getSheets().getByName(sheet_name)
+            else:
+                sheet = cls.sheet
+
+            # Create cursor to find used range
+            cursor = sheet.createCursor()
+            cursor.gotoEndOfUsedArea(False)
+            end_col = cursor.getRangeAddress().EndColumn
+            end_row = cursor.getRangeAddress().EndRow
+
+            # Generate column headers (A, B, ..., Z, AA, AB, ...)
+            col_headers = [column_label(i) for i in range(end_col + 1)]
+
+            # Get displayed values from cells
+            data_array = [
+                [sheet.getCellByPosition(col, row).getString() for col in range(end_col + 1)]
+                for row in range(end_row + 1)
+            ]
+
+            # Calculate maximum width for each column, starting from the header widths
+            col_widths = [len(header) for header in col_headers]
+            for row in data_array:
+                for i, cell in enumerate(row):
+                    col_widths[i] = max(col_widths[i], len(str(cell)))
+
+            # Format the header row
+            header_row = " | " + " | ".join(f"{h:<{w}}" for h, w in zip(col_headers, col_widths)) + " |"
+            separator = "--|-" + "-|-".join("-" * w for w in col_widths) + "-|"
+
+            # Format data rows with 1-based row numbers
+            formatted_rows = []
+            for row_idx, row in enumerate(data_array, 1):
+                row_str = f"{row_idx:<2}| " + " | ".join(f"{cell:<{w}}" for cell, w in zip(row, col_widths)) + " |"
+                formatted_rows.append(row_str)
+
+            # Combine all parts
+            formated_data = header_row + "\n" + separator + "\n" + "\n".join(formatted_rows)
+
+            sheet_info = {
+                "name": sheet.getName(),
+                "data": formated_data,
+                "row_count": end_row + 1,
+                "column_count": end_col + 1,
+            }
+
+            cls.ret = json.dumps(sheet_info, ensure_ascii=False)
+            return sheet_info
+
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+
+ @classmethod
+    def get_column_data(cls, column_name):
+        """
+        Get data from the specified column of the active sheet.
+
+        Args:
+            column_name (str): Name of the column to read
+
+        Returns:
+            list: Values of the column from the first row to the last used row;
+                the same values are stored in cls.ret as JSON. If the column
+                cannot be resolved, the string "Column not found" is returned.
+        """
+        column_index = cls._get_column_index(column_name)
+        if column_index is None:
+            # Also record the failure so print_result does not show a stale result.
+            cls.ret = "Column not found"
+            return "Column not found"
+        last_row = cls._get_last_used_row()
+        _range = cls.sheet.getCellRangeByPosition(column_index, 0, column_index, last_row)
+        # Fetch the data array once and flatten its single column.
+        values = [row[0] for row in _range.getDataArray()]
+        cls.ret = json.dumps(values, ensure_ascii=False)
+        return values
+
+ @classmethod
+    def switch_active_sheet(cls, sheet_name):
+        """
+        Make the named sheet active, creating it first if it does not exist.
+
+        Args:
+            sheet_name (str): Name of the sheet to switch to or create
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            all_sheets = cls.doc.getSheets()
+
+            # Insert a new sheet under that name when it is missing.
+            if not all_sheets.hasByName(sheet_name):
+                all_sheets.insertByName(sheet_name, cls.doc.createInstance("com.sun.star.sheet.Spreadsheet"))
+
+            target = all_sheets.getByName(sheet_name)
+            cls.doc.getCurrentController().setActiveSheet(target)
+
+            # Keep the class-level sheet reference in step with the controller.
+            cls.sheet = target
+        except Exception as err:
+            cls.ret = f"Error: {err}"
+            return False
+
+        cls.ret = "Success"
+        return True
+
+ @classmethod
+    def set_column_values(cls, column_name, data, start_index=2):
+        """
+        Write a list of values into the specified column of the active sheet.
+
+        Numbers (int or float, but not bool) are written as numeric cell values;
+        everything else is written as text. The previous code called
+        cell.setNumber(), which the cell interface does not provide, and wrote
+        all other numbers as text.
+
+        Args:
+            column_name (str): Name of the column to write
+            data (list): List of values to write to the column
+            start_index (int): The 1-based row to start writing at, default is 2 (skip the first row)
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        column_index = cls._get_column_index(column_name)
+        if column_index is None:
+            cls.ret = "Column not found"
+            return False
+        try:
+            for offset, value in enumerate(data):
+                cell = cls.sheet.getCellByPosition(column_index, offset + start_index - 1)
+                if isinstance(value, (int, float)) and not isinstance(value, bool):
+                    cell.setValue(value)
+                else:
+                    cell.setString(str(value))
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+        cls.ret = "Success"
+        return True
+
+ @classmethod
+    def highlight_range(cls, range_str, color=0xFF0000):
+        """
+        Set the background colour of the specified range.
+
+        Args:
+            range_str (str): Range to highlight, in the format of "A1:B10"
+            color (int | str): RGB colour as an integer, or as a hex string such
+                as '0xFF0000' or '#FF0000'. Default is 0xFF0000 (red).
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            if isinstance(color, str):
+                # int(..., 16) accepts an optional '0x' prefix; '#' must be stripped.
+                color = int(color.strip().lstrip("#"), 16)
+            _range = cls.sheet.getCellRangeByName(range_str)
+            _range.CellBackColor = color
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            # Report the cause instead of the bare string "False".
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def transpose_range(cls, source_range, target_cell):
+        """
+        Transpose the specified range and write it starting at the target cell.
+
+        Args:
+            source_range (str): Range to transpose, in the format of "A1:B10"
+            target_cell (str): Top-left cell for the transposed data, in the format of "A1"
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            source = cls.sheet.getCellRangeByName(source_range)
+            target = cls.sheet.getCellRangeByName(target_cell)
+
+            # zip(*rows) yields the columns as tuples. The data array is handed
+            # back as a tuple of tuples because PyUNO maps UNO sequences to
+            # tuples, not lists.
+            transposed_data = tuple(zip(*source.getDataArray()))
+
+            origin = target.CellAddress
+            target_range = cls.sheet.getCellRangeByPosition(
+                origin.Column,
+                origin.Row,
+                origin.Column + len(transposed_data[0]) - 1,
+                origin.Row + len(transposed_data) - 1,
+            )
+            target_range.setDataArray(transposed_data)
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def export_to_csv(cls):
+        """
+        Export the current document to a CSV file next to the original.
+
+        The CSV path is the document's path with its extension replaced by
+        ".csv". The document must already have a URL (i.e. have been saved).
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            doc_url = cls.doc.getURL()
+            if not doc_url:
+                raise ValueError("Document must be saved first")
+
+            # Decode the file URL properly: slicing off "file://" left
+            # percent-escapes (e.g. %20) in the path, which were then encoded
+            # a second time when converting back to a URL.
+            if doc_url.startswith("file://"):
+                base_path = uno.fileUrlToSystemPath(doc_url)
+            else:
+                base_path = doc_url
+
+            # Swap the extension and make sure the path is absolute.
+            csv_path = os.path.abspath(os.path.splitext(base_path)[0] + ".csv")
+
+            # Convert back to the URL form storeToURL expects.
+            csv_url = uno.systemPathToFileUrl(csv_path)
+
+            # FilterOptions tokens, in order: field separator (44 = comma),
+            # text delimiter (0 here), character set (76 = UTF-8) and first
+            # line number (0 here).
+            props = (
+                PropertyValue(Name="FilterName", Value="Text - txt - csv (StarCalc)"),
+                PropertyValue(Name="FilterOptions", Value="44,0,76,0"),
+            )
+
+            cls.doc.storeToURL(csv_url, props)
+            cls.ret = "Success"
+            return True
+
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def sort_column(cls, column_name, ascending=True, start_index=2):
+        """
+        Sort the values of one column numerically and write them back.
+
+        Only the given column is rewritten; other columns are left untouched.
+
+        Args:
+            column_name (str): The name of the column to sort (e.g. 'A')
+            ascending (bool): Whether to sort in ascending order (default True)
+            start_index (int): The 1-based row where sorting starts, default is 2 (skip the first row)
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            column_data = cls.get_column_data(column_name)[start_index - 1 :]
+            column_data = sorted(column_data, key=lambda x: float(x), reverse=not ascending)
+        except Exception:
+            # Narrowed from a bare except so KeyboardInterrupt/SystemExit still propagate.
+            cls.ret = "Error: Invalid column name or data type"
+            return False
+
+        return cls.set_column_values(column_name, column_data, start_index)
+
+ @classmethod
+    def set_validation_list(cls, column_name, values):
+        """
+        Set a drop-down validation list on a column, from row 2 to the last used row.
+
+        Args:
+            column_name (str): The name of the column to set the validation list for
+            values (list): The list of values to use for the validation list
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+
+        def list_entry(val):
+            # Numbers go in as-is; text entries must be double-quoted in the
+            # list formula, otherwise they are parsed as names or references.
+            if isinstance(val, (int, float)) and not isinstance(val, bool):
+                return str(val)
+            text = str(val)
+            if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
+                return text  # caller already quoted it
+            return '"' + text.replace('"', '""') + '"'
+
+        try:
+            column_index = cls._get_column_index(column_name)
+            last_row = cls._get_last_used_row()
+            cell_range = cls.sheet.getCellRangeByPosition(column_index, 1, column_index, last_row)
+
+            # Fetch the existing validation object
+            validation = cell_range.getPropertyValue("Validation")
+
+            # Basic validation type
+            validation.Type = uno.Enum("com.sun.star.sheet.ValidationType", "LIST")
+            validation.Operator = uno.Enum("com.sun.star.sheet.ConditionOperator", "EQUAL")
+
+            # Drop-down entries, separated by ';'
+            validation.ShowList = True
+            validation.Formula1 = ";".join(list_entry(val) for val in values)
+
+            # Write the validation settings back to the cell range
+            cell_range.setPropertyValue("Validation", validation)
+
+            cls.ret = "Success"
+            return True
+
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def hide_row_data(cls, value="N/A"):
+        """
+        Hide each row (from the second row down) that has a cell whose text equals value.
+
+        Rows without such a cell are made visible.
+
+        Args:
+            value (str): The cell text that marks a row to hide, default is 'N/A'
+
+        Returns:
+            bool: Always True; cls.ret is set to "Success".
+        """
+        final_row = cls._get_last_used_row()
+        final_col = cls._get_last_used_column()
+
+        for row in range(1, final_row + 1):
+            # any() stops at the first matching cell, like the original break.
+            matched = any(
+                cls.sheet.getCellByPosition(col, row).getString() == value
+                for col in range(final_col + 1)
+            )
+            cls.sheet.getRows().getByIndex(row).IsVisible = not matched
+
+        cls.ret = "Success"
+        return True
+
+ @classmethod
+    def reorder_columns(cls, column_order):
+        """
+        Reorder the columns of the active sheet's used area.
+
+        The listed columns are placed first, in the given order; columns not
+        listed follow in their existing order. Cell values are copied (as with
+        the previous data-array based implementation); formulas and formatting
+        are not moved.
+
+        The previous implementation moved columns one at a time while still
+        using the original indices, so e.g. ['B', 'C', 'A'] produced C, B, A.
+
+        Args:
+            column_order (list): A list of column names in the desired order
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            listed = [cls._get_column_index(col) for col in column_order]
+            if any(idx is None for idx in listed):
+                raise ValueError("Unknown column name in column_order")
+            if len(set(listed)) != len(listed):
+                raise ValueError("Duplicate column in column_order")
+
+            last_col = max([cls._get_last_used_column()] + listed)
+            last_row = cls._get_last_used_row()
+
+            # Full permutation: requested columns first, the rest unchanged after them.
+            remaining = [idx for idx in range(last_col + 1) if idx not in listed]
+            final_order = listed + remaining
+
+            # Snapshot everything before writing so no column is read after it moved.
+            used = cls.sheet.getCellRangeByPosition(0, 0, last_col, last_row)
+            rows = used.getDataArray()
+            used.setDataArray(tuple(tuple(row[idx] for idx in final_order) for row in rows))
+
+            cls.ret = "Success"
+            return True
+
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def create_pivot_table(
+        cls,
+        source_sheet,
+        table_name,
+        row_fields=None,
+        col_fields=None,
+        value_fields=None,
+        aggregation_function="sum",
+        target_cell="A1",
+    ):
+        """
+        Create a pivot table in the active sheet from the used area of source_sheet.
+
+        Args:
+            source_sheet (str): Name of the sheet that holds the source data
+            table_name (str): Name of the new pivot table
+            row_fields (list, optional): Column names (e.g. 'A') used as row fields
+            col_fields (list, optional): Column names used as column fields
+            value_fields (list, optional): Column names used as data fields
+            aggregation_function (str): One of count, sum, average, min, max
+                (case-insensitive). Other values leave the function unset.
+            target_cell (str): Cell in the active sheet where the table is placed
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            source = cls.doc.getSheets().getByName(source_sheet)
+
+            # Source data: everything from A1 to the end of the used area
+            cursor = source.createCursor()
+            cursor.gotoEndOfUsedArea(False)
+            end_col = cursor.getRangeAddress().EndColumn
+            end_row = cursor.getRangeAddress().EndRow
+            source_range = source.getCellRangeByPosition(0, 0, end_col, end_row)
+
+            # Descriptor for the new pivot table
+            dp_tables = cls.sheet.getDataPilotTables()
+            dp_descriptor = dp_tables.createDataPilotDescriptor()
+            dp_descriptor.setSourceRange(source_range.getRangeAddress())
+            fields = dp_descriptor.getDataPilotFields()
+
+            # Lower-case keys so the default "sum" (and "Sum", "SUM") all match;
+            # the capitalised keys used before never matched the default.
+            function_map = {"count": "COUNT", "sum": "SUM", "average": "AVERAGE", "min": "MIN", "max": "MAX"}
+            function_name = function_map.get(str(aggregation_function).lower())
+
+            # Row fields
+            for field in row_fields or []:
+                dimension = fields.getByIndex(cls._get_column_index(field))
+                dimension.Orientation = uno.Enum("com.sun.star.sheet.DataPilotFieldOrientation", "ROW")
+
+            # Column fields
+            for field in col_fields or []:
+                dimension = fields.getByIndex(cls._get_column_index(field))
+                dimension.Orientation = uno.Enum("com.sun.star.sheet.DataPilotFieldOrientation", "COLUMN")
+
+            # Data fields; value_fields=None no longer raises
+            for field in value_fields or []:
+                dimension = fields.getByIndex(cls._get_column_index(field))
+                dimension.Orientation = uno.Enum("com.sun.star.sheet.DataPilotFieldOrientation", "DATA")
+                if function_name is not None:
+                    dimension.Function = uno.Enum("com.sun.star.sheet.GeneralFunction", function_name)
+
+            # Insert the pivot table into the active sheet
+            dp_tables.insertNewByName(
+                table_name,
+                cls.sheet.getCellRangeByName(target_cell).CellAddress,
+                dp_descriptor,
+            )
+
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def merge_cells(cls, range_str):
+        """
+        Merge the cells of the given range in the active sheet.
+
+        Args:
+            range_str (str): Range of cells to merge, in the format 'A1:B10'
+
+        Returns:
+            bool: True on success, False on failure
+        """
+        try:
+            target = cls.sheet.getCellRangeByName(range_str)
+
+            # Only merge when the range is not reported as merged already.
+            if not target.getIsMerged():
+                target.merge(True)
+        except Exception as err:
+            cls.ret = f"Error: {err}"
+            return False
+
+        cls.ret = "Success"
+        return True
+
+ @classmethod
+    def set_cell_value(cls, cell, value):
+        """
+        Set a value to a specific cell in the active worksheet.
+
+        Strings starting with '=' are stored as formulas. Float inputs are
+        stored unchanged. Anything else is tried as an integer, then as a
+        float, and finally stored as text.
+
+        Args:
+            cell (str): Cell reference (e.g., 'A1')
+            value (str | int | float): Value to set in the cell
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            cell_obj = cls.sheet.getCellRangeByName(cell)
+
+            if isinstance(value, str) and value.startswith("="):
+                cell_obj.Formula = value
+            elif isinstance(value, float):
+                # int(3.7) succeeds and would silently truncate to 3, so floats
+                # bypass the integer attempt below.
+                cell_obj.Value = value
+            else:
+                try:
+                    cell_obj.Value = int(value)
+                except ValueError:
+                    try:
+                        cell_obj.Value = float(value)
+                    except ValueError:
+                        # Not numeric: store as text
+                        cell_obj.String = value
+
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def format_range(
+        cls, range_str, background_color=None, font_color=None, bold=None, alignment=None, italic=None
+    ):
+        """
+        Apply formatting to the specified range in the active worksheet
+
+        Args:
+            range_str (str): Range to format, in the format of 'A1:B10'
+            background_color (str, optional): Background color in hex format (e.g., '#0000ff')
+            font_color (str, optional): Font color in hex format (e.g., '#ffffff')
+            bold (bool, optional): Whether to make the text bold
+            alignment (str, optional): Text alignment (left, center, right); other values are ignored
+            italic (bool, optional): Whether to make the text italic. Added as the
+                last parameter so existing positional calls keep working.
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            cell_range = cls.sheet.getCellRangeByName(range_str)
+
+            # Colours arrive as hex strings and are stored as integers
+            if background_color:
+                cell_range.CellBackColor = int(background_color.replace("#", ""), 16)
+
+            if font_color:
+                cell_range.CharColor = int(font_color.replace("#", ""), 16)
+
+            # 150.0 is bold, 100.0 is normal weight
+            if bold is not None:
+                cell_range.CharWeight = 150.0 if bold else 100.0
+
+            if italic is not None:
+                cell_range.CharPosture = uno.Enum("com.sun.star.awt.FontSlant", "ITALIC" if italic else "NONE")
+
+            # Horizontal alignment: build the enum value instead of mutating the one read back
+            if alignment:
+                justify = {"left": "LEFT", "center": "CENTER", "right": "RIGHT"}.get(alignment)
+                if justify is not None:
+                    cell_range.HoriJustify = uno.Enum("com.sun.star.table.CellHoriJustify", justify)
+
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+ def create_chart(cls, chart_type, data_range, title=None, x_axis_title=None, y_axis_title=None):
+ """
+ Create a chart in the active worksheet based on the specified data range.
+
+ Args:
+ chart_type (str): Type of chart to create (bar, column, line, pie, scatter, area)
+ data_range (str): Range containing the data for the chart, in the format of 'A1:B10'
+ title (str, optional): Title for the chart
+ x_axis_title (str, optional): Title for the X axis
+ y_axis_title (str, optional): Title for the Y axis
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ # 将图表类型映射到LibreOffice的图表类型常量
+ try:
+ chart_type_map = {
+ "bar": "com.sun.star.chart.BarDiagram",
+ "column": "com.sun.star.chart.ColumnDiagram",
+ "line": "com.sun.star.chart.LineDiagram",
+ "pie": "com.sun.star.chart.PieDiagram",
+ "scatter": "com.sun.star.chart.ScatterDiagram",
+ "area": "com.sun.star.chart.AreaDiagram",
+ }
+
+ # 获取数据范围
+ cell_range_address = cls.sheet.getCellRangeByName(data_range).getRangeAddress()
+
+ # 创建图表
+ charts = cls.sheet.getCharts()
+ rect = uno.createUnoStruct("com.sun.star.awt.Rectangle")
+ rect.Width = 10000 # 默认宽度
+ rect.Height = 7000 # 默认高度
+
+ # 添加图表到工作表
+ charts.addNewByName("MyChart", rect, (cell_range_address,), False, False)
+
+ # 获取图表
+ chart = charts.getByName("MyChart")
+ chart_doc = chart.getEmbeddedObject()
+
+ # 设置图表类型
+ diagram = chart_doc.createInstance(chart_type_map[chart_type])
+ chart_doc.setDiagram(diagram)
+
+ # 设置图表标题
+ if title:
+ chart_doc.Title.String = title
+
+ # 设置X轴标题
+ if x_axis_title:
+ chart_doc.Diagram.XAxis.AxisTitle.String = x_axis_title
+
+ # 设置Y轴标题
+ if y_axis_title:
+ chart_doc.Diagram.YAxis.AxisTitle.String = y_axis_title
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def freeze_panes(cls, rows=0, columns=0):
+ """
+ 冻结活动工作表中的行和/或列
+
+ Args:
+ rows (int): 从顶部开始冻结的行数
+ columns (int): 从左侧开始冻结的列数
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取当前视图
+ view = cls.doc.getCurrentController()
+
+ # 设置冻结窗格
+ view.freezeAtPosition(columns, rows)
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def rename_sheet(cls, old_name, new_name):
+ """
+ 重命名工作表
+
+ Args:
+ old_name (str): 要重命名的工作表的当前名称
+ new_name (str): 工作表的新名称
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取所有工作表
+ sheets = cls.doc.getSheets()
+
+ # 检查原工作表是否存在
+ if not sheets.hasByName(old_name):
+ return False
+
+ # 检查新名称是否已存在
+ if sheets.hasByName(new_name):
+ return False
+
+ # 获取要重命名的工作表
+ sheet = sheets.getByName(old_name)
+
+ # 重命名工作表
+ sheet.setName(new_name)
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def copy_sheet(cls, source_sheet, new_sheet_name=None):
+ """
+ 创建工作簿中现有工作表的副本
+
+ Args:
+ source_sheet (str): 要复制的工作表名称
+ new_sheet_name (str, optional): 新工作表副本的名称,如果不提供则自动生成
+
+ Returns:
+ str: 新创建的工作表名称,如果失败则返回None
+ """
+ try:
+ # 获取所有工作表
+ sheets = cls.doc.getSheets()
+
+ # 检查源工作表是否存在
+ if not sheets.hasByName(source_sheet):
+ return None
+
+ # 如果没有提供新名称,则生成一个
+ if not new_sheet_name:
+ # 生成类似 "Sheet1 (2)" 的名称
+ base_name = source_sheet
+ counter = 1
+ new_sheet_name = f"{base_name} ({counter})"
+
+ # 确保名称不重复
+ while sheets.hasByName(new_sheet_name):
+ counter += 1
+ new_sheet_name = f"{base_name} ({counter})"
+
+ # 检查新名称是否已存在
+ if sheets.hasByName(new_sheet_name):
+ return None # 名称已存在,无法创建
+
+ # 获取源工作表的索引
+ source_index = -1
+ for i in range(sheets.getCount()):
+ if sheets.getByIndex(i).getName() == source_sheet:
+ source_index = i
+ break
+
+ if source_index == -1:
+ return None
+
+ # 复制工作表
+ sheets.copyByName(source_sheet, new_sheet_name, source_index + 1)
+
+ cls.ret = f"New sheet created: {new_sheet_name}"
+ return new_sheet_name
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return None
+
+ @classmethod
+ def reorder_sheets(cls, sheet_name, position):
+ """
+ 重新排序工作表在工作簿中的位置
+
+ Args:
+ sheet_name (str): 要移动的工作表名称
+ position (int): 要移动到的位置(基于0的索引)
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取所有工作表
+ sheets = cls.doc.getSheets()
+
+ # 检查工作表是否存在
+ if not sheets.hasByName(sheet_name):
+ return False
+
+ # 获取工作表总数
+ sheet_count = sheets.getCount()
+
+ # 检查位置是否有效
+ if position < 0 or position >= sheet_count:
+ return False
+
+ # 获取要移动的工作表
+ sheet = sheets.getByName(sheet_name)
+
+ # 获取工作表当前索引
+ current_index = -1
+ for i in range(sheet_count):
+ if sheets.getByIndex(i).Name == sheet_name:
+ current_index = i
+ break
+
+ if current_index == -1:
+ return False
+
+ # 移动工作表到指定位置
+ sheets.moveByName(sheet_name, position)
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def set_chart_legend_position(cls, position):
+ """
+ Set the position of the legend in a chart in the active worksheet.
+
+ Args:
+ position (str): Position of the legend ('top', 'bottom', 'left', 'right', 'none')
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取当前工作表中的所有图表
+ charts = cls.sheet.getCharts()
+ if charts.getCount() == 0:
+ return False
+
+ # 获取第一个图表(假设我们要修改的是第一个图表)
+ chart = charts.getByIndex(0)
+ chart_obj = chart.getEmbeddedObject()
+
+ # 获取图表的图例
+ diagram = chart_obj.getDiagram()
+ legend = chart_obj.getLegend()
+
+ # 根据指定的位置设置图例位置
+ if position == "none":
+ # 如果选择"none",则隐藏图例
+ chart_obj.HasLegend = False
+ else:
+ # 确保图例可见
+ chart_obj.HasLegend = True
+
+ import inspect
+
+ print(inspect.getmembers(legend))
+
+ # 设置图例位置
+ if position == "top":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "TOP")
+ elif position == "bottom":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "BOTTOM")
+ elif position == "left":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "LEFT")
+ elif position == "right":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "RIGHT")
+
+ legend.Alignment = pos
+
+ cls.ret = "Success"
+ return True
+ except Exception:
+ cls.ret = "Error"
+ return False
+
+ @classmethod
+ def set_number_format(cls, range_str, format_type, decimal_places=None):
+ """
+ Apply a specific number format to a range of cells in the active worksheet.
+
+ Args:
+ range_str (str): Range to format, in the format of 'A1:B10'
+ format_type (str): Type of number format to apply
+ decimal_places (int, optional): Number of decimal places to display
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取单元格范围
+ cell_range = cls.sheet.getCellRangeByName(range_str)
+
+ # 获取数字格式化服务
+ number_formats = cls.doc.NumberFormats
+ locale = cls.doc.CharLocale
+
+ # 根据格式类型设置格式字符串
+ format_string = ""
+
+ if format_type == "general":
+ format_string = "General"
+ elif format_type == "number":
+ if decimal_places is not None:
+ format_string = f"0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}"
+ else:
+ format_string = "0"
+ elif format_type == "currency":
+ if decimal_places is not None:
+ format_string = f"[$¥-804]#,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}"
+ else:
+ format_string = "[$¥-804]#,##0.00"
+ elif format_type == "accounting":
+ if decimal_places is not None:
+ format_string = f"_-[$¥-804]* #,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}_-;-[$¥-804]* #,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}_-;_-[$¥-804]* \"-\"_-;_-@_-"
+ else:
+ format_string = '_-[$¥-804]* #,##0.00_-;-[$¥-804]* #,##0.00_-;_-[$¥-804]* "-"??_-;_-@_-'
+ elif format_type == "date":
+ format_string = "YYYY/MM/DD"
+ elif format_type == "time":
+ format_string = "HH:MM:SS"
+ elif format_type == "percentage":
+ if decimal_places is not None:
+ format_string = f"0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}%"
+ else:
+ format_string = "0.00%"
+ elif format_type == "fraction":
+ format_string = "# ?/?"
+ elif format_type == "scientific":
+ if decimal_places is not None:
+ format_string = f"0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}E+00"
+ else:
+ format_string = "0.00E+00"
+ elif format_type == "text":
+ format_string = "@"
+
+ # 获取格式键
+ format_key = number_formats.queryKey(format_string, locale, True)
+
+ # 如果格式不存在,则添加
+ if format_key == -1:
+ format_key = number_formats.addNew(format_string, locale)
+
+ # 应用格式
+ cell_range.NumberFormat = format_key
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def adjust_column_width(cls, columns, width=None, autofit=False):
+ """
+ 调整活动工作表中指定列的宽度
+
+ Args:
+ columns (str): 要调整的列范围,例如 'A:C' 表示从A列到C列
+ width (float, optional): 要设置的宽度(以字符为单位)
+ autofit (bool, optional): 是否自动调整列宽以适应内容
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 解析列范围
+ col_range = columns.split(":")
+ start_col = cls._column_name_to_index(col_range[0])
+
+ if len(col_range) > 1:
+ end_col = cls._column_name_to_index(col_range[1])
+ else:
+ end_col = start_col
+
+ # 获取列对象
+ columns_obj = cls.sheet.getColumns()
+
+ # 遍历指定的列范围
+ for col_idx in range(start_col, end_col + 1):
+ column = columns_obj.getByIndex(col_idx)
+
+ if autofit:
+ # 自动调整列宽
+ column.OptimalWidth = True
+ elif width is not None:
+ # 设置指定宽度(转换为1/100毫米)
+ # 大约一个字符宽度为256 (1/100 mm)
+ column.Width = int(width * 256)
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def adjust_row_height(cls, rows, height=None, autofit=False):
+ """
+ 调整活动工作表中指定行的高度
+
+ Args:
+ rows (str): 要调整的行范围,例如 '1:10' 表示第1行到第10行
+ height (float, optional): 要设置的高度(以点为单位)
+ autofit (bool, optional): 是否自动调整行高以适应内容
+
+ Returns:
+ bool: 操作成功返回True,否则返回False
+ """
+ try:
+ # 解析行范围
+ row_range = rows.split(":")
+ start_row = int(row_range[0])
+ end_row = int(row_range[1]) if len(row_range) > 1 else start_row
+
+ # 获取行对象
+ for row_index in range(start_row, end_row + 1):
+ row = cls.sheet.getRows().getByIndex(row_index - 1) # 索引从0开始
+
+ if autofit:
+ # 自动调整行高以适应内容
+ row.OptimalHeight = True
+ elif height is not None:
+ # 设置指定高度(将点转换为1/100毫米,LibreOffice使用的单位)
+ # 1点 ≈ 35.28 1/100毫米
+ row.Height = int(height * 35.28)
+ row.OptimalHeight = False
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def export_to_pdf(cls, file_path=None, sheets=None, open_after_export=False):
+ """
+ 将当前文档或指定工作表导出为PDF文件
+
+ Args:
+ file_path (str, optional): PDF文件保存路径,如果不指定则使用当前文档路径
+ sheets (list, optional): 要包含在PDF中的工作表名称列表,如果不指定则包含所有工作表
+ open_after_export (bool, optional): 导出后是否打开PDF文件
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 如果未指定文件路径,则使用当前文档路径并更改扩展名为.pdf
+ if not file_path:
+ if cls.doc.hasLocation():
+ url = cls.doc.getLocation()
+ file_path = uno.fileUrlToSystemPath(url)
+ file_path = os.path.splitext(file_path)[0] + ".pdf"
+ else:
+ # 如果文档尚未保存,则在用户桌面创建临时文件
+ desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
+ file_path = os.path.join(desktop_path, "LibreOffice_Export.pdf")
+
+ # 确保文件路径是系统路径,然后转换为URL
+ pdf_url = uno.systemPathToFileUrl(os.path.abspath(file_path))
+
+ # 创建导出属性
+ export_props = []
+
+ # 设置过滤器名称
+ export_props.append(PropertyValue(Name="FilterName", Value="calc_pdf_Export"))
+
+ # 如果指定了特定工作表,则只导出这些工作表
+ if sheets and isinstance(sheets, list) and len(sheets) > 0:
+ # 获取所有工作表
+ all_sheets = cls.doc.getSheets()
+ selection = []
+
+ # 查找指定的工作表
+ for sheet_name in sheets:
+ if all_sheets.hasByName(sheet_name):
+ sheet = all_sheets.getByName(sheet_name)
+ selection.append(sheet)
+
+ # 如果找到了指定的工作表,则设置导出选择
+ if selection:
+ export_props.append(PropertyValue(Name="Selection", Value=tuple(selection)))
+
+ # 导出PDF
+ cls.doc.storeToURL(pdf_url, tuple(export_props))
+
+ # 如果需要,导出后打开PDF
+ if open_after_export:
+ if sys.platform.startswith("darwin"): # macOS
+ subprocess.call(("open", file_path))
+ elif os.name == "nt": # Windows
+ os.startfile(file_path)
+ elif os.name == "posix": # Linux
+ subprocess.call(("xdg-open", file_path))
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def set_zoom_level(cls, zoom_percentage):
+ """
+ 调整当前工作表的缩放级别,使单元格看起来更大或更小
+
+ Args:
+ zoom_percentage (int): 缩放级别的百分比(例如,75表示75%,100表示正常大小,150表示放大)。
+ 有效范围通常为10-400。
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取当前控制器
+ controller = cls.doc.getCurrentController()
+
+ # 设置缩放值
+ # 确保缩放值在合理范围内
+ if zoom_percentage < 10:
+ zoom_percentage = 10
+ elif zoom_percentage > 400:
+ zoom_percentage = 400
+
+ # 应用缩放值
+ controller.ZoomValue = zoom_percentage
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+
+# Manual smoke test: runs a handful of CalcTools operations in sequence when the
+# module is executed as a script. The calls act on the document CalcTools is bound to.
+if __name__ == "__main__":
+ # NOTE(review): adjust_column_width calls `_column_name_to_index`; confirm that
+ # `_get_column_index` also exists on CalcTools.
+ print(CalcTools._get_column_index("A"))
+ print(CalcTools.get_workbook_info())
+ print(CalcTools.get_content())
+ CalcTools.switch_active_sheet("Sheet2")
+ # The commented-out `helper.*` calls below are disabled and never run.
+ # helper.set_column_values('A', [1, 2, 3, 4, 5])
+ # helper.highlight_range('A1:A3', 'Red')
+ # helper.transpose_range('A1:D5', 'B8')
+ print(CalcTools.get_column_data("A"))
+ CalcTools.sort_column("A", True)
+ CalcTools.hide_row_data("N/A")
+ CalcTools.reorder_columns(["B", "A", "C"])
+ # freeze_panes takes (rows, columns): one row and one column here.
+ CalcTools.freeze_panes(1, 1)
+ # helper.set_validation_list('C', ['Pass', 'Fail', 'Held'])
+ CalcTools.export_to_csv()
diff --git a/mm_agents/autoglm/tools/package/libreoffice_impress.py b/mm_agents/autoglm/tools/package/libreoffice_impress.py
new file mode 100644
index 0000000..0b8ba17
--- /dev/null
+++ b/mm_agents/autoglm/tools/package/libreoffice_impress.py
@@ -0,0 +1,1424 @@
+import json
+import os
+
+import uno
+from com.sun.star.awt.FontSlant import ITALIC, NONE
+from com.sun.star.awt.FontWeight import BOLD, NORMAL
+from com.sun.star.beans import PropertyValue
+from com.sun.star.drawing.TextHorizontalAdjust import CENTER, LEFT, RIGHT
+
+
+class ImpressTools:
+ # Helper methods for driving the current LibreOffice document over UNO.
+ # The attributes below are class-level statements, so the connection is made
+ # once, when the class body is executed.
+ #
+ # Local component context, used only to create the URL resolver.
+ localContext = uno.getComponentContext()
+ resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+ # Component context of the office instance listening on localhost:2002.
+ ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext")
+ desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
+ # The component that is current at the time the class body runs.
+ doc = desktop.getCurrentComponent()
+ # Result or error message of the most recent call; print_result() prints it.
+ ret = ""
+
+ @classmethod
+ def close_other_window(cls):
+ """关闭除当前文档外的所有文档"""
+ components = cls.desktop.getComponents().createEnumeration()
+ current_url = cls.doc.getURL()
+ while components.hasMoreElements():
+ doc = components.nextElement()
+ if doc.getURL() != current_url:
+ doc.close(True)
+
+ @classmethod
+ def save(cls):
+ """保存文档到当前位置"""
+ try:
+ if cls.doc.hasLocation():
+ cls.doc.store()
+ cls.ret = "Success"
+ else:
+ cls.ret = "Error: Document has no save location"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def maximize_window(cls):
+ """
+ 将窗口设置为工作区最大尺寸
+ 使用工作区域大小(考虑任务栏等)
+ """
+ window = cls.doc.getCurrentController().getFrame().getContainerWindow()
+ toolkit = window.getToolkit()
+ device = toolkit.createScreenCompatibleDevice(0, 0)
+ workarea = toolkit.getWorkArea()
+ window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15)
+
+ @classmethod
+ def print_result(cls):
+ print(cls.ret)
+
+ @classmethod
+ def env_info(cls, page_indices=None):
+ """
+ 获取指定页面的内容
+ :param page_indices: 页码列表,如果为None则获取所有页面
+ :return: 包含各页面内容的列表
+ """
+ try:
+ pages = cls.doc.getDrawPages()
+ content_str = ""
+ if page_indices is None:
+ page_indices = range(pages.getCount())
+ for page_index in page_indices:
+ if 0 <= page_index < pages.getCount():
+ page = pages.getByIndex(page_index)
+ page_content = []
+ for i in range(page.getCount()):
+ shape = page.getByIndex(i)
+ if hasattr(shape, "getText"):
+ text = shape.getText()
+ if text:
+ page_content.append("- Box " + str(i) + ": " + text.getString().strip())
+
+ c = "\n".join(page_content)
+ content_str += f"Slide {page_index+1}:\n{c}\n\n"
+
+ cur_idx = cls.get_current_slide_index() + 1
+ content_str = content_str + f"Current Slide Index: {cur_idx}"
+ cls.ret = content_str
+ return content_str
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return []
+
+ @classmethod
+ def get_current_slide_index(cls):
+ """
+ Gets the index of the currently active slide in the presentation.
+ :return: The index of the currently active slide (0-based)
+ """
+ try:
+ controller = cls.doc.getCurrentController()
+ current_page = controller.getCurrentPage()
+ pages = cls.doc.getDrawPages()
+ for i in range(pages.getCount()):
+ if pages.getByIndex(i) == current_page:
+ cls.ret = i
+ return i
+ cls.ret = "Current slide not found"
+ return -1
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return -1
+
+ @classmethod
+ def go_to_slide(cls, slide_index):
+ """
+ Navigates to a specific slide in the presentation based on its index.
+
+ Args:
+ slide_index (int): The index of the slide to navigate to (1-based indexing)
+
+ Returns:
+ bool: True if navigation was successful, False otherwise
+ """
+ try:
+ zero_based_index = slide_index - 1
+ controller = cls.doc.getCurrentController()
+ if not controller:
+ cls.ret = "Error: Could not get document controller"
+ return False
+ pages = cls.doc.getDrawPages()
+ if zero_based_index < 0 or zero_based_index >= pages.getCount():
+ cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1-{pages.getCount()}"
+ return False
+ target_slide = pages.getByIndex(zero_based_index)
+ controller.setCurrentPage(target_slide)
+ cls.ret = f"Successfully navigated to slide {slide_index}"
+ return True
+ except Exception as e:
+ cls.ret = f"Error navigating to slide: {str(e)}"
+ return False
+
+ @classmethod
+ def get_slide_count(cls):
+ """
+ Gets the total number of slides in the current presentation.
+ :return: The total number of slides as an integer
+ """
+ try:
+ pages = cls.doc.getDrawPages()
+ count = pages.getCount()
+ cls.ret = count
+ return count
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return 0
+
+ @classmethod
+ def duplicate_slide(cls, slide_index):
+ """
+ Creates a duplicate of a specific slide and places it at the end of the presentation.
+
+ :param slide_index: The index of the slide to duplicate (1-based indexing)
+ :return: True if successful, False otherwise
+ """
+ try:
+ zero_based_index = slide_index - 1
+ draw_pages = cls.doc.getDrawPages()
+ if zero_based_index < 0 or zero_based_index >= draw_pages.getCount():
+ cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {draw_pages.getCount()}"
+ return False
+ controller = cls.doc.getCurrentController()
+ controller.setCurrentPage(draw_pages.getByIndex(zero_based_index))
+ dispatcher = cls.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", cls.ctx)
+ frame = controller.getFrame()
+ dispatcher.executeDispatch(frame, ".uno:DuplicatePage", "", 0, ())
+ duplicated_slide_index = zero_based_index + 1
+ slide_count = draw_pages.getCount()
+ if duplicated_slide_index < slide_count - 1:
+ controller.setCurrentPage(draw_pages.getByIndex(duplicated_slide_index))
+ moves_needed = slide_count - duplicated_slide_index - 1
+ for _ in range(moves_needed):
+ dispatcher.executeDispatch(frame, ".uno:MovePageDown", "", 0, ())
+ cls.ret = f"Slide {slide_index} duplicated successfully and moved to the end"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def set_slide_font(cls, slide_index, font_name):
+ """
+ Sets the font style for all text elements in a specific slide, including the title.
+
+ Args:
+ slide_index (int): The index of the slide to modify (1-based indexing)
+ font_name (str): The name of the font to apply (e.g., 'Arial', 'Times New Roman', 'Calibri')
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ zero_based_index = slide_index - 1
+ slides = cls.doc.getDrawPages()
+ if zero_based_index < 0 or zero_based_index >= slides.getCount():
+ cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1 to {slides.getCount()}."
+ return False
+ slide = slides.getByIndex(zero_based_index)
+ for i in range(slide.getCount()):
+ shape = slide.getByIndex(i)
+ if hasattr(shape, "getText"):
+ text = shape.getText()
+ if text:
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.setPropertyValue("CharFontName", font_name)
+ cls.ret = f"Successfully set font to '{font_name}' for all text elements in slide {slide_index}."
+ return True
+ except Exception as e:
+ cls.ret = f"Error setting font: {str(e)}"
+ return False
+
+ @classmethod
+ def write_text(cls, content, page_index, box_index, bold=False, italic=False, size=None, append=False):
+ """
+ Writes text to a specific textbox on a slide
+
+ :param content: The text content to add
+ :param page_index: The index of the slide (1-based indexing)
+ :param box_index: The index of the textbox to modify (0-based indexing)
+ :param bold: Whether to make the text bold, default is False
+ :param italic: Whether to make the text italic, default is False
+ :param size: The size of the text. If None, uses the box's current font size.
+ :param append: Whether to append the text, default is False. If you want to observe some formats(like a bullet at the beginning) or keep the original text, you should set up it.
+ :return: True if successful, False otherwise
+ """
+ try:
+ zero_based_page_index = page_index - 1
+ pages = cls.doc.getDrawPages()
+ if zero_based_page_index < 0 or zero_based_page_index >= pages.getCount():
+ cls.ret = f"Error: Page index {page_index} is out of range"
+ return False
+ page = pages.getByIndex(zero_based_page_index)
+ if box_index < 0 or box_index >= page.getCount():
+ cls.ret = f"Error: Box index {box_index} is out of range"
+ return False
+ shape = page.getByIndex(box_index)
+ if not hasattr(shape, "String"):
+ cls.ret = f"Error: The shape at index {box_index} cannot contain text"
+ return False
+ if append:
+ shape.String = shape.String + content
+ else:
+ shape.String = content
+ if hasattr(shape, "getCharacterProperties"):
+ char_props = shape.getCharacterProperties()
+ if bold:
+ char_props.CharWeight = BOLD
+ else:
+ char_props.CharWeight = NORMAL
+ if italic:
+ char_props.CharPosture = ITALIC
+ else:
+ char_props.CharPosture = NONE
+ if size is not None:
+ char_props.CharHeight = size
+
+ cls.ret = f"Text successfully written to page {page_index}, box {box_index}"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def set_style(cls, slide_index, box_index, bold=None, italic=None, underline=None):
+ """
+ Sets the style properties for the specified textbox on a slide.
+
+ :param slide_index: The index of the slide to modify (1-based indexing)
+ :param box_index: The index of the textbox to modify (0-based indexing)
+ :param bold: Whether to make the text bold
+ :param italic: Whether to make the text italic
+ :param underline: Whether to underline the text
+ :return: True if successful, False otherwise
+ """
+ try:
+ pages = cls.doc.getDrawPages()
+ if slide_index < 1 or slide_index > pages.getCount():
+ cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {pages.getCount()}"
+ return False
+ page = pages.getByIndex(slide_index - 1)
+ if box_index < 0 or box_index >= page.getCount():
+ cls.ret = f"Error: Invalid box index {box_index}. Valid range is 0 to {page.getCount() - 1}"
+ return False
+ shape = page.getByIndex(box_index)
+ if not hasattr(shape, "getText"):
+ cls.ret = "Error: The specified shape does not contain text"
+ return False
+ text = shape.getText()
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ if bold is not None:
+ cursor.setPropertyValue("CharWeight", BOLD if bold else NORMAL)
+ if italic is not None:
+ cursor.setPropertyValue("CharPosture", ITALIC if italic else NONE)
+ if underline is not None:
+ cursor.setPropertyValue("CharUnderline", 1 if underline else 0)
+ cls.ret = "Style applied successfully"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def configure_auto_save(cls, enabled, interval_minutes):
+ """
+ Enables or disables auto-save functionality for the current document and sets the auto-save interval.
+
+ :param enabled: Whether to enable (True) or disable (False) auto-save
+ :param interval_minutes: The interval in minutes between auto-saves (minimum 1 minute)
+ :return: True if successful, False otherwise
+ """
+ try:
+ if interval_minutes < 1:
+ interval_minutes = 1
+ config_provider = cls.ctx.ServiceManager.createInstanceWithContext(
+ "com.sun.star.configuration.ConfigurationProvider", cls.ctx
+ )
+ prop = PropertyValue()
+ prop.Name = "nodepath"
+ prop.Value = "/org.openoffice.Office.Common/Save/Document"
+ config_access = config_provider.createInstanceWithArguments(
+ "com.sun.star.configuration.ConfigurationUpdateAccess", (prop,)
+ )
+ config_access.setPropertyValue("AutoSave", enabled)
+ config_access.setPropertyValue("AutoSaveTimeIntervall", interval_minutes)
+ config_access.commitChanges()
+ cls.ret = f"Auto-save {'enabled' if enabled else 'disabled'} with interval of {interval_minutes} minutes"
+ return True
+ except Exception as e:
+ cls.ret = f"Error configuring auto-save: {str(e)}"
+ return False
+
+ @classmethod
+ def set_background_color(cls, slide_index, box_index, color):
+ """
+ Sets the background color for the specified textbox on a slide.
+
+ Args:
+ slide_index (int): The index of the slide containing the textbox (1-based indexing)
+ box_index (int): The index of the textbox to modify (0-based indexing)
+ color (str): The color to apply to the textbox (e.g., 'red', 'green', 'blue', 'yellow', or hex color code)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ zero_based_slide_index = slide_index - 1
+ slides = cls.doc.getDrawPages()
+ if zero_based_slide_index < 0 or zero_based_slide_index >= slides.getCount():
+ cls.ret = f"Error: Slide index {slide_index} is out of range"
+ return False
+ slide = slides.getByIndex(zero_based_slide_index)
+ if box_index < 0 or box_index >= slide.getCount():
+ cls.ret = f"Error: Box index {box_index} is out of range"
+ return False
+ shape = slide.getByIndex(box_index)
+ color_int = 0
+ color_map = {
+ "red": 16711680,
+ "green": 65280,
+ "blue": 255,
+ "yellow": 16776960,
+ "black": 0,
+ "white": 16777215,
+ "purple": 8388736,
+ "orange": 16753920,
+ "pink": 16761035,
+ "gray": 8421504,
+ "brown": 10824234,
+ "cyan": 65535,
+ "magenta": 16711935,
+ }
+ if color.lower() in color_map:
+ color_int = color_map[color.lower()]
+ elif color.startswith("#") and len(color) == 7:
+ color_int = int(color[1:], 16)
+ else:
+ cls.ret = f"Error: Invalid color format: {color}"
+ return False
+ shape.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "SOLID")
+ shape.FillColor = color_int
+ cls.ret = f"Background color of textbox {box_index} on slide {slide_index} set to {color}"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def set_text_color(cls, slide_index, box_index, color):
+ """
+ Sets the text color for the specified textbox on a slide.
+
+ Args:
+ slide_index (int): The index of the slide to modify (1-based indexing)
+ box_index (int): The index of the textbox to modify (0-based indexing)
+ color (str): The color to apply to the text (e.g., 'red', 'green', 'blue', 'black', or hex color code)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ zero_based_slide_index = slide_index - 1
+ slides = cls.doc.getDrawPages()
+ if zero_based_slide_index < 0 or zero_based_slide_index >= slides.getCount():
+ cls.ret = f"Error: Slide index {slide_index} is out of range"
+ return False
+ slide = slides.getByIndex(zero_based_slide_index)
+ if box_index < 0 or box_index >= slide.getCount():
+ cls.ret = f"Error: Box index {box_index} is out of range"
+ return False
+ shape = slide.getByIndex(box_index)
+ if not hasattr(shape, "getText"):
+ cls.ret = f"Error: Shape at index {box_index} does not contain text"
+ return False
+ color_int = 0
+ if color.startswith("#"):
+ color_int = int(color[1:], 16)
+ else:
+ color_map = {
+ "red": 16711680,
+ "green": 43315,
+ "blue": 255,
+ "black": 0,
+ "white": 16777215,
+ "yellow": 16776960,
+ "cyan": 65535,
+ "magenta": 16711935,
+ "gray": 8421504,
+ }
+ if color.lower() in color_map:
+ color_int = color_map[color.lower()]
+ else:
+ cls.ret = f"Error: Unsupported color '{color}'"
+ return False
+ text = shape.getText()
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.setPropertyValue("CharColor", color_int)
+ cls.ret = f"Successfully set text color to {color} for textbox {box_index} on slide {slide_index}"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def delete_content(cls, slide_index, box_index):
+ """
+ Deletes the specified textbox from a slide.
+
+ :param slide_index: The index of the slide to modify (1-based indexing)
+ :param box_index: The index of the textbox to modify (0-based indexing)
+ :return: True if successful, False otherwise
+ """
+ try:
+ pages = cls.doc.getDrawPages()
+ zero_based_slide_index = slide_index - 1
+ if zero_based_slide_index < 0 or zero_based_slide_index >= pages.getCount():
+ cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {pages.getCount()}"
+ return False
+ slide = pages.getByIndex(zero_based_slide_index)
+ if box_index < 0 or box_index >= slide.getCount():
+ cls.ret = f"Error: Invalid box index {box_index}. Valid range is 0 to {slide.getCount() - 1}"
+ return False
+ shape = slide.getByIndex(box_index)
+ slide.remove(shape)
+ cls.ret = f"Successfully deleted textbox {box_index} from slide {slide_index}"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def set_slide_orientation(cls, orientation):
+     """
+     Switch every slide between portrait and landscape by swapping page width and height.
+
+     The current orientation is read from the first slide. When a swap is needed, the
+     first slide's swapped dimensions are applied to all slides.
+
+     :param orientation: The desired orientation for the slides ('portrait' or 'landscape', case-insensitive)
+     :return: True if the slides are (now) in the requested orientation, False on invalid input or error
+     """
+     try:
+         requested = orientation.strip().lower() if isinstance(orientation, str) else orientation
+         # Reject unknown values up front; otherwise they would be reported as "already in ... orientation".
+         if requested not in ("portrait", "landscape"):
+             cls.ret = f"Error: Invalid orientation '{orientation}'. Use 'portrait' or 'landscape'"
+             return False
+         draw_pages = cls.doc.getDrawPages()
+         if draw_pages.getCount() == 0:
+             cls.ret = "Error: Presentation has no slides"
+             return False
+         first_page = draw_pages.getByIndex(0)
+         current_width = first_page.Width
+         current_height = first_page.Height
+         needs_swap = (requested == "portrait" and current_width > current_height) or (
+             requested == "landscape" and current_width < current_height
+         )
+         if not needs_swap:
+             # Also reached for square slides, where swapping would change nothing.
+             cls.ret = f"Slides are already in {requested} orientation"
+             return True
+         new_width, new_height = current_height, current_width
+         for i in range(draw_pages.getCount()):
+             page = draw_pages.getByIndex(i)
+             page.Width = new_width
+             page.Height = new_height
+         cls.ret = f"Changed slide orientation to {requested}"
+         return True
+     except Exception as e:
+         cls.ret = f"Error changing slide orientation: {str(e)}"
+         return False
+
+ @classmethod
+ def position_box(cls, slide_index, box_index, position):
+     """
+     Move a shape (textbox, image, ...) to a named anchor position on its slide.
+
+     Coordinates are computed from the slide's own page size and the shape's size.
+     Edge-anchored positions keep a fixed margin of 500 units from the slide edge.
+
+     :param slide_index: The index of the slide containing the box (1-based indexing)
+     :param box_index: The index of the box to position (0-based indexing)
+     :param position: One of 'left', 'right', 'center', 'top', 'bottom', 'top-left',
+         'top-right', 'bottom-left', 'bottom-right'
+     :return: True if successful, False otherwise
+     """
+     try:
+         pages = cls.doc.getDrawPages()
+         if slide_index < 1 or slide_index > pages.getCount():
+             cls.ret = f"Error: Invalid slide index {slide_index}"
+             return False
+         page = pages.getByIndex(slide_index - 1)
+         if box_index < 0 or box_index >= page.getCount():
+             cls.ret = f"Error: Invalid box index {box_index}"
+             return False
+         shape = page.getByIndex(box_index)
+         # Use the real page dimensions so the result is right for any slide format.
+         slide_width = page.Width
+         slide_height = page.Height
+         shape_width = shape.Size.Width
+         shape_height = shape.Size.Height
+         margin = 500
+         x_at = {
+             "left": margin,
+             "center": int((slide_width - shape_width) / 2),
+             "right": slide_width - shape_width - margin,
+         }
+         y_at = {
+             "top": margin,
+             "center": int((slide_height - shape_height) / 2),
+             "bottom": slide_height - shape_height - margin,
+         }
+         # Position name -> (horizontal anchor, vertical anchor).
+         anchors = {
+             "left": ("left", "center"),
+             "right": ("right", "center"),
+             "center": ("center", "center"),
+             "top": ("center", "top"),
+             "bottom": ("center", "bottom"),
+             "top-left": ("left", "top"),
+             "top-right": ("right", "top"),
+             "bottom-left": ("left", "bottom"),
+             "bottom-right": ("right", "bottom"),
+         }
+         if position not in anchors:
+             cls.ret = f"Error: Invalid position '{position}'"
+             return False
+         horizontal, vertical = anchors[position]
+         new_x = x_at[horizontal]
+         new_y = y_at[vertical]
+         # UNO structs are handed out by value: assigning to shape.Position.X would only
+         # change a temporary copy, so the whole Point is written back instead.
+         shape.setPosition(uno.createUnoStruct("com.sun.star.awt.Point", new_x, new_y))
+         cls.ret = f"Box positioned at {position} (X: {new_x}, Y: {new_y})"
+         return True
+     except Exception as e:
+         cls.ret = f"Error positioning box: {str(e)}"
+         return False
+
+ @classmethod
+ def insert_file(cls, file_path, slide_index=None, position=None, size=None, autoplay=False):
+     """
+     Inserts a video file into the current or specified slide in the presentation.
+
+     Position and size are given as percentages of the target slide's own width and height.
+
+     Args:
+         file_path (str): The full path to the video file to be inserted ('~' is expanded)
+         slide_index (int, optional): The index of the slide to insert the video into (1-based indexing).
+                                     If not provided, inserts into the current slide.
+         position (dict, optional): The position coordinates for the video as percentages of slide dimensions
+             {'x': float, 'y': float}; missing keys default to 10
+         size (dict, optional): The size dimensions for the video as percentages of slide dimensions
+             {'width': float, 'height': float}; missing keys default to 80 and 60
+         autoplay (bool, optional): Whether the video should automatically play when the slide is shown
+
+     Returns:
+         bool: True if successful, False otherwise
+     """
+     try:
+         expanded_file_path = os.path.expanduser(file_path)
+         if not os.path.exists(expanded_file_path):
+             cls.ret = f"Error: File not found: {expanded_file_path}"
+             return False
+         file_url = uno.systemPathToFileUrl(os.path.abspath(expanded_file_path))
+         pages = cls.doc.getDrawPages()
+         if slide_index is not None:
+             zero_based_index = slide_index - 1
+             if zero_based_index < 0 or zero_based_index >= pages.getCount():
+                 cls.ret = f"Error: Invalid slide index: {slide_index}"
+                 return False
+             slide = pages.getByIndex(zero_based_index)
+         else:
+             slide = cls.doc.getCurrentController().getCurrentPage()
+         # Percentages are resolved against the real page size rather than a fixed slide format.
+         slide_width = slide.Width
+         slide_height = slide.Height
+         position = position or {}
+         size = size or {}
+         x = int(position.get("x", 10) * slide_width / 100)
+         y = int(position.get("y", 10) * slide_height / 100)
+         width = int(size.get("width", 80) * slide_width / 100)
+         height = int(size.get("height", 60) * slide_height / 100)
+         media_shape = cls.doc.createInstance("com.sun.star.presentation.MediaShape")
+         slide.add(media_shape)
+         media_shape.setPosition(uno.createUnoStruct("com.sun.star.awt.Point", x, y))
+         media_shape.setSize(uno.createUnoStruct("com.sun.star.awt.Size", width, height))
+         media_shape.setPropertyValue("MediaURL", file_url)
+         autoplay_note = ""
+         if autoplay:
+             try:
+                 media_shape.setPropertyValue("MediaIsAutoPlay", True)
+             except Exception as autoplay_error:
+                 # Autoplay stays best-effort, but the failure is reported instead of hidden.
+                 autoplay_note = f" (autoplay could not be enabled: {autoplay_error})"
+         cls.ret = f"Video inserted successfully from {expanded_file_path}{autoplay_note}"
+         return True
+     except Exception as e:
+         cls.ret = f"Error inserting video: {str(e)}"
+         return False
+
+ @classmethod
+ def set_slide_background(cls, slide_index=None, color=None, image_path=None):
+     """
+     Sets the background color or image for a specific slide or all slides.
+
+     An existing image takes precedence over the color. If the image file does not
+     exist, the color is used when one was given; otherwise the call fails.
+
+     Args:
+         slide_index (int, optional): The index of the slide to modify (1-based indexing).
+                                     If not provided, applies to all slides.
+         color (str, optional): The background color to apply: a '#'-prefixed hex code or one of
+                               'red', 'green', 'blue', 'black', 'white', 'yellow', 'cyan', 'magenta', 'gray'
+         image_path (str, optional): Path to an image file to use as background. If provided, overrides color.
+
+     Returns:
+         bool: True if successful, False otherwise
+     """
+     try:
+         if not color and not image_path:
+             cls.ret = "Error: Either color or image_path must be provided"
+             return False
+         image_url = None
+         if image_path:
+             expanded_path = os.path.expanduser(image_path)
+             if os.path.exists(expanded_path):
+                 image_url = uno.systemPathToFileUrl(os.path.abspath(expanded_path))
+             elif not color:
+                 # Nothing to fall back to: do not report success for an untouched slide.
+                 cls.ret = f"Error: Image file not found at {image_path}"
+                 return False
+         rgb_color = None
+         if image_url is None:
+             if color.startswith("#"):
+                 rgb_color = int(color.lstrip("#"), 16)
+             else:
+                 color_map = {
+                     "red": 16711680,
+                     "green": 43315,
+                     "blue": 255,
+                     "black": 0,
+                     "white": 16777215,
+                     "yellow": 16776960,
+                     "cyan": 65535,
+                     "magenta": 16711935,
+                     "gray": 8421504,
+                 }
+                 rgb_color = color_map.get(color.lower())
+                 if rgb_color is None:
+                     # Previously an unknown name silently produced a black background.
+                     cls.ret = f"Error: Unknown color '{color}'"
+                     return False
+         pages = cls.doc.getDrawPages()
+         page_count = pages.getCount()
+         if slide_index is not None:
+             zero_based_index = slide_index - 1
+             if zero_based_index < 0 or zero_based_index >= page_count:
+                 cls.ret = f"Error: Slide index {slide_index} is out of range (1-{page_count})"
+                 return False
+             slides_to_modify = [pages.getByIndex(zero_based_index)]
+         else:
+             slides_to_modify = [pages.getByIndex(i) for i in range(page_count)]
+         for slide in slides_to_modify:
+             # A page background object is created by the document's own factory;
+             # FillProperties is only a property set and cannot be instantiated.
+             background = cls.doc.createInstance("com.sun.star.drawing.Background")
+             if image_url is not None:
+                 background.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "BITMAP")
+                 background.FillBitmapURL = image_url
+                 background.FillBitmapMode = uno.Enum("com.sun.star.drawing.BitmapMode", "STRETCH")
+             else:
+                 background.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "SOLID")
+                 background.FillColor = rgb_color
+             slide.setPropertyValue("Background", background)
+         cls.ret = "Background set successfully"
+         return True
+     except Exception as e:
+         cls.ret = f"Error setting background: {str(e)}"
+         return False
+
+ @classmethod
+ def save_as(cls, file_path, overwrite=False):
+     """
+     Saves the current document to a specified location with a given filename.
+
+     The output filter is chosen from the file extension (.odp, .ppt, .pptx, .pdf);
+     any other extension is written in ODP format. PDF is written as an export, so
+     the document's own location is not changed in that case.
+
+     :param file_path: The full path where the file should be saved, including the filename and extension
+     :param overwrite: Whether to overwrite the file if it already exists (default: False)
+     :return: True if successful, False otherwise
+     """
+     try:
+         abs_path = os.path.abspath(os.path.expanduser(file_path))
+         if os.path.exists(abs_path) and not overwrite:
+             cls.ret = f"File already exists and overwrite is set to False: {file_path}"
+             return False
+         # Let UNO build the URL so spaces and other special characters are escaped.
+         url = uno.systemPathToFileUrl(abs_path)
+         extension = os.path.splitext(abs_path)[1].lower()
+         filter_by_extension = {
+             ".odp": "impress8",
+             ".ppt": "MS PowerPoint 97",
+             ".pptx": "Impress MS PowerPoint 2007 XML",
+             ".pdf": "impress_pdf_Export",
+         }
+         filter_name = filter_by_extension.get(extension, "impress8")
+         overwrite_prop = PropertyValue()
+         overwrite_prop.Name = "Overwrite"
+         overwrite_prop.Value = overwrite
+         filter_prop = PropertyValue()
+         filter_prop.Name = "FilterName"
+         filter_prop.Value = filter_name
+         properties = (overwrite_prop, filter_prop)
+         if extension == ".pdf":
+             # Export-only filters have to go through storeToURL.
+             cls.doc.storeToURL(url, properties)
+         else:
+             cls.doc.storeAsURL(url, properties)
+         cls.ret = f"Document saved successfully to {file_path}"
+         return True
+     except Exception as e:
+         cls.ret = f"Error saving document: {str(e)}"
+         return False
+
+ @classmethod
+ def insert_image(cls, slide_index, image_path, width=None, height=None, position=None):
+     """
+     Inserts an image to a specific slide in the presentation.
+
+     Args:
+         slide_index (int): The index of the slide to add the image to (1-based indexing)
+         image_path (str): The full path to the image file to be added
+         width (float, optional): The width of the image in centimeters; the shape's current width is kept if omitted
+         height (float, optional): The height of the image in centimeters; the shape's current height is kept if omitted
+         position (dict, optional): The position coordinates for the image as percentages
+             {
+                 'x': float, # The x-coordinate as a percentage of slide width (default 0)
+                 'y': float  # The y-coordinate as a percentage of slide height (default 0)
+             }
+
+     Returns:
+         bool: True if successful, False otherwise
+     """
+     try:
+         if not os.path.exists(image_path):
+             cls.ret = f"Error: Image file not found at {image_path}"
+             return False
+         zero_based_index = slide_index - 1
+         slides = cls.doc.getDrawPages()
+         if zero_based_index < 0 or zero_based_index >= slides.getCount():
+             cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1 to {slides.getCount()}"
+             return False
+         slide = slides.getByIndex(zero_based_index)
+         image_url = uno.systemPathToFileUrl(os.path.abspath(image_path))
+         shape = cls.doc.createInstance("com.sun.star.drawing.GraphicObjectShape")
+         shape.setPropertyValue("GraphicURL", image_url)
+         slide.add(shape)
+         position = position or {}
+         x_pos = int(position.get("x", 0) / 100 * slide.Width)
+         y_pos = int(position.get("y", 0) / 100 * slide.Height)
+         current_size = shape.Size
+         # Centimeters are converted to the 1/100 mm units used for shape sizes.
+         new_width = int(width * 1000) if width is not None else current_size.Width
+         new_height = int(height * 1000) if height is not None else current_size.Height
+         shape.Size = uno.createUnoStruct("com.sun.star.awt.Size", new_width, new_height)
+         shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", x_pos, y_pos)
+         cls.ret = f"Image inserted successfully on slide {slide_index}"
+         return True
+     except Exception as e:
+         cls.ret = f"Error inserting image: {str(e)}"
+         return False
+
+ @classmethod
+ def configure_display_settings(
+     cls, use_presenter_view=None, primary_monitor_only=None, monitor_for_presentation=None
+ ):
+     """
+     Configures the display settings for LibreOffice Impress presentations.
+
+     Each requested setting is written to the current controller independently, so
+     one failing setting does not prevent the others from being attempted. Failures
+     are collected and reported in ``cls.ret``.
+
+     Args:
+         use_presenter_view (bool, optional): Whether to use presenter view. Set to false to disable presenter view.
+         primary_monitor_only (bool, optional): Whether to use only the primary monitor for the presentation.
+         monitor_for_presentation (int, optional): Specify which monitor to use (1 for primary, 2 for secondary, etc.)
+
+     Returns:
+         bool: True if every requested setting was applied, False otherwise
+     """
+     try:
+         controller = cls.doc.getCurrentController()
+         if not hasattr(controller, "getPropertyValue"):
+             cls.ret = "Error: Not an Impress presentation or controller not available"
+             return False
+         # (controller property, value to write or None to skip, label used in messages)
+         requested_settings = (
+             ("IsPresentationViewEnabled", use_presenter_view, "presenter view"),
+             ("UsePrimaryMonitorOnly", primary_monitor_only, "primary monitor usage"),
+             (
+                 "MonitorForPresentation",
+                 # The caller counts monitors from 1; the value written is one less.
+                 None if monitor_for_presentation is None else monitor_for_presentation - 1,
+                 "presentation monitor",
+             ),
+         )
+         failures = []
+         for property_name, value, label in requested_settings:
+             if value is None:
+                 continue
+             try:
+                 controller.setPropertyValue(property_name, value)
+             except Exception as e:
+                 failures.append(f"Warning: Could not set {label}: {str(e)}")
+         if failures:
+             # Previously these warnings were overwritten by the success message below.
+             cls.ret = "; ".join(failures)
+             return False
+         cls.ret = "Display settings configured successfully"
+         return True
+     except Exception as e:
+         cls.ret = f"Error configuring display settings: {str(e)}"
+         return False
+
+ @classmethod
+ def set_text_strikethrough(cls, slide_index, box_index, line_numbers, apply):
+     """
+     Applies or removes strike-through formatting to specific text content in a slide.
+
+     Lines are the pieces of the shape's text separated by newline characters.
+     Line numbers outside the text are skipped and listed in ``cls.ret``.
+
+     Args:
+         slide_index (int): The index of the slide containing the text (1-based indexing)
+         box_index (int): The index of the textbox containing the text (0-based indexing)
+         line_numbers (list): The line numbers to apply strike-through formatting to (1-based indexing)
+         apply (bool): Whether to apply (true) or remove (false) strike-through formatting
+
+     Returns:
+         bool: True if at least one requested line was formatted (or none were requested), False otherwise
+     """
+     try:
+         slides = cls.doc.getDrawPages()
+         if slide_index < 1 or slide_index > slides.getCount():
+             cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {slides.getCount()}"
+             return False
+         slide = slides.getByIndex(slide_index - 1)
+         if box_index < 0 or box_index >= slide.getCount():
+             cls.ret = f"Error: Invalid box index {box_index}. Valid range is 0 to {slide.getCount() - 1}"
+             return False
+         shape = slide.getByIndex(box_index)
+         if not hasattr(shape, "getText"):
+             cls.ret = f"Error: Shape at index {box_index} does not contain text"
+             return False
+         text = shape.getText()
+         cursor = text.createTextCursor()
+         lines = text.getString().split("\n")
+         # CharStrikeout takes a FontStrikeout value: 1 is SINGLE, 0 is NONE.
+         strikeout_value = 1 if apply else 0
+         skipped = []
+         for line_number in line_numbers:
+             if not 1 <= line_number <= len(lines):
+                 skipped.append(line_number)
+                 continue
+             # Offset of the line start: all earlier lines plus one separator each.
+             start_pos = sum(len(line) + 1 for line in lines[: line_number - 1])
+             cursor.gotoStart(False)
+             cursor.goRight(start_pos, False)
+             cursor.goRight(len(lines[line_number - 1]), True)
+             cursor.CharStrikeout = strikeout_value
+         if skipped and len(skipped) == len(line_numbers):
+             cls.ret = f"Error: No valid line numbers in {list(line_numbers)}. Valid range is 1 to {len(lines)}"
+             return False
+         cls.ret = f"Strike-through {'applied' if apply else 'removed'} successfully"
+         if skipped:
+             cls.ret += f" (skipped out-of-range lines: {skipped})"
+         return True
+     except Exception as e:
+         cls.ret = f"Error: {str(e)}"
+         return False
+
+ @classmethod
+ def set_textbox_alignment(cls, slide_index, box_index, alignment):
+     """
+     Sets the text alignment for the specified textbox on a slide.
+
+     'left', 'center' and 'right' are applied through the shape's TextHorizontalAdjust
+     property; 'justify' is applied to all paragraphs of the shape's text.
+
+     :param slide_index: The index of the slide to modify (1-based indexing)
+     :param box_index: The index of the textbox to modify (0-based indexing)
+     :param alignment: The text alignment to apply ('left', 'center', 'right', or 'justify')
+     :return: True if successful, False otherwise
+     """
+     try:
+         zero_based_slide_index = slide_index - 1
+         slides = cls.doc.getDrawPages()
+         if zero_based_slide_index < 0 or zero_based_slide_index >= slides.getCount():
+             cls.ret = f"Error: Slide index {slide_index} out of range"
+             return False
+         slide = slides.getByIndex(zero_based_slide_index)
+         if box_index < 0 or box_index >= slide.getCount():
+             cls.ret = f"Error: Box index {box_index} out of range"
+             return False
+         shape = slide.getByIndex(box_index)
+         if not hasattr(shape, "getText"):
+             cls.ret = "Error: Selected shape does not support text"
+             return False
+         if alignment == "left":
+             shape.TextHorizontalAdjust = LEFT
+         elif alignment == "center":
+             shape.TextHorizontalAdjust = CENTER
+         elif alignment == "right":
+             shape.TextHorizontalAdjust = RIGHT
+         elif alignment == "justify":
+             text = shape.getText()
+             cursor = text.createTextCursor()
+             cursor.gotoStart(False)
+             cursor.gotoEnd(True)
+             # ParagraphAdjust: LEFT=0, RIGHT=1, BLOCK=2, CENTER=3. Justified text is BLOCK;
+             # the former value 3 centered the text instead.
+             cursor.ParaAdjust = 2
+         else:
+             cls.ret = f"Error: Invalid alignment value: {alignment}"
+             return False
+         cls.ret = f"Successfully set text alignment to {alignment} for textbox {box_index} on slide {slide_index}"
+         return True
+     except Exception as e:
+         cls.ret = f"Error: {str(e)}"
+         return False
+
+ @classmethod
+ def set_slide_number_properties(
+ cls, color=None, font_size=None, visible=None, position=None, apply_to="all", slide_indices=None
+ ):
+ """
+ Modifies the properties of slide numbers in the presentation.
+
+ The method looks on every master page for a shape that holds a page-number
+ field and updates that shape; where none is found and a visible or formatted
+ number was requested, a new text shape with a page-number field is created.
+
+ Args:
+ color (str, optional): The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code)
+ font_size (float, optional): The font size for slide numbers (in points)
+ visible (bool, optional): Whether slide numbers should be visible or hidden
+ position (str, optional): The position of slide numbers ('bottom-left', 'bottom-center', 'bottom-right',
+ 'top-left', 'top-center', 'top-right')
+ apply_to (str, optional): Whether to apply changes to 'all', 'current', or 'selected' slides
+ slide_indices (list, optional): Indices of specific slides to change (1-based indexing)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ draw_pages = cls.doc.getDrawPages()
+ master_pages = cls.doc.getMasterPages()
+ # NOTE(review): pages_to_modify is filled from apply_to / slide_indices below but is
+ # never read afterwards, so these two parameters do not influence which pages change.
+ pages_to_modify = []
+ if apply_to == "all":
+ for i in range(draw_pages.getCount()):
+ pages_to_modify.append(draw_pages.getByIndex(i))
+ elif apply_to == "current":
+ current_page = cls.doc.getCurrentController().getCurrentPage()
+ pages_to_modify.append(current_page)
+ elif apply_to == "selected" and slide_indices:
+ for idx in slide_indices:
+ if 1 <= idx <= draw_pages.getCount():
+ pages_to_modify.append(draw_pages.getByIndex(idx - 1))
+ # Work through the master pages: locate the page-number shape, then update or create it.
+ for i in range(master_pages.getCount()):
+ master_page = master_pages.getByIndex(i)
+ page_number_shape = None
+ for j in range(master_page.getCount()):
+ shape = master_page.getByIndex(j)
+ # First detection route: the shape's TextType attribute.
+ # NOTE(review): 5 is presumably the page-number text type -- confirm against the UNO constants.
+ if hasattr(shape, "TextType"):
+ try:
+ if shape.TextType == 5:
+ page_number_shape = shape
+ break
+ except:
+ pass
+ # Second detection route: a text field whose implementation name contains "PageNumber".
+ if hasattr(shape, "getText"):
+ try:
+ text = shape.getText()
+ if text and text.getTextFields().getCount() > 0:
+ fields = text.getTextFields().createEnumeration()
+ while fields.hasMoreElements():
+ field = fields.nextElement()
+ if "PageNumber" in field.getImplementationName():
+ page_number_shape = shape
+ break
+ if page_number_shape:
+ break
+ except:
+ pass
+ # An existing page-number shape was found: apply the requested changes to it.
+ if page_number_shape:
+ if color is not None:
+ # Only '#'-prefixed hex values and the names red/green/blue/black are recognized;
+ # any other value leaves color_int at 0.
+ color_int = 0
+ if color.startswith("#"):
+ color_int = int(color[1:], 16)
+ elif color == "red":
+ color_int = 16711680
+ elif color == "green":
+ color_int = 65280
+ elif color == "blue":
+ color_int = 255
+ elif color == "black":
+ color_int = 0
+ text = page_number_shape.getText()
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharColor = color_int
+ if font_size is not None:
+ text = page_number_shape.getText()
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharHeight = font_size
+ if position is not None:
+ # The prefix of the position string picks the vertical coordinate, the suffix the
+ # horizontal one; both keep a distance of 100 units from the page edge.
+ page_width = master_page.Width
+ page_height = master_page.Height
+ width = page_number_shape.Size.Width
+ height = page_number_shape.Size.Height
+ new_x = 0
+ new_y = 0
+ if position.startswith("bottom"):
+ new_y = page_height - height - 100
+ elif position.startswith("top"):
+ new_y = 100
+ if position.endswith("left"):
+ new_x = 100
+ elif position.endswith("center"):
+ # NOTE(review): true division yields a float here; the Point below receives it as is.
+ new_x = (page_width - width) / 2
+ elif position.endswith("right"):
+ new_x = page_width - width - 100
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", new_x, new_y)
+ if position.endswith("left"):
+ page_number_shape.ParaAdjust = LEFT
+ elif position.endswith("center"):
+ page_number_shape.ParaAdjust = CENTER
+ elif position.endswith("right"):
+ page_number_shape.ParaAdjust = RIGHT
+ if visible is not None:
+ try:
+ page_number_shape.Visible = visible
+ except:
+ # Fallback when Visible cannot be set: shrink the shape and move it to negative coordinates.
+ if not visible:
+ page_number_shape.Size = uno.createUnoStruct("com.sun.star.awt.Size", 1, 1)
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", -1000, -1000)
+ # No page-number shape was found: create one when a visible number or any formatting was requested.
+ elif (
+ visible is True
+ or visible is None
+ and (color is not None or font_size is not None or position is not None)
+ ):
+ page_number_shape = cls.doc.createInstance("com.sun.star.drawing.TextShape")
+ master_page.add(page_number_shape)
+ default_width = 2000
+ default_height = 400
+ page_number_shape.Size = uno.createUnoStruct("com.sun.star.awt.Size", default_width, default_height)
+ page_width = master_page.Width
+ page_height = master_page.Height
+ # Default placement is the bottom-right corner.
+ pos_x = page_width - default_width - 100
+ pos_y = page_height - default_height - 100
+ if position is not None:
+ if position.startswith("bottom"):
+ pos_y = page_height - default_height - 100
+ elif position.startswith("top"):
+ pos_y = 100
+ if position.endswith("left"):
+ pos_x = 100
+ page_number_shape.ParaAdjust = LEFT
+ elif position.endswith("center"):
+ pos_x = (page_width - default_width) / 2
+ page_number_shape.ParaAdjust = CENTER
+ elif position.endswith("right"):
+ pos_x = page_width - default_width - 100
+ page_number_shape.ParaAdjust = RIGHT
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", pos_x, pos_y)
+ text = page_number_shape.getText()
+ cursor = text.createTextCursor()
+ try:
+ page_field = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+ text.insertTextContent(cursor, page_field, False)
+ except:
+ # If the field cannot be inserted, the literal text "<#>" is written instead.
+ text.setString("<#>")
+ if color is not None:
+ # Same color parsing as for an existing shape above.
+ color_int = 0
+ if color.startswith("#"):
+ color_int = int(color[1:], 16)
+ elif color == "red":
+ color_int = 16711680
+ elif color == "green":
+ color_int = 65280
+ elif color == "blue":
+ color_int = 255
+ elif color == "black":
+ color_int = 0
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharColor = color_int
+ if font_size is not None:
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharHeight = font_size
+ if visible is not None:
+ try:
+ page_number_shape.Visible = visible
+ except:
+ if not visible:
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", -1000, -1000)
+ else:
+ try:
+ page_number_shape.Visible = True
+ except:
+ pass
+ # The view data is read and restored unchanged; presumably this is meant to make the
+ # view refresh -- TODO confirm. Failures here are ignored.
+ try:
+ controller = cls.doc.getCurrentController()
+ view_data = controller.getViewData()
+ controller.restoreViewData(view_data)
+ except:
+ pass
+ cls.ret = "Slide number properties updated successfully"
+ return True
+ except Exception as e:
+ cls.ret = f"Error setting slide number properties: {str(e)}"
+ return False
+
+ @classmethod
+ def set_slide_number(cls, color=None, font_size=None, visible=None, position=None):
+     """
+     Adds a slide-number field to the first master page, or removes slide-number shapes.
+
+     With ``visible=False`` every shape whose ``Presentation`` attribute equals "Number"
+     is removed from all slides and all master pages. Otherwise, when ``visible`` is True
+     or any formatting option is given, a new text shape containing a page-number field
+     is added to the first master page and formatted as requested.
+
+     :param color: The color to apply to slide numbers ('#RRGGBB' or one of 'red', 'green', 'blue', 'black',
+         'white', 'yellow', 'cyan', 'magenta', 'gray'); other values leave the color unchanged
+     :param font_size: The font size for slide numbers (in points)
+     :param visible: Whether slide numbers should be visible or hidden
+     :param position: The position of slide numbers on the slides (bottom-left, bottom-center, bottom-right,
+         top-left, top-center, top-right); defaults to bottom-right
+     :return: True if successful, False otherwise (including when nothing was requested)
+     """
+     try:
+         if visible is False:
+             for page_collection in (cls.doc.getDrawPages(), cls.doc.getMasterPages()):
+                 for i in range(page_collection.getCount()):
+                     page = page_collection.getByIndex(i)
+                     # Walk backwards so that removing a shape does not shift the indices still to visit.
+                     for j in range(page.getCount() - 1, -1, -1):
+                         try:
+                             shape = page.getByIndex(j)
+                             if hasattr(shape, "Presentation") and shape.Presentation == "Number":
+                                 page.remove(shape)
+                         except Exception:
+                             # Best effort: a shape that cannot be inspected or removed is left in place.
+                             continue
+             cls.ret = "Slide numbers hidden successfully"
+             return True
+         if not (visible is True or color is not None or font_size is not None or position is not None):
+             cls.ret = "No slide number settings were provided; nothing changed"
+             return False
+         master_pages = cls.doc.getMasterPages()
+         if master_pages.getCount() == 0:
+             cls.ret = "No master pages found"
+             return False
+         master_page = master_pages.getByIndex(0)
+         slide_number_shape = cls.doc.createInstance("com.sun.star.drawing.TextShape")
+         slide_number_shape.setSize(uno.createUnoStruct("com.sun.star.awt.Size", 2000, 500))
+         pos = position or "bottom-right"
+         page_width = master_page.Width
+         page_height = master_page.Height
+         x, y = 0, 0
+         if "bottom" in pos:
+             y = page_height - 1000
+         elif "top" in pos:
+             y = 500
+         if "left" in pos:
+             x = 500
+         elif "center" in pos:
+             # Point coordinates are integers.
+             x = int((page_width - 2000) / 2)
+         elif "right" in pos:
+             x = page_width - 2500
+         slide_number_shape.setPosition(uno.createUnoStruct("com.sun.star.awt.Point", x, y))
+         master_page.add(slide_number_shape)
+         text = slide_number_shape.getText()
+         cursor = text.createTextCursor()
+         page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+         text.insertTextContent(cursor, page_number, False)
+         if "center" in pos:
+             slide_number_shape.setPropertyValue("TextHorizontalAdjust", CENTER)
+         elif "right" in pos:
+             slide_number_shape.setPropertyValue("TextHorizontalAdjust", RIGHT)
+         elif "left" in pos:
+             slide_number_shape.setPropertyValue("TextHorizontalAdjust", LEFT)
+         if font_size is not None:
+             cursor.gotoStart(False)
+             cursor.gotoEnd(True)
+             cursor.setPropertyValue("CharHeight", font_size)
+         if color is not None:
+             cursor.gotoStart(False)
+             cursor.gotoEnd(True)
+             if color.startswith("#") and len(color) == 7:
+                 cursor.setPropertyValue("CharColor", int(color[1:], 16))
+             else:
+                 color_map = {
+                     "red": 16711680,
+                     "green": 65280,
+                     "blue": 255,
+                     "black": 0,
+                     "white": 16777215,
+                     "yellow": 16776960,
+                     "cyan": 65535,
+                     "magenta": 16711935,
+                     "gray": 8421504,
+                 }
+                 if color.lower() in color_map:
+                     cursor.setPropertyValue("CharColor", color_map[color.lower()])
+         cls.ret = "Slide numbers added and configured successfully"
+         return True
+     except Exception as e:
+         cls.ret = f"Error setting slide number: {str(e)}"
+         return False
+
+ @classmethod
+ def set_slide_number_color(cls, color):
+     """
+     Sets the color of the slide number in the presentation.
+
+     Page-number fields are searched on the master pages and on the slides. In addition,
+     any slide shape whose whole text is a number of at most three digits is recolored.
+
+     Args:
+         color (str): The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code).
+             Values that are neither a known name nor a six-digit hex code are treated as black.
+
+     Returns:
+         bool: True if at least one slide number was recolored, False otherwise
+     """
+
+     def recolor_number_fields(page_collection):
+         """Color every page-number field portion on the given pages; tell whether one was found."""
+         touched = False
+         for page_pos in range(page_collection.getCount()):
+             current_page = page_collection.getByIndex(page_pos)
+             for shape_pos in range(current_page.getCount()):
+                 candidate = current_page.getByIndex(shape_pos)
+                 if not hasattr(candidate, "getText") or candidate.getText() is None:
+                     continue
+                 shape_text = candidate.getText()
+                 try:
+                     paragraphs = shape_text.createEnumeration()
+                     while paragraphs.hasMoreElements():
+                         paragraph = paragraphs.nextElement()
+                         if not hasattr(paragraph, "createEnumeration"):
+                             continue
+                         portions = paragraph.createEnumeration()
+                         while portions.hasMoreElements():
+                             portion = portions.nextElement()
+                             if not (hasattr(portion, "TextPortionType") and portion.TextPortionType == "TextField"):
+                                 continue
+                             if not (hasattr(portion, "TextField") and portion.TextField is not None):
+                                 continue
+                             field = portion.TextField
+                             if hasattr(field, "supportsService") and (
+                                 field.supportsService("com.sun.star.presentation.TextField.PageNumber")
+                                 or field.supportsService("com.sun.star.text.TextField.PageNumber")
+                             ):
+                                 portion.CharColor = rgb_color
+                                 touched = True
+                 except Exception:
+                     # A shape whose text cannot be walked is skipped.
+                     continue
+         return touched
+
+     try:
+         named_colors = {
+             "black": 0,
+             "white": 16777215,
+             "red": 16711680,
+             "green": 65280,
+             "blue": 255,
+             "yellow": 16776960,
+             "cyan": 65535,
+             "magenta": 16711935,
+             "gray": 8421504,
+             "orange": 16753920,
+             "purple": 8388736,
+         }
+         if color.lower() in named_colors:
+             rgb_color = named_colors[color.lower()]
+         else:
+             if color.startswith("#"):
+                 color = color[1:]
+             try:
+                 rgb_color = int(color, 16) if len(color) == 6 else 0
+             except ValueError:
+                 rgb_color = 0
+         found_on_masters = recolor_number_fields(cls.doc.getMasterPages())
+         draw_pages = cls.doc.getDrawPages()
+         found_on_slides = recolor_number_fields(draw_pages)
+         found = found_on_masters or found_on_slides
+         # Last pass: slide shapes whose entire text is a short number.
+         for page_pos in range(draw_pages.getCount()):
+             slide = draw_pages.getByIndex(page_pos)
+             for shape_pos in range(slide.getCount()):
+                 candidate = slide.getByIndex(shape_pos)
+                 if not hasattr(candidate, "getText") or candidate.getText() is None:
+                     continue
+                 shape_text = candidate.getText()
+                 content = shape_text.getString()
+                 if not (content.isdigit() and len(content) <= 3):
+                     continue
+                 try:
+                     whole_text = shape_text.createTextCursor()
+                     whole_text.gotoStart(False)
+                     whole_text.gotoEnd(True)
+                     whole_text.CharColor = rgb_color
+                     found = True
+                 except Exception:
+                     continue
+         if not found:
+             cls.ret = "Could not find slide numbers to change color"
+             return False
+         cls.ret = f"Slide number color set to {color}"
+         return True
+     except Exception as e:
+         cls.ret = f"Error setting slide number color: {str(e)}"
+         return False
+
+ @classmethod
+ def export_to_image(cls, file_path, format, slide_index=None):
+     """
+     Exports the current presentation or a specific slide to an image file format.
+
+     Each export makes the slide current in the controller and then stores the document
+     through the image export filter for the requested format.
+
+     Args:
+         file_path (str): The full path where the image file should be saved, including the filename and extension
+         format (str): The image format to export to ('png', 'jpeg', 'jpg', 'gif', 'bmp' or 'tiff')
+         slide_index (int, optional): The index of the specific slide to export (1-based indexing).
+                                     If not provided, exports the entire presentation as a series of images
+                                     named ``<base>_<n>.<format>`` (or ``<base>.<format>`` for a single slide).
+
+     Returns:
+         bool: True if export was successful, False otherwise
+     """
+     try:
+         image_format = format.lower()
+         # Export filter names use the short extension, e.g. "jpg" and "tif".
+         filter_suffixes = {"png": "png", "jpeg": "jpg", "jpg": "jpg", "gif": "gif", "bmp": "bmp", "tiff": "tif"}
+         if image_format not in filter_suffixes:
+             valid_formats = ["png", "jpeg", "jpg", "gif", "bmp", "tiff"]
+             cls.ret = f"Error: Invalid format '{image_format}'. Valid formats are: {', '.join(valid_formats)}"
+             return False
+         filter_suffix = filter_suffixes[image_format]
+         if image_format == "jpg":
+             image_format = "jpeg"
+         pages = cls.doc.getDrawPages()
+         page_count = pages.getCount()
+         zero_based_index = None
+         if slide_index is not None:
+             zero_based_index = slide_index - 1
+             if zero_based_index < 0 or zero_based_index >= page_count:
+                 cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {page_count}"
+                 return False
+         controller = cls.doc.getCurrentController()
+         # Presentations and drawings have separate export filters.
+         is_presentation = cls.doc.supportsService("com.sun.star.presentation.PresentationDocument")
+         filter_name = f"{'impress' if is_presentation else 'draw'}_{filter_suffix}_Export"
+         props = (
+             PropertyValue(Name="FilterName", Value=filter_name),
+             PropertyValue(Name="FilterData", Value=()),
+         )
+         if zero_based_index is not None:
+             controller.setCurrentPage(pages.getByIndex(zero_based_index))
+             # systemPathToFileUrl needs an absolute path.
+             cls.doc.storeToURL(uno.systemPathToFileUrl(os.path.abspath(file_path)), props)
+             cls.ret = f"Successfully exported slide {slide_index} to {file_path}"
+             return True
+         base_name = os.path.splitext(file_path)[0]
+         for i in range(page_count):
+             controller.setCurrentPage(pages.getByIndex(i))
+             if page_count == 1:
+                 current_file = f"{base_name}.{image_format}"
+             else:
+                 current_file = f"{base_name}_{i + 1}.{image_format}"
+             cls.doc.storeToURL(uno.systemPathToFileUrl(os.path.abspath(current_file)), props)
+         if page_count == 1:
+             cls.ret = f"Successfully exported {page_count} slides to {base_name}.{image_format}"
+         else:
+             cls.ret = f"Successfully exported {page_count} slides to {base_name}_[1-{page_count}].{image_format}"
+         return True
+     except Exception as e:
+         cls.ret = f"Error exporting to image: {str(e)}"
+         return False
diff --git a/mm_agents/autoglm/tools/package/libreoffice_writer.py b/mm_agents/autoglm/tools/package/libreoffice_writer.py
new file mode 100644
index 0000000..35095c8
--- /dev/null
+++ b/mm_agents/autoglm/tools/package/libreoffice_writer.py
@@ -0,0 +1,753 @@
+import os
+import re
+
+import uno
+from com.sun.star.awt.FontSlant import ITALIC, NONE, OBLIQUE
+from com.sun.star.awt.FontWeight import BOLD, NORMAL
+from com.sun.star.beans import PropertyValue
+from com.sun.star.style.ParagraphAdjust import CENTER, LEFT, RIGHT
+from com.sun.star.text.ControlCharacter import PARAGRAPH_BREAK
+from com.sun.star.text.TextContentAnchorType import AS_CHARACTER
+
+
+class WriterTools:
+    """Tool methods that operate on a LibreOffice Writer document through UNO.
+
+    All state is held on the class itself: the UNO connection is established while
+    the class body executes, and every tool method reports its outcome in ``ret``.
+    """
+
+    # Local component context, used only to obtain the URL resolver.
+    localContext = uno.getComponentContext()
+    resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+    # Remote context of the office process reachable on localhost:2002.
+    ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext")
+    desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
+    # Component that is current when the class is defined; no method below reassigns it.
+    doc = desktop.getCurrentComponent()
+    text = doc.Text
+    # Shared text cursor used by the insertion helpers (write_text, insert_*_at_cursor, ...).
+    cursor = text.createTextCursor()
+    # Result of the most recent tool call; print_result() prints it.
+    ret = ""
+
+    @classmethod
+    def close_other_window(cls):
+        """Close every open component whose URL differs from the current document's URL."""
+        open_components = cls.desktop.getComponents().createEnumeration()
+        keep_url = cls.doc.getURL()
+        while open_components.hasMoreElements():
+            component = open_components.nextElement()
+            if component.getURL() == keep_url:
+                # Same URL as the working document: leave it open.
+                continue
+            component.close(True)
+
+    @classmethod
+    def save(cls):
+        """Store the document at its existing location.
+
+        Returns:
+            bool: True when the document was stored. False when it has no location
+                yet or the store call raised. The outcome is also written to
+                ``cls.ret`` so that print_result() reports it.
+        """
+        try:
+            if not cls.doc.hasLocation():
+                cls.ret = "Error: Document has no save location; use save-as instead"
+                return False
+            cls.doc.store()
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+    @classmethod
+    def maximize_window(cls):
+        """Move and resize the document's container window to cover the toolkit's work area."""
+        window = cls.doc.getCurrentController().getFrame().getContainerWindow()
+        workarea = window.getToolkit().getWorkArea()
+        # 15 == com.sun.star.awt.PosSize.POSSIZE: apply X, Y, Width and Height together.
+        window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15)
+
+    @classmethod
+    def print_result(cls):
+        """Print the value currently stored in ``cls.ret`` to standard output."""
+        print(cls.ret)
+
+    @classmethod
+    def write_text(cls, text, bold=False, italic=False, size=None):
+        """Insert ``text`` at the shared cursor with the requested character formatting.
+
+        Args:
+            text (str): Text to insert.
+            bold (bool): Character weight 150 when true, 100 otherwise.
+            italic (bool): ITALIC posture when true, NONE otherwise.
+            size: Character height to apply; left untouched when falsy.
+        """
+        target = cls.cursor
+        if bold:
+            target.CharWeight = 150
+        else:
+            target.CharWeight = 100
+        if italic:
+            target.CharPosture = ITALIC
+        else:
+            target.CharPosture = NONE
+        if size:
+            target.CharHeight = size
+        cls.text.insertString(target, text, False)
+        cls.ret = "Success"
+
+    @classmethod
+    def get_paragraphs(cls, start_index=0, count=None):
+        """Return the text of the document's paragraphs as a list of strings.
+
+        Only elements that support the ``com.sun.star.text.Paragraph`` service are
+        collected, so other top-level content does not consume an index.
+
+        Args:
+            start_index (int): 0-based index of the first paragraph to return.
+                Negative values are treated as 0.
+            count (int, optional): Maximum number of paragraphs to return. ``None``
+                returns everything from ``start_index`` on; negative values are
+                treated as 0.
+
+        Returns:
+            list: The selected paragraph strings (empty when ``start_index`` is past
+                the last paragraph). The same list is stored in ``cls.ret``.
+        """
+        enumeration = cls.doc.getText().createEnumeration()
+        paragraph_list = []
+        while enumeration.hasMoreElements():
+            element = enumeration.nextElement()
+            if element.supportsService("com.sun.star.text.Paragraph"):
+                paragraph_list.append(element.getString())
+        start = max(start_index, 0)
+        if count is None:
+            cls.ret = paragraph_list[start:]
+        else:
+            # Clamp so a negative count gives an empty list, not an end-relative slice.
+            cls.ret = paragraph_list[start:start + max(count, 0)]
+        return cls.ret
+
+    @classmethod
+    def env_info(cls):
+        """Build a numbered, one-line-per-paragraph summary of the document.
+
+        Paragraphs longer than 500 characters are cut to 500 and suffixed with "...";
+        each entry is then stripped of surrounding whitespace.
+
+        Returns:
+            str: The summary, also stored in ``cls.ret``.
+        """
+        lines = []
+        for number, content in enumerate(cls.get_paragraphs()):
+            if len(content) > 500:
+                content = content[:500] + "..."
+            lines.append("Paragraph " + str(number) + ": " + content.strip() + "\n")
+        cls.ret = "".join(lines)
+        return cls.ret
+
+    @classmethod
+    def set_color(cls, pattern, color, paragraph_indices=None):
+        """Set the character color of every regex match in the selected paragraphs.
+
+        Args:
+            pattern (str): Regular expression to search for in each paragraph's text.
+            color (int): Value assigned to ``CharColor`` (e.g. 0x000000 for black).
+            paragraph_indices (list, optional): 0-based positions in the document's
+                element enumeration. Out-of-range positions and elements that are not
+                paragraphs are skipped. When empty or None, every element is visited.
+
+        Returns:
+            bool: True on success, False if an exception occurred (see ``cls.ret``).
+        """
+        try:
+            enumeration = cls.doc.Text.createEnumeration()
+            elements = []
+            while enumeration.hasMoreElements():
+                elements.append(enumeration.nextElement())
+            selected = paragraph_indices if paragraph_indices else range(len(elements))
+            matcher = re.compile(pattern)
+            for position in selected:
+                if position < 0 or position >= len(elements):
+                    continue
+                element = elements[position]
+                if not element.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                for hit in matcher.finditer(element.getString()):
+                    # Start a fresh cursor at the paragraph start, skip to the match,
+                    # then extend the selection over it.
+                    span = cls.text.createTextCursorByRange(element.getStart())
+                    span.goRight(hit.start(), False)
+                    span.goRight(hit.end() - hit.start(), True)
+                    span.CharColor = color
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+    @classmethod
+    def find_and_replace(cls, pattern, replacement, paragraph_indices=None):
+        """Replace every regex match in the selected paragraphs.
+
+        The whole paragraph string is rewritten with ``setString`` whenever at least
+        one match is found in it.
+
+        Args:
+            pattern (str): Regular expression to search for.
+            replacement (str): Replacement passed to ``re.subn`` (so it follows the
+                ``re`` replacement-template syntax).
+            paragraph_indices (list, optional): 0-based positions in the document's
+                element enumeration; out-of-range values are ignored. When empty or
+                None, every element is visited.
+
+        Returns:
+            str: Success message with the number of replacements, or an error message.
+                The same string is stored in ``cls.ret``.
+        """
+        try:
+            enumeration = cls.doc.Text.createEnumeration()
+            elements = []
+            while enumeration.hasMoreElements():
+                elements.append(enumeration.nextElement())
+            element_count = len(elements)
+            if paragraph_indices:
+                selected = [i for i in paragraph_indices if 0 <= i < element_count]
+            else:
+                selected = list(range(element_count))
+            matcher = re.compile(pattern)
+            replaced = 0
+            for position in selected:
+                element = elements[position]
+                if not element.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                updated, hits = matcher.subn(replacement, element.getString())
+                if hits > 0:
+                    element.setString(updated)
+                    replaced += hits
+            cls.ret = f"Successfully made {replaced} replacements"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error during find and replace: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_font(cls, font_name, paragraph_indices=None):
+        """Apply a font to whole paragraphs.
+
+        Args:
+            font_name (str): Font name assigned to ``CharFontName``
+                (e.g. 'Times New Roman', 'Arial', 'Calibri').
+            paragraph_indices (list, optional): 0-based positions in the document's
+                element enumeration. Out-of-range positions are ignored. When empty
+                or None, every element is visited.
+
+        Returns:
+            bool: True on success, False if an exception occurred (see ``cls.ret``).
+        """
+        try:
+            text = cls.doc.getText()
+            enumeration = text.createEnumeration()
+            elements = []
+            while enumeration.hasMoreElements():
+                elements.append(enumeration.nextElement())
+            if not paragraph_indices:
+                paragraph_indices = range(len(elements))
+            for idx in paragraph_indices:
+                if not 0 <= idx < len(elements):
+                    continue
+                element = elements[idx]
+                # Skip content that is not a paragraph (same guard as the sibling
+                # methods) so it cannot abort the call after some paragraphs changed.
+                if not element.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                cursor = text.createTextCursorByRange(element)
+                cursor.CharFontName = font_name
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+    @classmethod
+    def set_line_spacing(cls, spacing_value, paragraph_indices=None):
+        """Set proportional line spacing on the selected paragraphs.
+
+        Paragraph indices are counted the same way get_paragraphs()/env_info() count
+        them: every element supporting the Paragraph service gets an index, whether
+        or not it is empty; other content is skipped.
+
+        Args:
+            spacing_value (float): Spacing factor (1.0 single, 2.0 double, ...).
+            paragraph_indices (list, optional): 0-based paragraph indices. When empty
+                or None, all paragraphs are changed.
+
+        Returns:
+            bool: True on success, False if an exception occurred (see ``cls.ret``).
+        """
+        try:
+            enumeration = cls.doc.getText().createEnumeration()
+            # round(), not int(): 1.15 * 100 is 114.99999999999999 in floating point.
+            height = round(spacing_value * 100)
+            targets = set(paragraph_indices) if paragraph_indices else None
+            index = 0
+            while enumeration.hasMoreElements():
+                element = enumeration.nextElement()
+                if not element.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                if targets is None or index in targets:
+                    spacing = uno.createUnoStruct("com.sun.star.style.LineSpacing")
+                    # Mode 0 is LineSpacingMode.PROP: Height is a percentage.
+                    spacing.Mode = 0
+                    spacing.Height = height
+                    element.ParaLineSpacing = spacing
+                index += 1
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+    @classmethod
+    def remove_highlighting(cls, paragraph_indices=None):
+        """Clear the character background color of the selected paragraphs.
+
+        Args:
+            paragraph_indices (list, optional): 0-based positions in the document's
+                element enumeration (every enumerated element advances the counter).
+                When empty or None, all paragraphs are processed.
+
+        Returns:
+            str: Success or error message, also stored in ``cls.ret``.
+        """
+        try:
+            doc_text = cls.doc.getText()
+            enumeration = doc_text.createEnumeration()
+            wanted = set(paragraph_indices) if paragraph_indices else None
+            position = -1
+            while enumeration.hasMoreElements():
+                element = enumeration.nextElement()
+                position += 1
+                if wanted is not None and position not in wanted:
+                    continue
+                if not element.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                # -1 is written to CharBackColor for the whole paragraph first ...
+                whole = doc_text.createTextCursorByRange(element)
+                whole.CharBackColor = -1
+                # ... and then again for each text portion exposing the property.
+                portions = element.createEnumeration()
+                while portions.hasMoreElements():
+                    portion = portions.nextElement()
+                    if hasattr(portion, "CharBackColor"):
+                        part = doc_text.createTextCursorByRange(portion)
+                        part.CharBackColor = -1
+            cls.ret = "Successfully removed all highlighting"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error removing highlighting: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def find_highlighted_text(cls, highlight_color):
+        """Collect the text portions whose background color equals the given color.
+
+        Args:
+            highlight_color (str): One of the supported color names (case-insensitive)
+                or a '#RRGGBB' hex code.
+
+        Returns:
+            list: Strings of all matching, non-blank text portions. An empty list is
+                returned (and ``cls.ret`` holds an explanation) when the color cannot
+                be parsed; otherwise ``cls.ret`` holds a summary message.
+        """
+        named_colors = {
+            "yellow": 0xFFFF00,
+            "green": 0x00FF00,
+            "blue": 0x0000FF,
+            "red": 0xFF0000,
+            "cyan": 0x00FFFF,
+            "magenta": 0xFF00FF,
+            "black": 0x000000,
+            "white": 0xFFFFFF,
+            "gray": 0x808080,
+            "lightgray": 0xC0C0C0,
+        }
+        lookup_key = highlight_color.lower()
+        if lookup_key in named_colors:
+            target_color = named_colors[lookup_key]
+        elif highlight_color.startswith("#") and len(highlight_color) == 7:
+            digits = highlight_color[1:]
+            try:
+                red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
+            except ValueError:
+                cls.ret = f"Invalid hex color format: {highlight_color}"
+                return []
+            target_color = (red << 16) + (green << 8) + blue
+        else:
+            cls.ret = f"Unsupported color format: {highlight_color}"
+            return []
+
+        segments = []
+        paragraph_enum = cls.doc.getText().createEnumeration()
+        while paragraph_enum.hasMoreElements():
+            element = paragraph_enum.nextElement()
+            if not element.supportsService("com.sun.star.text.Paragraph"):
+                continue
+            portions = element.createEnumeration()
+            while portions.hasMoreElements():
+                portion = portions.nextElement()
+                if not hasattr(portion, "CharBackColor"):
+                    continue
+                if portion.CharBackColor != target_color:
+                    continue
+                content = portion.getString()
+                # Whitespace-only portions are not reported.
+                if content.strip():
+                    segments.append(content)
+        cls.ret = f"Found {len(segments)} text segments with highlight color {highlight_color}"
+        return segments
+
+    @classmethod
+    def insert_formula_at_cursor(cls, formula):
+        """Insert an embedded formula object at the shared cursor.
+
+        Args:
+            formula (str): Text assigned to the embedded object's ``Formula`` property.
+
+        Returns:
+            bool: True on success, False if an exception occurred (see ``cls.ret``).
+        """
+        try:
+            holder = cls.doc.createInstance("com.sun.star.text.TextEmbeddedObject")
+            # The CLSID selects which kind of embedded object is created.
+            holder.setPropertyValue("CLSID", "078B7ABA-54FC-457F-8551-6147e776a997")
+            holder.setPropertyValue("AnchorType", AS_CHARACTER)
+            # The object is inserted before its embedded model is fetched.
+            cls.text.insertTextContent(cls.cursor, holder, False)
+            holder.getEmbeddedObject().Formula = formula
+        except Exception as e:
+            cls.ret = f"Error inserting formula: {str(e)}"
+            return False
+        cls.ret = "Formula inserted successfully"
+        return True
+
+    @classmethod
+    def insert_image_at_cursor(cls, image_path, width=None, height=None):
+        """Insert an image, anchored as a character, at the shared cursor.
+
+        Args:
+            image_path (str): Path to the image file; a leading '~' is expanded.
+            width (int, optional): Display width. The value is multiplied by 100
+                before being assigned to the graphic's ``Width``.
+            height (int, optional): Display height, scaled the same way.
+                NOTE(review): UNO sizes are in 1/100 mm, so these arguments behave
+                as millimetres rather than pixels - confirm the intended unit.
+
+        Returns:
+            str: Success or error message, also stored in ``cls.ret``.
+        """
+        try:
+            image_path = os.path.expanduser(image_path)
+            if not os.path.exists(image_path):
+                cls.ret = f"Error: Image file not found at {image_path}"
+                return cls.ret
+            image_path = os.path.abspath(image_path)
+            graphic = cls.doc.createInstance("com.sun.star.text.GraphicObject")
+            # Let UNO build the URL so spaces and non-ASCII characters are escaped
+            # and platform-specific path forms are handled.
+            graphic.GraphicURL = uno.systemPathToFileUrl(image_path)
+            graphic.AnchorType = AS_CHARACTER
+            if width is not None:
+                graphic.Width = width * 100
+            if height is not None:
+                graphic.Height = height * 100
+            cls.text.insertTextContent(cls.cursor, graphic, False)
+            cls.ret = "Success: Image inserted"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_strikethrough(cls, pattern, paragraph_indices=None):
+        """Strike through every regex match in the selected paragraphs.
+
+        Args:
+            pattern (str): Regular expression to search for in each paragraph's text.
+            paragraph_indices (list, optional): 0-based paragraph indices; only
+                elements supporting the Paragraph service are counted. When empty or
+                None, all paragraphs are processed.
+
+        Returns:
+            str: Success message with the number of matches, or an error message.
+                The same string is stored in ``cls.ret``.
+        """
+        try:
+            enumeration = cls.doc.getText().createEnumeration()
+            paragraph_number = 0
+            applied = 0
+            while enumeration.hasMoreElements():
+                element = enumeration.nextElement()
+                if not element.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                current = paragraph_number
+                paragraph_number += 1
+                if paragraph_indices and current not in paragraph_indices:
+                    continue
+                for hit in list(re.finditer(pattern, element.getString())):
+                    # Fresh cursor at the paragraph start, moved onto the match.
+                    span = cls.doc.getText().createTextCursorByRange(element.getStart())
+                    span.goRight(hit.start(), False)
+                    span.goRight(hit.end() - hit.start(), True)
+                    span.CharStrikeout = 1
+                    applied += 1
+            cls.ret = f"Successfully applied strikethrough to {applied} matches of pattern: {pattern}"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error applying strikethrough: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_font_size(cls, font_size, pattern, paragraph_indices=None):
+        """Change the font size of every regex match in the selected paragraphs.
+
+        Args:
+            font_size (float): Value assigned to ``CharHeight`` (points).
+            pattern (str): Regular expression to search for in each paragraph's text.
+            paragraph_indices (list, optional): 0-based positions in the document's
+                element enumeration (every enumerated element advances the counter).
+                When empty or None, all paragraphs are processed.
+
+        Returns:
+            str: Success or error message, also stored in ``cls.ret``.
+        """
+        try:
+            regex = re.compile(pattern)
+            enumeration = cls.doc.getText().createEnumeration()
+            current_index = 0
+            while enumeration.hasMoreElements():
+                element = enumeration.nextElement()
+                selected = not paragraph_indices or current_index in paragraph_indices
+                current_index += 1
+                if not selected or not element.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                for match in regex.finditer(element.getString()):
+                    # Anchor each cursor at the paragraph's own start. gotoStart() on
+                    # a text cursor moves to the start of the whole text, which put
+                    # the offsets in the wrong place for every paragraph but the first.
+                    span = cls.text.createTextCursorByRange(element.getStart())
+                    span.goRight(match.start(), False)
+                    span.goRight(match.end() - match.start(), True)
+                    span.CharHeight = font_size
+            cls.ret = f"Successfully changed font size to {font_size} for text matching '{pattern}'"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error changing font size: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def export_to_pdf(cls, output_path=None, output_filename=None, include_comments=False, quality="standard"):
+        """Export the current document to PDF.
+
+        Args:
+            output_path (str, optional): Directory for the PDF. Defaults to the
+                directory of the document itself.
+            output_filename (str, optional): File name for the PDF; '.pdf' is appended
+                when missing. Defaults to the document's name with a .pdf extension
+                ('export.pdf' for a document that has no URL yet).
+            include_comments (bool, optional): Value passed as ``ExportNotes``.
+            quality (str, optional): 'standard', 'high' or 'print'.
+
+        Returns:
+            str: Path of the exported file on success, otherwise an error message.
+                ``cls.ret`` always holds a status message.
+        """
+        try:
+            doc_url = cls.doc.getURL()
+            if not doc_url and not output_path:
+                cls.ret = "Error: Document has not been saved and no output path provided"
+                return cls.ret
+            if doc_url:
+                # Convert the URL first so percent-escapes (e.g. %20) do not leak
+                # into the default file name.
+                doc_path, doc_filename = os.path.split(uno.fileUrlToSystemPath(doc_url))
+                doc_name = os.path.splitext(doc_filename)[0]
+            else:
+                doc_path = ""
+                doc_name = "export"
+            final_path = output_path if output_path else doc_path
+            final_filename = output_filename if output_filename else f"{doc_name}.pdf"
+            if not final_filename.lower().endswith(".pdf"):
+                final_filename += ".pdf"
+            full_output_path = os.path.join(final_path, final_filename)
+            output_url = uno.systemPathToFileUrl(full_output_path)
+            # NOTE(review): quality is mapped onto SelectPdfVersion 0/1/2 and passed as
+            # a top-level store property - confirm against the PDF export filter
+            # documentation that this has the intended effect.
+            pdf_version = {"high": 1, "print": 2}.get(quality, 0)
+            export_props = (
+                PropertyValue(Name="SelectPdfVersion", Value=pdf_version),
+                PropertyValue(Name="ExportNotes", Value=include_comments),
+                PropertyValue(Name="FilterName", Value="writer_pdf_Export"),
+                PropertyValue(Name="Overwrite", Value=True),
+            )
+            cls.doc.storeToURL(output_url, export_props)
+            cls.ret = f"PDF exported to: {full_output_path}"
+            return full_output_path
+        except Exception as e:
+            cls.ret = f"Error exporting to PDF: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_paragraph_alignment(cls, alignment, paragraph_indices=None):
+        """Set the horizontal alignment of the selected paragraphs.
+
+        Args:
+            alignment (str): 'left', 'center', 'right' or 'justify' (case-insensitive).
+            paragraph_indices (list, optional): 0-based paragraph indices; only
+                elements supporting the Paragraph service are counted. Out-of-range
+                indices are skipped and mentioned in the result message. When empty
+                or None, all paragraphs are changed.
+
+        Returns:
+            str: Success or error message, also stored in ``cls.ret``.
+        """
+        try:
+            alignment_map = {
+                "left": LEFT,
+                "center": CENTER,
+                "right": RIGHT,
+                # Justified text is ParagraphAdjust.BLOCK; the literal 3 used before
+                # is the enum's CENTER value.
+                "justify": uno.Enum("com.sun.star.style.ParagraphAdjust", "BLOCK"),
+            }
+            if alignment.lower() not in alignment_map:
+                cls.ret = f"Error: Invalid alignment '{alignment}'. Use 'left', 'center', 'right', or 'justify'."
+                return cls.ret
+            alignment_value = alignment_map[alignment.lower()]
+            paragraph_enum = cls.doc.getText().createEnumeration()
+            paragraphs = []
+            while paragraph_enum.hasMoreElements():
+                paragraph = paragraph_enum.nextElement()
+                if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                    paragraphs.append(paragraph)
+            warning = ""
+            if paragraph_indices:
+                valid_indices = [i for i in paragraph_indices if 0 <= i < len(paragraphs)]
+                if len(valid_indices) != len(paragraph_indices):
+                    # Kept separately so the success message does not overwrite it.
+                    warning = f"; warning: some paragraph indices were out of range (0-{len(paragraphs) - 1})"
+                targets = [paragraphs[i] for i in valid_indices]
+            else:
+                targets = paragraphs
+            for paragraph in targets:
+                paragraph.ParaAdjust = alignment_value
+            cls.ret = f"Successfully applied '{alignment}' alignment to paragraphs" + warning
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error setting paragraph alignment: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def capitalize_words(cls, paragraph_indices=None):
+        """Apply ``str.capitalize`` to each space-separated word of the selected paragraphs.
+
+        Note that ``str.capitalize`` also lower-cases the rest of each word.
+        Paragraphs containing only whitespace are left untouched.
+
+        Args:
+            paragraph_indices (list, optional): 0-based paragraph indices; only
+                elements supporting the Paragraph service are counted. Out-of-range
+                indices are ignored. When empty or None, all paragraphs are processed.
+
+        Returns:
+            str: Success or error message, also stored in ``cls.ret``.
+        """
+        try:
+            doc_text = cls.doc.getText()
+            enumeration = doc_text.createEnumeration()
+            collected = []
+            while enumeration.hasMoreElements():
+                element = enumeration.nextElement()
+                if element.supportsService("com.sun.star.text.Paragraph"):
+                    collected.append(element)
+            if paragraph_indices:
+                requested = paragraph_indices
+            else:
+                requested = list(range(len(collected)))
+            in_range = [i for i in requested if 0 <= i < len(collected)]
+            for i in in_range:
+                para = collected[i]
+                original_text = para.getString()
+                if not original_text.strip():
+                    continue
+                pieces = [piece.capitalize() for piece in original_text.split(" ")]
+                # Select from the paragraph's start to its end and overwrite the text.
+                span = doc_text.createTextCursorByRange(para.getStart())
+                span.gotoRange(para.getEnd(), True)
+                span.setString(" ".join(pieces))
+            cls.ret = f"Successfully capitalized words in {len(in_range)} paragraphs"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error capitalizing words: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_default_font(cls, font_name, font_size=None):
+        """Set the font on the default paragraph style and on the shared cursor.
+
+        The style is the first of 'Default', 'Standard', 'Normal' that exists in the
+        ParagraphStyles family, falling back to the family's first style.
+
+        Args:
+            font_name (str): Font name written to the Western, Asian and Complex
+                font-name properties.
+            font_size (float, optional): Size in points written to the matching
+                height properties; skipped when None.
+
+        Returns:
+            str: Success or error message, also stored in ``cls.ret``.
+        """
+        try:
+            paragraph_styles = cls.doc.getStyleFamilies().getByName("ParagraphStyles")
+            base_style = None
+            for candidate in ("Default", "Standard", "Normal"):
+                if paragraph_styles.hasByName(candidate):
+                    base_style = paragraph_styles.getByName(candidate)
+                    break
+            if base_style is None:
+                available = paragraph_styles.getElementNames()
+                if not available:
+                    raise Exception("Could not find default paragraph style")
+                base_style = paragraph_styles.getByName(available[0])
+            name_props = ("CharFontName", "CharFontNameAsian", "CharFontNameComplex")
+            height_props = ("CharHeight", "CharHeightAsian", "CharHeightComplex")
+            # The style is updated first, then the shared cursor receives the same values.
+            for receiver in (base_style, cls.cursor):
+                for prop in name_props:
+                    receiver.setPropertyValue(prop, font_name)
+                if font_size is not None:
+                    for prop in height_props:
+                        receiver.setPropertyValue(prop, float(font_size))
+            size_note = f" with size {font_size}pt" if font_size else ""
+            cls.ret = f"Default font set to '{font_name}'" + size_note
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error setting default font: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def add_page_numbers(cls, position, start_number=1, format=None):
+        """Put a page-number field into the header or footer of the 'Standard' page style.
+
+        Any existing header/footer text is replaced.
+
+        Args:
+            position (str): 'top_*' selects the header, anything else the footer; a
+                '_left', '_center' or '_right' suffix sets the paragraph alignment.
+            start_number (int, optional): Written to the style's ``PageNumberOffset``
+                property on a best-effort basis (failures are ignored).
+            format (str, optional): Empty/None or '1' gives a bare number. Otherwise
+                'Page' in the string adds a "Page " prefix and 'of' in the string
+                appends " of <page count>".
+
+        Returns:
+            str: Success or error message, also stored in ``cls.ret``.
+        """
+        try:
+            page_styles = cls.doc.StyleFamilies.getByName("PageStyles")
+            default_style = page_styles.getByName("Standard")
+            try:
+                default_style.setPropertyValue("PageNumberOffset", start_number)
+            except Exception:
+                # Deliberately best-effort: numbering is still inserted when the
+                # style rejects this property.
+                pass
+            if position.startswith("top"):
+                default_style.HeaderIsOn = True
+                target = default_style.HeaderText
+            else:
+                default_style.FooterIsOn = True
+                target = default_style.FooterText
+            # Clear the existing header/footer content.
+            cursor = target.createTextCursor()
+            cursor.gotoStart(False)
+            cursor.gotoEnd(True)
+            cursor.setString("")
+            cursor.gotoStart(False)
+            if position.endswith("_left"):
+                cursor.ParaAdjust = LEFT
+            elif position.endswith("_center"):
+                cursor.ParaAdjust = CENTER
+            elif position.endswith("_right"):
+                cursor.ParaAdjust = RIGHT
+
+            def insert_field(service_name):
+                # Create a text field of the given service and insert it at the cursor.
+                field = cls.doc.createInstance(service_name)
+                field.NumberingType = 4  # com.sun.star.style.NumberingType.ARABIC
+                target.insertTextContent(cursor, field, False)
+
+            custom = bool(format) and format != "1"
+            with_prefix = custom and "Page" in format
+            with_total = custom and "of" in format
+            if with_prefix:
+                target.insertString(cursor, "Page ", False)
+            insert_field("com.sun.star.text.TextField.PageNumber")
+            if with_total:
+                target.insertString(cursor, " of ", False)
+                insert_field("com.sun.star.text.TextField.PageCount")
+            cls.ret = "Successfully added page numbers"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error adding page numbers: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def insert_page_break(cls, position="at_cursor"):
+        """Insert a paragraph break and mark the new paragraph to start on a new page.
+
+        Args:
+            position (str): 'end_of_document' first moves the shared cursor to the
+                end of the text; any other value inserts at the cursor's current
+                position. Defaults to 'at_cursor'.
+
+        Returns:
+            bool: True on success, False if an exception occurred (see ``cls.ret``).
+        """
+        try:
+            cursor = cls.cursor
+            if position == "end_of_document":
+                cursor.gotoEnd(False)
+            cls.text.insertControlCharacter(cursor, PARAGRAPH_BREAK, False)
+            cursor.gotoStartOfParagraph(True)
+            cursor.BreakType = uno.Enum("com.sun.star.style.BreakType", "PAGE_BEFORE")
+        except Exception as e:
+            cls.ret = f"Error inserting page break: {str(e)}"
+            return False
+        cls.ret = "Page break inserted successfully"
+        return True
diff --git a/mm_agents/autoglm/tools/package/vlc.py b/mm_agents/autoglm/tools/package/vlc.py
new file mode 100644
index 0000000..c3a14fc
--- /dev/null
+++ b/mm_agents/autoglm/tools/package/vlc.py
@@ -0,0 +1,233 @@
+import json
+import os
+import re
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from urllib.parse import quote
+
+import requests
+from requests.auth import HTTPBasicAuth
+
+
+class VLCTools:
+    """Tool methods that control VLC through its HTTP interface.
+
+    All state is held on the class; every tool method reports its outcome in ``ret``.
+    """
+
+    # Address of the HTTP interface the requests are sent to.
+    host = "localhost"
+    port = 8080
+    base_url = f"http://{host}:{port}/requests"
+    # Credentials are hard-coded: empty user name with this password.
+    password = "password"
+    auth = HTTPBasicAuth("", password)
+    # Result of the most recent tool call; print_result() prints it.
+    ret = ""
+
+    @classmethod
+    def print_result(cls):
+        """Print the value currently stored in ``cls.ret`` to standard output."""
+        print(cls.ret)
+
+    @classmethod
+    def _make_request(cls, endpoint, params=None, timeout=10):
+        """Send an authenticated GET request to ``<base_url>/<endpoint>``.
+
+        Args:
+            endpoint (str): Path below ``cls.base_url`` (e.g. 'status.xml').
+            params (dict, optional): Query parameters.
+            timeout (float, optional): Seconds to wait for the server, so that an
+                unresponsive player cannot block the caller indefinitely.
+
+        Returns:
+            requests.Response | None: The response, or None when the request failed
+                or returned an HTTP error status.
+        """
+        url = f"{cls.base_url}/{endpoint}"
+        try:
+            response = requests.get(url, params=params, auth=cls.auth, timeout=timeout)
+            response.raise_for_status()
+        except requests.exceptions.RequestException:
+            return None
+        return response
+
+    @classmethod
+    def _get_status(cls):
+        """Fetch ``status.xml`` and return its parsed root element, or None on failure."""
+        reply = cls._make_request("status.xml")
+        return ET.fromstring(reply.content) if reply else None
+
+    @classmethod
+    def env_info(cls):
+        """Set ``cls.ret`` to the literal string "None"; nothing is queried from VLC."""
+        cls.ret = "None"
+
+    @classmethod
+    def get_playlist(cls):
+        """Read ``playlist.xml`` and describe the items directly under the 'Playlist' node.
+
+        Returns:
+            str | None: "Playlist: [...]" with one dict (name, uri, duration) per
+                item, or None when the request failed or the node is missing.
+                ``cls.ret`` holds the same string or an error message.
+        """
+        response = cls._make_request("playlist.xml")
+        if response:
+            playlist_node = ET.fromstring(response.content).find('.//node[@name="Playlist"]')
+            if playlist_node is not None:
+                playlist_items = []
+                for leaf in playlist_node.findall("leaf"):
+                    duration = leaf.get("duration")
+                    playlist_items.append(
+                        {
+                            "name": leaf.get("name"),
+                            "uri": leaf.get("uri"),
+                            # The attribute may be absent; do not concatenate None.
+                            "duration": f"{duration}s" if duration is not None else None,
+                        }
+                    )
+                cls.ret = f"Playlist: {playlist_items}"
+                return cls.ret
+        cls.ret = "Error getting playlist"
+        return None
+
+    @classmethod
+    def play(cls):
+        """Send the ``pl_play`` command; return the status message, or None on failure."""
+        if not cls._make_request("status.xml", {"command": "pl_play"}):
+            cls.ret = "Error playing the media"
+            return None
+        cls.ret = "Start playing the media"
+        return cls.ret
+
+    @classmethod
+    def pause(cls):
+        """Send the ``pl_pause`` command; return the status message, or None on failure."""
+        if not cls._make_request("status.xml", {"command": "pl_pause"}):
+            cls.ret = "Error pausing the media"
+            return None
+        cls.ret = "Pause the media"
+        return cls.ret
+
+    @classmethod
+    def next(cls):
+        """Send the ``pl_next`` command; return the status message, or None on failure."""
+        if not cls._make_request("status.xml", {"command": "pl_next"}):
+            cls.ret = "Error switching to next media"
+            return None
+        cls.ret = "Switch to next media"
+        return cls.ret
+
+    @classmethod
+    def previous(cls):
+        """Send the ``pl_previous`` command; return the status message, or None on failure."""
+        if not cls._make_request("status.xml", {"command": "pl_previous"}):
+            cls.ret = "Error switching to previous media"
+            return None
+        cls.ret = "Switch to previous media"
+        return cls.ret
+
+    @classmethod
+    def add_to_playlist(cls, uri):
+        """Send the ``in_play`` command for the given media location.
+
+        Args:
+            uri (str): A location starting with 'http' is passed through unchanged.
+                Anything else is treated as a local path: every 'file://' substring
+                is removed, the rest is percent-quoted and prefixed with 'file://'.
+
+        Returns:
+            str | None: Status message, or None when the request failed.
+        """
+        target = uri if uri.startswith("http") else "file://" + quote(uri.replace("file://", ""))
+        if not cls._make_request("status.xml", {"command": "in_play", "input": target}):
+            cls.ret = f"Error adding {uri} to playlist"
+            return None
+        cls.ret = f"Add {uri} to playlist"
+        return cls.ret
+
+    @classmethod
+    def get_current_time(cls):
+        """Return the integer value of the ``<time>`` element of ``status.xml``.
+
+        Returns:
+            int | None: The parsed value, or None when the status could not be
+                fetched or has no usable ``<time>`` element. ``cls.ret`` holds the
+                same value, or an error message when the status request failed.
+        """
+        status = cls._get_status()
+        if status is None:
+            # Report the failure instead of leaving a stale result behind.
+            cls.ret = "Error getting current time"
+            return None
+        time_node = status.find("time")
+        if time_node is None or time_node.text is None:
+            cls.ret = None
+        else:
+            cls.ret = int(time_node.text)
+        return cls.ret
+
+    @classmethod
+    def get_media_duration(cls):
+        """Report the text of the ``<length>`` element of ``status.xml``.
+
+        Returns:
+            str | None: "Media duration: <length> seconds", or None when the status
+                or the element is unavailable (``cls.ret`` then holds an error message).
+        """
+        status = cls._get_status()
+        length = status.find("length") if status is not None else None
+        if length is None:
+            cls.ret = "Error getting media duration"
+            return None
+        cls.ret = f"Media duration: {length.text} seconds"
+        return cls.ret
+
+    @classmethod
+    def get_settings(cls):
+        """Read the active ``key=value`` settings from ``~/.config/vlc/vlcrc``.
+
+        Lines without '=' and lines whose key starts with '#' are skipped. The line
+        is split at the first '=' only, so values may themselves contain '='.
+
+        Returns:
+            str: The settings as an indented JSON object, also stored in ``cls.ret``.
+
+        Raises:
+            OSError: If the configuration file cannot be opened.
+        """
+        settings = {}
+        with open(Path.home() / ".config/vlc/vlcrc", "r") as f:
+            for line in f:
+                key, separator, value = line.partition("=")
+                key = key.strip()
+                if not separator or key.startswith("#"):
+                    continue
+                settings[key] = value.strip()
+        cls.ret = json.dumps(settings, indent=4, ensure_ascii=False)
+        return cls.ret
+
+    @classmethod
+    def set_settings(cls, field, value):
+        """Set ``field=value`` in ``~/.config/vlc/vlcrc``.
+
+        Every line that consists of the field (optionally commented out with '#') is
+        replaced by an active ``field=value`` line; if there is none, the line is
+        appended to the file.
+
+        Args:
+            field (str): Exact option name.
+            value: New value; inserted literally.
+
+        Returns:
+            str: Confirmation message, also stored in ``cls.ret``.
+
+        Raises:
+            OSError: If the configuration file cannot be read or written.
+        """
+        config_path = Path.home() / ".config/vlc/vlcrc"
+        with open(config_path, "r") as rf:
+            settings = rf.read()
+
+        # Anchor at the line start so that e.g. "volume" does not also rewrite
+        # "audio-volume=...".
+        pattern = re.compile(r"^#? *" + re.escape(field) + r"=.*", re.MULTILINE)
+        new_line = f"{field}={value}"
+        if pattern.search(settings):
+            # A callable replacement keeps backslashes in the value literal.
+            settings = pattern.sub(lambda _match: new_line, settings)
+        else:
+            if settings and not settings.endswith("\n"):
+                settings += "\n"
+            settings += new_line + "\n"
+
+        with open(config_path, "w") as wf:
+            wf.write(settings)
+
+        cls.ret = f"Set {field} to {value}"
+        return cls.ret
+
+    @classmethod
+    def toggle_fullscreen(cls, enable=None):
+        """Toggle fullscreen mode, or bring it to an explicit state.
+
+        The HTTP interface only offers a toggle command, so for an explicit request
+        the current state is read from ``status.xml`` first and the toggle is sent
+        only when the state actually has to change.
+
+        Args:
+            enable (bool, optional): True for fullscreen, False for windowed,
+                None to toggle unconditionally.
+
+        Returns:
+            str | None: Status message, or None on failure (``cls.ret`` then holds
+                an error message).
+        """
+        action = "enabled" if enable is True else "disabled" if enable is False else "toggled"
+        if enable is not None:
+            status = cls._get_status()
+            if status is None:
+                cls.ret = "Error changing fullscreen mode"
+                return None
+            node = status.find("fullscreen")
+            # NOTE(review): assumes the element text is "true"/"false" or "1"/"0" -
+            # confirm against the status.xml produced by the deployed VLC version.
+            current = node is not None and (node.text or "").strip().lower() in ("true", "1")
+            if current == bool(enable):
+                # Already in the requested state; a toggle would undo it.
+                cls.ret = f"Fullscreen mode {action}"
+                return cls.ret
+        response = cls._make_request("status.xml", {"command": "fullscreen"})
+        if response:
+            cls.ret = f"Fullscreen mode {action}"
+            return cls.ret
+        cls.ret = "Error changing fullscreen mode"
+        return None
+
+    @classmethod
+    def get_media_files(cls, path, suffix=None):
+        """Recursively list the files under ``path`` whose extension is in ``suffix``.
+
+        Args:
+            path (str): Directory to scan.
+            suffix (List[str], optional): Extensions without the leading dot, matched
+                case-insensitively. Defaults to
+                ['mp4', 'avi', 'mkv', 'mov', 'mp3', 'm4a', 'wav'].
+
+        Returns:
+            list | None: Full paths of the matching files, or None when ``path`` is
+                empty, does not exist, or scanning failed (``cls.ret`` then holds an
+                error message). On success ``cls.ret`` holds the same list.
+        """
+        if suffix is None:
+            suffix = ["mp4", "avi", "mkv", "mov", "mp3", "m4a", "wav"]
+
+        if not path:
+            cls.ret = "Path cannot be empty"
+            return None
+        if not os.path.exists(path):
+            cls.ret = f"Path not found: {path}"
+            return None
+
+        # Dotted, lower-cased endings so one endswith() call can test them all.
+        endings = tuple(f".{s.lower()}" for s in suffix)
+        found = []
+        try:
+            for root, _, names in os.walk(path):
+                found.extend(os.path.join(root, name) for name in names if name.lower().endswith(endings))
+        except Exception as e:
+            cls.ret = f"Error while scanning directory: {str(e)}"
+            return None
+
+        cls.ret = found
+        return cls.ret
diff --git a/run_autoglm.py b/run_autoglm.py
new file mode 100644
index 0000000..9456ab8
--- /dev/null
+++ b/run_autoglm.py
@@ -0,0 +1,495 @@
+"""Script to run end-to-end evaluation on the benchmark.
+Utils and basic architecture credit to https://github.com/web-arena-x/webarena/blob/main/run.py.
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+import math
+import ast
+import time
+
+import backoff
+import httpx
+from openai import APIConnectionError, APIError, OpenAI, RateLimitError
+from requests.exceptions import SSLError
+from tqdm import tqdm
+
+import lib_run_single
+from desktop_env.desktop_env import DesktopEnv as DesktopEnvBase
+from mm_agents.autoglm import AutoGLMAgent
+
+# Almost deprecated since it's not multi-env, use run_multienv_*.py instead
+
+# Logger Configs {{{ #
+# The root logger accepts everything; each handler below narrows what it emits.
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+
+# logging.FileHandler does not create missing parent directories, so make sure
+# the log directory exists before the log files are opened.
+os.makedirs("logs", exist_ok=True)
+
+file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
+debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
+stdout_handler = logging.StreamHandler(sys.stdout)
+sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
+
+file_handler.setLevel(logging.INFO)
+debug_handler.setLevel(logging.DEBUG)
+stdout_handler.setLevel(logging.INFO)
+sdebug_handler.setLevel(logging.DEBUG)
+
+# ANSI colour escapes are embedded in the format and therefore also end up in the log files.
+formatter = logging.Formatter(
+    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
+)
+file_handler.setFormatter(formatter)
+debug_handler.setFormatter(formatter)
+stdout_handler.setFormatter(formatter)
+sdebug_handler.setFormatter(formatter)
+
+# Only records from the "desktopenv" logger hierarchy reach stdout and the sdebug file.
+stdout_handler.addFilter(logging.Filter("desktopenv"))
+sdebug_handler.addFilter(logging.Filter("desktopenv"))
+
+logger.addHandler(file_handler)
+logger.addHandler(debug_handler)
+logger.addHandler(stdout_handler)
+logger.addHandler(sdebug_handler)
+# }}} Logger Configs #
+
+# From here on `logger` is the experiment logger, not the root logger.
+logger = logging.getLogger("desktopenv.experiment")
+
+
+def config() -> argparse.Namespace:
+    """Parse the command-line options of the single-environment AutoGLM runner.
+
+    Returns:
+        argparse.Namespace: The options parsed from ``sys.argv``.
+    """
+    parser = argparse.ArgumentParser(description="Run end-to-end evaluation on the benchmark")
+
+    # (flag, add_argument keyword arguments), registered in this order.
+    option_table = [
+        # environment config
+        ("--path_to_vm", {"type": str}),
+        (
+            "--provider_name",
+            {
+                "type": str,
+                "default": "docker",
+                "help": "Virtualization provider (vmware, docker, aws, azure, gcp, virtualbox)",
+            },
+        ),
+        # store_true combined with default=True: the value is True whether or not the flag is given.
+        ("--headless", {"action": "store_true", "default": True, "help": "Run in headless machine"}),
+        ("--action_space", {"type": str, "default": "autoglm_computer_use", "help": "Action type"}),
+        (
+            "--observation_type",
+            {
+                "choices": ["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"],
+                "default": "a11y_tree",
+                "help": "Observation type",
+            },
+        ),
+        ("--screen_width", {"type": int, "default": 1920}),
+        ("--screen_height", {"type": int, "default": 1080}),
+        ("--sleep_after_execution", {"type": float, "default": 1.0}),
+        ("--max_steps", {"type": int, "default": 50}),
+        # agent config
+        ("--max_trajectory_length", {"type": int, "default": 3}),
+        ("--test_config_base_dir", {"type": str, "default": "evaluation_examples"}),
+        # lm config
+        ("--model", {"type": str, "default": "autoglm-os"}),
+        ("--temperature", {"type": float, "default": 0.4}),
+        ("--top_p", {"type": float, "default": 0.5}),
+        ("--max_tokens", {"type": int, "default": 4096}),
+        ("--stop_token", {"type": str, "default": None}),
+        # example config
+        ("--domain", {"type": str, "default": "all"}),
+        ("--test_all_meta_path", {"type": str, "default": "evaluation_examples/test_nogdrive.json"}),
+        # logging related
+        ("--result_dir", {"type": str, "default": "./results"}),
+    ]
+    for flag, options in option_table:
+        parser.add_argument(flag, **options)
+
+    return parser.parse_args()
+
+
+class DesktopEnv(DesktopEnvBase):
+    """DesktopEnvBase variant used by the AutoGLM agent.
+
+    It overrides ``step``/``reset``/``_get_obs`` so that every observation also
+    carries the textual result of the last action, the list of open windows
+    and, for supported applications, the output of the uploaded tool helpers.
+    """
+
+    def step(self, action, pause=2):
+        """Execute one agent action and return the resulting observation.
+
+        Args:
+            action: One of the special strings 'WAIT', 'FAIL', 'DONE'; a dict
+                with 'action_type' ('OPEN_APP' or 'OPEN_CHROME_TAB') and
+                'parameters'; or anything else, which is forwarded to
+                ``controller.execute_python_command``.
+            pause: Seconds slept after the action (and additionally for 'WAIT').
+
+        Returns:
+            tuple: ``(observation, reward, done, info)``; the observation has
+            an extra 'exe_result' entry describing the outcome of the action.
+        """
+        self._step_no += 1
+        self.action_history.append(action)
+
+        # Mark environment as used when step is called
+        self.is_environment_used = True
+
+        reward = 0  # todo: Define reward calculation for each example
+        done = False  # todo: Define episode termination condition for each example
+        info = {}
+        logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")
+
+        # handle the special actions
+        if action in ['WAIT', 'FAIL', 'DONE']:
+            if action == 'WAIT':
+                time.sleep(pause)
+                exe_result = 'Wait ' + str(pause) + ' seconds'
+            elif action == 'FAIL':
+                done = True
+                info = {"fail": True}
+                exe_result = 'Finish: fail'
+            else:  # 'DONE'
+                done = True
+                info = {"done": True}
+                exe_result = 'Finish: success'
+        elif isinstance(action, dict):
+            action_type = action.get('action_type')
+            if action_type == 'OPEN_APP':
+                self.setup_controller._launch_setup(action['parameters']['launch_app_command'], shell=True)
+                exe_result = 'Open ' + action['parameters']['app_name']
+            elif action_type == 'OPEN_CHROME_TAB':
+                self.setup_controller._chrome_open_tabs_setup(action['parameters']['urls_to_open'])
+                exe_result = 'Open ' + str(action['parameters']['urls_to_open']) + ' in Chrome successfully'
+            else:
+                # Without this branch exe_result stayed unbound and the
+                # assignment to the observation below raised UnboundLocalError.
+                exe_result = f"Error Action: {action}"
+                logger.error(f"Unsupported action type: {action_type}")
+        else:
+            # the set of all possible python commands insides `pyautogui`
+            result = self.controller.execute_python_command(action)
+            try:
+                if result['error']:
+                    exe_result = result['error'].strip()
+                else:
+                    exe_result = result['output'].strip()
+            except Exception as e:
+                # f-string instead of concatenation: action is not guaranteed to be a str
+                exe_result = f"Error Action: {action}"
+                logger.error(f"Error executing action: {e}")
+
+        time.sleep(pause)
+        observation = self._get_obs()
+        observation['exe_result'] = exe_result
+
+        return observation, reward, done, info
+
+    def reset(self, *args, **kwargs):
+        """Upload the AutoGLM tool files, start soffice, then reset the base environment.
+
+        All arguments are forwarded to the base class ``reset``.
+
+        Returns:
+            Whatever the base class ``reset`` returns.
+        """
+        # Upload tools from autoglm package
+        import mm_agents.autoglm
+        tool_dir = os.path.join(os.path.dirname(mm_agents.autoglm.__file__), 'tools', 'package')
+        for file in os.listdir(tool_dir):
+            if os.path.isdir(os.path.join(tool_dir, file)):
+                continue
+            self.setup_controller._upload_file_setup([{
+                "local_path": os.path.join(tool_dir, file),
+                "path": os.path.join('/home/user', file)
+            }])
+
+        # start soffice service for office tools
+        self.setup_controller._launch_setup('soffice --accept="socket,host=localhost,port=2002;urp;" --norestore --nologo --nodefault', shell=True)
+        time.sleep(5)
+
+        # Propagate the base-class result instead of silently dropping it.
+        return super().reset(*args, **kwargs)
+
+    def get_current_apps(self):
+        """Query the open windows and the active window through ``wmctrl``.
+
+        Returns:
+            tuple: ``(app_list, cur_id)`` where ``app_list`` maps a window id to
+            ``{'app_name': ..., 'title': ...}`` for windows whose second
+            ``wmctrl -xl`` column is '0', and ``cur_id`` is the id printed for
+            the active window.
+        """
+        apps_code = r"""import subprocess;
+command = "wmctrl -xl";
+apps = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip().split('\n');
+print(apps);"""
+        window_code = r"""import subprocess;
+command = "wmctrl -a :ACTIVE: -v 2>&1 | grep 'Using window' | awk '{print $3}'";
+window_id = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
+print(window_id);"""
+
+        # The remote script prints a Python list literal; parse it back safely.
+        apps = self.controller.execute_python_command(apps_code)['output'].strip()
+        apps = ast.literal_eval(apps)
+        app_list = {}
+
+        for app in apps:
+            parts = app.split(maxsplit=4)
+            if len(parts) < 4:
+                continue
+            if parts[1] != '0':
+                continue
+            window_id = parts[0]
+            # Keep the trailing half (rounded up) of the dot-separated WM_CLASS components.
+            app_name = '.'.join(parts[2].split('.')[-(math.ceil(parts[2].count('.') / 2)):])
+            # NOTE(review): parts[3] is the fourth column of `wmctrl -xl`, which is
+            # presumably the client machine name rather than the window title - confirm.
+            title = parts[3]
+            app_list[window_id] = {
+                'app_name': app_name,
+                'title': title
+            }
+
+        cur_id = self.controller.execute_python_command(window_code)['output'].strip()
+
+        return app_list, cur_id
+
+    def maximize_window(self):
+        """Try up to five times to maximize the active window; failures are only logged."""
+        window_state = r"""import subprocess;
+command = "xprop -id $(xprop -root _NET_ACTIVE_WINDOW | awk -F' ' '{print $5}') _NET_WM_STATE"
+output = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
+print(output);"""
+        for _ in range(5):
+            try:
+                self.setup_controller._launch_setup('wmctrl -r :ACTIVE: -b add,maximized_vert,maximized_horz', shell=True)
+                time.sleep(2)
+                output = self.controller.execute_python_command(window_state)['output'].strip()
+                # Stop when there is no focused window, it is a popup, it is a
+                # modal window, or the window is already maximized.
+                if (
+                    '_NET_WM_STATE_FOCUSED' not in output
+                    or '_NET_WM_STATE_SKIP_TASKBAR' in output
+                    or '_NET_WM_STATE_MODAL' in output
+                    or '_NET_WM_STATE_MAXIMIZED' in output
+                ):
+                    return
+            except Exception as e:
+                logger.error(f"Failed to maximize window: {e}")
+                time.sleep(1)
+
+    def _get_obs(self):
+        """Build the observation dict handed to the agent.
+
+        Returns:
+            dict: screenshot, accessibility tree, instruction, open windows,
+            active window id, active application name and, when the active
+            application has a tool class, the output of its ``env_info``.
+        """
+        # Maps the normalised application name to the tool class uploaded in reset().
+        tool_list = {
+            "libreoffice_calc": "CalcTools",
+            "libreoffice_impress": "ImpressTools",
+            "libreoffice_writer": "WriterTools",
+            "code": "CodeTools",
+            "vlc": "VLCTools",
+            "google_chrome": "BrowserTools"
+        }
+
+        self.maximize_window()
+
+        for attempt in range(3):
+            try:
+                app_list, cur_id = self.get_current_apps()
+                # Stop retrying once the query succeeded; without this break the
+                # remote commands were executed three times on every observation.
+                break
+            except Exception as e:
+                if attempt == 2:
+                    raise
+                logger.error(f"Failed to get current apps: {e}")
+                time.sleep(1)
+
+        if cur_id in app_list:
+            cur_app = app_list[cur_id]['app_name']
+
+            tool_name = cur_app.strip().lower().replace('-', '_')
+            if tool_name in tool_list:
+                class_name = tool_list[tool_name]
+                command = f"from {tool_name} import *; "
+                command += f"{class_name}.env_info(); "
+                command += f"{class_name}.print_result();"
+                app_info = self.controller.execute_python_command(command)['output'].strip()
+            else:
+                app_info = None
+        else:
+            cur_app = None
+            app_info = None
+
+        tree = self.controller.get_accessibility_tree()
+        screenshot = self.controller.get_screenshot()
+        if screenshot is None:
+            logger.error("Failed to get screenshot.")
+            screenshot = b''
+
+        return {
+            "screenshot": screenshot,
+            "accessibility_tree": tree,
+            "instruction": self.instruction,
+            "apps": app_list,
+            "cur_window_id": cur_id,
+            "cur_app": cur_app,
+            "app_info": app_info,
+        }
+
+
+def test(args: argparse.Namespace, test_all_meta: dict) -> None:
+    """Run every example in ``test_all_meta`` sequentially in one environment.
+
+    Args:
+        args: Parsed command-line options (see ``config``).
+        test_all_meta: Mapping of domain name to the example ids to run.
+    """
+    scores = []
+    max_steps = args.max_steps
+
+    # log args
+    logger.info("Args: %s", args)
+    # set wandb project
+    # NOTE: cfg_args is only filled in here; nothing in this function consumes it.
+    cfg_args = {
+        "path_to_vm": args.path_to_vm,
+        "provider_name": args.provider_name,
+        "headless": args.headless,
+        "action_space": args.action_space,
+        "observation_type": args.observation_type,
+        "screen_width": args.screen_width,
+        "screen_height": args.screen_height,
+        "sleep_after_execution": args.sleep_after_execution,
+        "max_steps": args.max_steps,
+        "max_trajectory_length": args.max_trajectory_length,
+        "model": args.model,
+        "temperature": args.temperature,
+        "top_p": args.top_p,
+        "max_tokens": args.max_tokens,
+        "stop_token": args.stop_token,
+        "result_dir": args.result_dir,
+    }
+
+    # Retry indefinitely, every 0.1 s, on rate-limit and connection errors.
+    @backoff.on_exception(
+        backoff.constant,
+        (RateLimitError, APIConnectionError),
+        interval=0.1,
+    )
+    def call_llm(messages):
+        logger.info("Calling LLM...")
+        # set api_key and base_url by environment variables
+        engine = OpenAI(timeout=60.0)
+        response = engine.chat.completions.create(
+            model=args.model,
+            messages=messages,
+            max_tokens=args.max_tokens,
+            temperature=args.temperature,
+            top_p=args.top_p,
+        )
+        logger.info("LLM called successfully.")
+        return response.choices[0].message.content
+
+    env = DesktopEnv(
+        provider_name=args.provider_name,
+        path_to_vm=args.path_to_vm,
+        # Was `agent.action_space`, but `agent` is only created below, which
+        # raised UnboundLocalError before any example could run.
+        action_space=args.action_space,
+        screen_size=(args.screen_width, args.screen_height),
+        headless=args.headless,
+        os_type="Ubuntu",
+        require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
+    )
+    try:
+        agent = AutoGLMAgent(
+            action_space=args.action_space,
+            observation_type=args.observation_type,
+            max_trajectory_length=args.max_trajectory_length,
+            gen_func=call_llm,
+        )
+
+        for domain in tqdm(test_all_meta, desc="Domain"):
+            for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False):
+                config_file = os.path.join(args.test_config_base_dir, f"examples/{domain}/{example_id}.json")
+                with open(config_file, "r", encoding="utf-8") as f:
+                    example = json.load(f)
+
+                logger.info(f"[Domain]: {domain}")
+                logger.info(f"[Example ID]: {example_id}")
+
+                instruction = example["instruction"]
+
+                logger.info(f"[Instruction]: {instruction}")
+                # wandb each example config settings
+                cfg_args["instruction"] = instruction
+                cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S")
+
+                example_result_dir = os.path.join(
+                    args.result_dir,
+                    args.action_space,
+                    args.observation_type,
+                    args.model,
+                    domain,
+                    example_id,
+                )
+                os.makedirs(example_result_dir, exist_ok=True)
+                # example start running
+                try:
+                    lib_run_single.run_single_example_autoglm(
+                        agent,
+                        env,
+                        example,
+                        max_steps,
+                        instruction,
+                        args,
+                        example_result_dir,
+                        scores,
+                    )
+                except Exception as e:
+                    logger.error(f"Exception in {domain}/{example_id}: {e}")
+                    # Only attempt to end recording if controller exists (not Docker provider)
+                    if hasattr(env, "controller") and env.controller is not None:
+                        try:
+                            env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+                        except Exception as rec_e:
+                            # A failed recording must not abort the remaining examples.
+                            logger.error(f"Failed to end recording: {rec_e}")
+                    # The recorded message is kept unchanged; note that it is
+                    # written for any exception, not only for timeouts.
+                    with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+                        f.write(json.dumps({"Error": f"Time limit exceeded in {domain}/{example_id}"}))
+                        f.write("\n")
+    finally:
+        # Release the environment even when setup or an example fails unexpectedly.
+        env.close()
+
+    # Guard against an empty score list (nothing to run, or every example failed).
+    logger.info(f"Average score: {sum(scores) / len(scores) if scores else 0}")
+
+
+def get_unfinished(action_space, use_model, observation_type, result_dir, total_file_json):
+    """Drop already-finished examples from ``total_file_json``.
+
+    An example counts as finished when its result directory contains
+    ``result.txt``. Result directories without it are emptied so the example
+    starts from scratch. ``total_file_json`` is updated in place and returned.
+    """
+    run_root = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(run_root):
+        return total_file_json
+
+    done_by_domain = {}
+    for domain_name in os.listdir(run_root):
+        completed = done_by_domain.setdefault(domain_name, [])
+        domain_dir = os.path.join(run_root, domain_name)
+        if not os.path.isdir(domain_dir):
+            continue
+        for entry in os.listdir(domain_dir):
+            entry_dir = os.path.join(domain_dir, entry)
+            if entry == "onboard" or not os.path.isdir(entry_dir):
+                continue
+            leftovers = os.listdir(entry_dir)
+            if "result.txt" in leftovers:
+                completed.append(entry)
+                continue
+            # Unfinished run: remove its partial output files.
+            for leftover in leftovers:
+                os.remove(os.path.join(entry_dir, leftover))
+
+    for domain_name, completed in done_by_domain.items():
+        if domain_name in total_file_json:
+            total_file_json[domain_name] = [
+                example for example in total_file_json[domain_name] if example not in completed
+            ]
+
+    return total_file_json
+
+
+def get_result(action_space, use_model, observation_type, result_dir, total_file_json):
+    """Print the current success rate and return the recorded scores.
+
+    Args:
+        action_space, use_model, observation_type, result_dir: Path components
+            of the experiment's result directory.
+        total_file_json: Unused; kept so existing callers keep working.
+
+    Returns:
+        list[float] | None: One score per ``result.txt`` found (0.0 when the
+        file cannot be read or parsed), or None when there are no results yet.
+    """
+    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(target_dir):
+        print("New experiment, no result yet.")
+        return None
+
+    all_result = []
+
+    for domain in os.listdir(target_dir):
+        domain_path = os.path.join(target_dir, domain)
+        if not os.path.isdir(domain_path):
+            continue
+        for example_id in os.listdir(domain_path):
+            example_path = os.path.join(domain_path, example_id)
+            if not os.path.isdir(example_path) or "result.txt" not in os.listdir(example_path):
+                continue
+            # Read the score through a context manager so the handle is closed,
+            # and only treat read/parse failures as a zero score.
+            try:
+                with open(os.path.join(example_path, "result.txt"), "r") as result_file:
+                    all_result.append(float(result_file.read()))
+            except (OSError, ValueError):
+                all_result.append(0.0)
+
+    if not all_result:
+        print("New experiment, no result yet.")
+        return None
+
+    print("Current Success Rate:", sum(all_result) / len(all_result) * 100, "%")
+    return all_result
+
+
+if __name__ == "__main__":
+    ####### The complete version of the list of examples #######
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    args = config()
+
+    # Persist the options as result_dir/action_space/observation_type/model/args.json
+    run_dir = os.path.join(
+        args.result_dir,
+        args.action_space,
+        args.observation_type,
+        args.model,
+    )
+    os.makedirs(run_dir, exist_ok=True)
+    path_to_args = os.path.join(run_dir, "args.json")
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
+
+    with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
+        test_all_meta = json.load(f)
+
+    # Optionally restrict the run to a single domain
+    if args.domain != "all":
+        test_all_meta = {args.domain: test_all_meta[args.domain]}
+
+    # Skip the examples that already have a result from a previous run
+    test_file_list = get_unfinished(
+        action_space=args.action_space,
+        use_model=args.model,
+        observation_type=args.observation_type,
+        result_dir=args.result_dir,
+        total_file_json=test_all_meta,
+    )
+    left_info = "".join(
+        f"{domain}: {len(test_file_list[domain])}\n" for domain in test_file_list
+    )
+    logger.info(f"Left tasks:\n{left_info}")
+
+    # Report the success rate of what has been finished so far
+    get_result(
+        action_space=args.action_space,
+        use_model=args.model,
+        observation_type=args.observation_type,
+        result_dir=args.result_dir,
+        total_file_json=test_all_meta,
+    )
+    test(args, test_file_list)
diff --git a/run_multienv_autoglm.py b/run_multienv_autoglm.py
new file mode 100644
index 0000000..77c7471
--- /dev/null
+++ b/run_multienv_autoglm.py
@@ -0,0 +1,545 @@
+"""Script to run end-to-end evaluation on the benchmark.
+Utils and basic architecture credit to https://github.com/web-arena-x/webarena/blob/main/run.py.
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+import signal
+import time
+from typing import List
+from multiprocessing import Process, Manager, current_process
+import lib_run_single
+from run_autoglm import DesktopEnv
+from mm_agents.autoglm import AutoGLMAgent
+
+import backoff
+import httpx
+from openai import APIConnectionError, APIError, OpenAI, RateLimitError
+from requests.exceptions import SSLError
+from tqdm import tqdm
+
+# Global variables for signal handling
+# Environments the main-process signal handler and the final cleanup close.
+# Nothing visible in this file appends to it; the workers keep a local list of the same name.
+active_environments = []
+# Worker processes; rebound by test() and read by signal_handler() and the final cleanup.
+processes = []
+# Set by signal_handler() so that a repeated signal does not run the shutdown twice.
+is_terminating = False
+
+# .env
+# Load variables from a .env file into the process environment before anything reads them.
+from dotenv import load_dotenv
+load_dotenv()
+
+# Logger Configs {{{ #
+def config() -> argparse.Namespace:
+    """Parse the command-line options of the multi-environment AutoGLM runner.
+
+    Returns:
+        argparse.Namespace: The options parsed from ``sys.argv``.
+    """
+    parser = argparse.ArgumentParser(description="Run end-to-end evaluation on the benchmark")
+    add = parser.add_argument
+
+    # environment config
+    add("--path_to_vm", type=str)
+    # store_true combined with default=True: the value is True whether or not the flag is given.
+    add("--headless", action="store_true", default=True, help="Run in headless machine")
+    add("--action_space", type=str, default="autoglm_computer_use", help="Action type")
+    add(
+        "--observation_type",
+        choices=["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"],
+        default="a11y_tree",
+        help="Observation type",
+    )
+    add(
+        "--provider_name",
+        type=str,
+        default="docker",
+        choices=["aws", "virtualbox", "vmware", "docker", "azure"],
+        help="Provider name",
+    )
+    add("--screen_width", type=int, default=1920, help="Screen width")
+    add("--screen_height", type=int, default=1080, help="Screen height")
+    add("--sleep_after_execution", type=float, default=1.0)
+    add("--max_steps", type=int, default=50)
+
+    # agent config
+    add("--max_trajectory_length", type=int, default=3)
+    add("--test_config_base_dir", type=str, default="evaluation_examples")
+
+    # lm config
+    add("--model", type=str, default="autoglm-os")
+    add("--temperature", type=float, default=0.4)
+    add("--top_p", type=float, default=0.5)
+    add("--max_tokens", type=int, default=4096)
+    add("--stop_token", type=str, default=None)
+
+    # example config
+    add("--domain", type=str, default="all")
+    add("--test_all_meta_path", type=str, default="evaluation_examples/test_nogdrive.json")
+
+    # logging related
+    add("--result_dir", type=str, default="./results")
+    add("--num_envs", type=int, default=20, help="Number of environments to run in parallel")
+    add(
+        "--log_level",
+        type=str,
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+        default='INFO',
+        help="Set the logging level",
+    )
+
+    return parser.parse_args()
+
+args = config()  # Get command line arguments first
+
+# The root logger follows --log_level; the handlers below narrow it further.
+logger = logging.getLogger()
+log_level = getattr(logging, args.log_level.upper())
+logger.setLevel(log_level)
+
+datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+
+# logging.FileHandler does not create missing parent directories, so make sure
+# the log directory exists before the log files are opened.
+os.makedirs("logs", exist_ok=True)
+
+file_handler = logging.FileHandler(
+    os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8"
+)
+debug_handler = logging.FileHandler(
+    os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8"
+)
+stdout_handler = logging.StreamHandler(sys.stdout)
+
+file_handler.setLevel(logging.INFO)
+debug_handler.setLevel(logging.DEBUG)
+stdout_handler.setLevel(log_level)
+
+# ANSI colour escapes are embedded in the format and therefore also end up in the log files.
+formatter = logging.Formatter(
+    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
+)
+file_handler.setFormatter(formatter)
+debug_handler.setFormatter(formatter)
+stdout_handler.setFormatter(formatter)
+
+# Only records from the "desktopenv" logger hierarchy reach stdout.
+stdout_handler.addFilter(logging.Filter("desktopenv"))
+
+logger.addHandler(file_handler)
+logger.addHandler(debug_handler)
+logger.addHandler(stdout_handler)
+# }}} Logger Configs #
+
+# From here on `logger` is the experiment logger, not the root logger.
+logger = logging.getLogger("desktopenv.experiment")
+
+
+def distribute_tasks(test_all_meta: dict) -> List[tuple]:
+    """Flatten the domain -> example ids mapping into (domain, example_id) pairs.
+
+    The pairs keep the iteration order of the mapping and of each example list;
+    no per-environment assignment happens here.
+    """
+    return [
+        (domain, example_id)
+        for domain, examples in test_all_meta.items()
+        for example_id in examples
+    ]
+
+
+def process_signal_handler(signum, frame, env_idx):
+    """Signal handler for child processes to gracefully shut down their environments.
+
+    Args:
+        signum: Number of the received signal (only logged).
+        frame: Stack frame that was executing when the signal arrived; may be None.
+        env_idx: Zero-based worker index, used in log messages.
+
+    Closes every environment found in an ``active_environments`` local on the
+    interrupted call stack, then exits the process with status 0.
+    """
+    logger.info(f"Process {env_idx + 1} received signal {signum}. Shutting down...")
+
+    # The interrupted frame is normally some call nested below the function
+    # that owns the environments, so looking only at its own locals finds
+    # nothing. Walk outward until a frame defines 'active_environments'.
+    active_environments = []
+    current = frame
+    while current is not None:
+        if 'active_environments' in current.f_locals:
+            active_environments = current.f_locals['active_environments'] or []
+            break
+        current = current.f_back
+
+    # Close environment in the current process context
+    for env in active_environments:
+        if env is not None:
+            try:
+                logger.info(f"Process {env_idx + 1} closing environment...")
+                env.close()
+                logger.info(f"Process {env_idx + 1} environment closed successfully")
+            except Exception as e:
+                logger.error(f"Process {env_idx + 1} error closing environment: {e}")
+
+    logger.info(f"Process {env_idx + 1} shutdown complete. Exiting.")
+    sys.exit(0)
+
+
+def run_env_tasks(task_queue, args, shared_scores):
+    """Worker entry point: own one environment and run tasks pulled from the queue.
+
+    Args:
+        task_queue: Queue of ``(domain, example_id)`` pairs shared by all workers.
+        args: Parsed command-line options.
+        shared_scores: Shared list handed to ``run_single_example_autoglm``.
+
+    The worker stops when no task arrives within five seconds and always tries
+    to close its environment before returning.
+    """
+    import traceback
+
+    worker = current_process().name
+    # This local is looked up by name by process_signal_handler(); keep the name.
+    active_environments = []
+    env = None
+    try:
+        # Retry indefinitely, every 0.1 s, on rate-limit and connection errors.
+        @backoff.on_exception(
+            backoff.constant,
+            (RateLimitError, APIConnectionError),
+            interval=0.1,
+        )
+        def call_llm(messages):
+            logger.info("Calling LLM...")
+            # set api_key and base_url by environment variables
+            client = OpenAI(timeout=60.0)
+            completion = client.chat.completions.create(
+                model=args.model,
+                messages=messages,
+                max_tokens=args.max_tokens,
+                temperature=args.temperature,
+                top_p=args.top_p,
+            )
+            logger.info("LLM called successfully.")
+            return completion.choices[0].message.content
+
+        needs_a11y_tree = args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"]
+        env = DesktopEnv(
+            provider_name=args.provider_name,
+            path_to_vm=args.path_to_vm,
+            action_space=args.action_space,
+            screen_size=(args.screen_width, args.screen_height),
+            headless=args.headless,
+            os_type="Ubuntu",
+            require_a11y_tree=needs_a11y_tree,
+        )
+        active_environments.append(env)
+        agent = AutoGLMAgent(
+            action_space=args.action_space,
+            observation_type=args.observation_type,
+            max_trajectory_length=args.max_trajectory_length,
+            gen_func=call_llm,
+        )
+        logger.info(f"Process {worker} started.")
+
+        while True:
+            # Any failure to fetch a task (normally the timeout) ends the worker.
+            try:
+                task = task_queue.get(timeout=5)
+            except Exception:
+                break
+            domain, example_id = task
+            try:
+                config_file = os.path.join(
+                    args.test_config_base_dir, f"examples/{domain}/{example_id}.json"
+                )
+                with open(config_file, "r", encoding="utf-8") as f:
+                    example = json.load(f)
+                logger.info(f"[{worker}][Domain]: {domain}")
+                logger.info(f"[{worker}][Example ID]: {example_id}")
+                logger.info(f"[{worker}][Instruction]: {example['instruction']}")
+                example_result_dir = os.path.join(
+                    args.result_dir,
+                    args.action_space,
+                    args.observation_type,
+                    args.model,
+                    domain,
+                    example_id,
+                )
+                os.makedirs(example_result_dir, exist_ok=True)
+                try:
+                    lib_run_single.run_single_example_autoglm(
+                        agent,
+                        env,
+                        example,
+                        args.max_steps,
+                        example["instruction"],
+                        args,
+                        example_result_dir,
+                        shared_scores,
+                    )
+                except Exception as e:
+                    logger.error(f"Exception in {worker} {domain}/{example_id}: {e}")
+                    logger.error(traceback.format_exc())
+                    # Best effort: a failed recording must not hide the original error.
+                    try:
+                        env.controller.end_recording(
+                            os.path.join(example_result_dir, "recording.mp4")
+                        )
+                    except Exception as rec_e:
+                        logger.error(f"Failed to end recording: {rec_e}")
+                    # Leave a trace of the failure in the example's trajectory file.
+                    error_record = json.dumps({"Error": f"{domain}/{example_id} - {e}"})
+                    with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+                        f.write(error_record + "\n")
+            except Exception as e:
+                logger.error(f"Task-level error in {worker}: {e}")
+                logger.error(traceback.format_exc())
+    except Exception as e:
+        logger.error(f"Process-level error in {worker}: {e}")
+        logger.error(traceback.format_exc())
+    finally:
+        logger.info(f"{worker} cleaning up environment...")
+        try:
+            if env:
+                env.close()
+                logger.info(f"{worker} environment closed successfully")
+        except Exception as e:
+            logger.error(f"{worker} error during environment cleanup: {e}")
+
+
+def signal_handler(signum, frame):
+    """Handle SIGINT/SIGTERM in the main process.
+
+    Closes the registered environments, terminates the worker processes
+    (kills them if they are still alive one second later) and exits with
+    status 0. A second invocation while shutting down returns immediately.
+    """
+    global is_terminating
+
+    # Avoid duplicate handling
+    if is_terminating:
+        return
+    is_terminating = True
+
+    logger.info(f"Received signal {signum}. Gracefully shutting down...")
+
+    # Close all registered environments in the main process
+    for environment in active_environments:
+        try:
+            logger.info("Closing environment...")
+            environment.close()
+            logger.info("Environment closed successfully")
+        except Exception as exc:
+            logger.error(f"Error closing environment: {exc}")
+
+    # Send termination signal to all child processes first
+    for worker in processes:
+        if not worker.is_alive():
+            continue
+        try:
+            logger.info(f"Sending termination signal to process {worker.name}...")
+            worker.terminate()
+        except Exception as exc:
+            logger.error(f"Error sending termination signal to process: {exc}")
+
+    # Allow a short time for processes to handle their own cleanup
+    time.sleep(1)
+
+    # Forcefully terminate any processes that didn't exit
+    for worker in processes:
+        if not worker.is_alive():
+            continue
+        try:
+            logger.info(f"Forcefully terminating process {worker.name}...")
+            os.kill(worker.pid, signal.SIGKILL)
+        except Exception as exc:
+            logger.error(f"Error forcefully terminating process: {exc}")
+
+    logger.info("Shutdown complete. Exiting.")
+    sys.exit(0)
+
+
+def test(args: argparse.Namespace, test_all_meta: dict) -> None:
+    """Run all tasks in parallel worker processes and log the average score.
+
+    Args:
+        args: Parsed command-line options; ``args.num_envs`` caps the number of workers.
+        test_all_meta: Mapping of domain name to the example ids to run.
+
+    Raises:
+        KeyboardInterrupt: Re-raised after logging.
+        Exception: Any unexpected monitoring error, re-raised after the
+            workers have been asked to terminate.
+    """
+    global processes
+    logger.info("Args: %s", args)
+    all_tasks = distribute_tasks(test_all_meta)
+    logger.info(f"Total tasks: {len(all_tasks)}")
+    with Manager() as manager:
+        shared_scores = manager.list()
+        task_queue = manager.Queue()
+        for item in all_tasks:
+            task_queue.put(item)
+
+        def start_worker(name):
+            """Start one daemon worker process that consumes the shared queue."""
+            worker = Process(
+                target=run_env_tasks,
+                args=(task_queue, args, shared_scores),
+                name=name,
+            )
+            worker.daemon = True
+            worker.start()
+            return worker
+
+        # Every worker boots a full environment; never start more than there are tasks.
+        num_envs = min(args.num_envs, len(all_tasks))
+        processes = []
+        for i in range(num_envs):
+            p = start_worker(f"EnvProcess-{i+1}")
+            processes.append(p)
+            logger.info(f"Started process {p.name} with PID {p.pid}")
+        try:
+            while True:
+                # Check the queue before looking at the workers: a worker that
+                # exited because the queue ran dry is not a crash, and
+                # restarting it would boot an environment with nothing to do.
+                if task_queue.empty():
+                    logger.info("All tasks finished.")
+                    break
+                alive_count = 0
+                for idx, p in enumerate(processes):
+                    if not p.is_alive():
+                        logger.warning(f"Process {p.name} died, restarting...")
+                        new_p = start_worker(f"EnvProcess-Restart-{idx+1}")
+                        processes[idx] = new_p
+                        logger.info(f"Restarted process {new_p.name} with PID {new_p.pid}")
+                    else:
+                        alive_count += 1
+                if alive_count == 0:
+                    logger.error("All processes died, exiting.")
+                    break
+                time.sleep(5)
+            # An empty queue only means every task was picked up; wait for the
+            # workers to finish the tasks they are still running.
+            for p in processes:
+                p.join()
+        except KeyboardInterrupt:
+            logger.info("Main process received KeyboardInterrupt. Initiating graceful shutdown...")
+            raise
+        except Exception as e:
+            logger.error(f"Unexpected error while waiting for processes: {e}", exc_info=True)
+            for p in processes:
+                if p.is_alive():
+                    try:
+                        logger.info(f"Terminating process {p.name} due to error...")
+                        p.terminate()
+                    except Exception as term_e:
+                        logger.error(f"Error terminating process {p.name}: {term_e}")
+            raise
+        # Copy the scores while the manager, and therefore the proxy, is still alive.
+        scores = list(shared_scores)
+    logger.info(f"Average score: {sum(scores) / len(scores) if scores else 0}")
+
+
+def get_unfinished(
+    action_space, use_model, observation_type, result_dir, total_file_json
+):
+    """Remove examples that already have a ``result.txt`` from ``total_file_json``.
+
+    Result directories of examples without ``result.txt`` are emptied so the
+    example is rerun from a clean state. The mapping is modified in place and
+    returned.
+    """
+    experiment_dir = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(experiment_dir):
+        return total_file_json
+
+    finished_examples = {}
+    for domain in os.listdir(experiment_dir):
+        finished_examples[domain] = []
+        domain_dir = os.path.join(experiment_dir, domain)
+        if not os.path.isdir(domain_dir):
+            continue
+        for name in os.listdir(domain_dir):
+            if name == "onboard":
+                continue
+            example_dir = os.path.join(domain_dir, name)
+            if not os.path.isdir(example_dir):
+                continue
+            contents = os.listdir(example_dir)
+            if "result.txt" in contents:
+                finished_examples[domain].append(name)
+            else:
+                # Partial run: wipe its files so it starts over.
+                for stale in contents:
+                    os.remove(os.path.join(example_dir, stale))
+
+    for domain in finished_examples:
+        if domain not in total_file_json:
+            continue
+        already_done = finished_examples[domain]
+        total_file_json[domain] = [
+            example for example in total_file_json[domain] if example not in already_done
+        ]
+
+    return total_file_json
+
+
+def get_result(action_space, use_model, observation_type, result_dir, total_file_json):
+    """Print the current success rate and return the recorded scores.
+
+    Args:
+        action_space, use_model, observation_type, result_dir: Path components
+            of the experiment's result directory.
+        total_file_json: Unused; kept so existing callers keep working.
+
+    Returns:
+        list[float] | None: One score per ``result.txt`` found (0.0 when the
+        file cannot be read or parsed), or None when there are no results yet.
+    """
+    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(target_dir):
+        print("New experiment, no result yet.")
+        return None
+
+    all_result = []
+
+    for domain in os.listdir(target_dir):
+        domain_path = os.path.join(target_dir, domain)
+        if not os.path.isdir(domain_path):
+            continue
+        for example_id in os.listdir(domain_path):
+            example_path = os.path.join(domain_path, example_id)
+            if not os.path.isdir(example_path) or "result.txt" not in os.listdir(example_path):
+                continue
+            # Read the score through a context manager so the handle is closed,
+            # and only treat read/parse failures as a zero score.
+            try:
+                with open(os.path.join(example_path, "result.txt"), "r") as result_file:
+                    all_result.append(float(result_file.read()))
+            except (OSError, ValueError):
+                all_result.append(0.0)
+
+    if not all_result:
+        print("New experiment, no result yet.")
+        return None
+
+    print("Current Success Rate:", sum(all_result) / len(all_result) * 100, "%")
+    return all_result
+
+
+if __name__ == "__main__":
+    ####### The complete version of the list of examples #######
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+    # Register signal handlers for graceful termination (Ctrl+C and SIGTERM)
+    for handled_signal in (signal.SIGINT, signal.SIGTERM):
+        signal.signal(handled_signal, signal_handler)
+
+    try:
+        # args already defined globally above
+
+        # Persist the options as result_dir/action_space/observation_type/model/args.json
+        run_dir = os.path.join(
+            args.result_dir,
+            args.action_space,
+            args.observation_type,
+            args.model,
+        )
+        os.makedirs(run_dir, exist_ok=True)
+        path_to_args = os.path.join(run_dir, "args.json")
+        with open(path_to_args, "w", encoding="utf-8") as f:
+            json.dump(vars(args), f, indent=4)
+
+        with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
+            test_all_meta = json.load(f)
+
+        # Optionally restrict the run to a single domain
+        if args.domain != "all":
+            test_all_meta = {args.domain: test_all_meta[args.domain]}
+
+        # Skip the examples that already have a result from a previous run
+        test_file_list = get_unfinished(
+            action_space=args.action_space,
+            use_model=args.model,
+            observation_type=args.observation_type,
+            result_dir=args.result_dir,
+            total_file_json=test_all_meta,
+        )
+        left_info = "".join(
+            f"{domain}: {len(test_file_list[domain])}\n" for domain in test_file_list
+        )
+        logger.info(f"Left tasks:\n{left_info}")
+
+        # Report the success rate of what has been finished so far
+        get_result(
+            action_space=args.action_space,
+            use_model=args.model,
+            observation_type=args.observation_type,
+            result_dir=args.result_dir,
+            total_file_json=test_all_meta,
+        )
+        test(args, test_file_list)
+    except KeyboardInterrupt:
+        logger.info("Main process received KeyboardInterrupt.")
+        # Signal handler will take care of cleanup
+    except Exception as e:
+        logger.error(f"Unexpected error in main process: {e}", exc_info=True)
+        # Also trigger cleanup for unhandled exceptions
+        signal_handler(signal.SIGTERM, None)
+    finally:
+        # Final cleanup in case any environments or processes remain
+        logger.info("Main process final cleanup...")
+        for env in active_environments:
+            if env is None:
+                continue
+            try:
+                logger.info("Closing environment in final cleanup...")
+                env.close()
+                logger.info("Environment closed successfully in final cleanup")
+            except Exception as e:
+                logger.error(f"Error during final environment cleanup: {e}")
+
+        # First try gentle termination
+        for p in processes:
+            if p is None or not p.is_alive():
+                continue
+            try:
+                logger.info(f"Terminating process {p.name}...")
+                p.terminate()
+            except Exception as e:
+                logger.error(f"Error terminating process: {e}")
+
+        # Wait a moment for processes to terminate
+        time.sleep(1)
+
+        # Then force kill if needed
+        for p in processes:
+            if p is None or not p.is_alive():
+                continue
+            try:
+                logger.info(f"Force killing process {p.name}...")
+                os.kill(p.pid, signal.SIGKILL)
+                logger.info(f"Process {p.name} force killed")
+            except Exception as e:
+                logger.error(f"Error force killing process: {e}")
\ No newline at end of file