diff --git a/lib_run_single.py b/lib_run_single.py
index be4bc545..d19599ca 100644
--- a/lib_run_single.py
+++ b/lib_run_single.py
@@ -253,14 +253,20 @@ def run_single_example_autoglm(agent, env, example, max_steps, instruction, args
"screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
}))
f.write("\n")
+
if done:
logger.info("The episode is done.")
break
- if not done: # not completed the task yet
- env.action_history.append('FAIL')
+ # Invalid Action
+ if not actions:
+ obs = env._get_obs() # update observation
step_idx += 1
+
+ if not done: # not completed the task yet
+ env.action_history.append('FAIL')
+
result = env.evaluate()
logger.info("Result: %.2f", result)
scores.append(result)
diff --git a/mm_agents/autoglm_v/__init__.py b/mm_agents/autoglm_v/__init__.py
new file mode 100644
index 00000000..68226a16
--- /dev/null
+++ b/mm_agents/autoglm_v/__init__.py
@@ -0,0 +1,7 @@
+"""
+AutoGLM agent implementation
+"""
+
+# Re-export the agent class so callers can import it from the package root.
+from .main import AutoGLMAgent
+
+# Names exported by a star-import of this package.
+__all__ = ["AutoGLMAgent"]
diff --git a/mm_agents/autoglm_v/main.py b/mm_agents/autoglm_v/main.py
new file mode 100644
index 00000000..0095949d
--- /dev/null
+++ b/mm_agents/autoglm_v/main.py
@@ -0,0 +1,265 @@
+import logging
+import re
+from base64 import b64encode
+from PIL import Image
+from io import BytesIO
+from typing import Dict, List
+
+from .prompt.accessibility_tree_handle import linearize_accessibility_tree, trim_accessibility_tree
+from .prompt.grounding_agent import GroundingAgent as Agent
+from .tools.package.google_chrome import BrowserTools
+from .prompt.procedural_memory import Prompt
+
+# Module-level logger; AutoGLMAgent.reset rebinds this name via `global logger`.
+logger = logging.getLogger("desktopenv.agent")
+
+# Not referenced anywhere in the code visible here.
+# NOTE(review): presumably the observation types that carry no image - confirm before relying on it.
+pure_text_settings = ["a11y_tree"]
+
+def resize_image(image, w, h):
+    """Decode an encoded image, resize it to exactly ``w`` x ``h`` and re-encode it as PNG.
+
+    Args:
+        image: raw encoded image bytes readable by ``PIL.Image.open``.
+        w: target width in pixels.
+        h: target height in pixels.
+
+    Returns:
+        bytes: the resized image encoded as PNG.
+    """
+    resized = Image.open(BytesIO(image)).resize((w, h))
+    # Serialize into an in-memory buffer; the output format is always PNG.
+    png_buffer = BytesIO()
+    resized.save(png_buffer, format='PNG')
+    return png_buffer.getvalue()
+
+def parse_code_from_string(input_string):
+    """Extract executable snippets from a model response.
+
+    A response that is exactly ``WAIT``, ``DONE`` or ``FAIL`` (ignoring surrounding
+    whitespace) is returned as a single-element list. Otherwise every fenced block
+    (with or without a language tag after the opening fence) is collected. When the
+    last line of a block is one of the control commands, the block is split into
+    "the code before it" and "the command".
+
+    Args:
+        input_string (str): raw model response.
+
+    Returns:
+        list[str]: snippets in order of appearance; empty when nothing matched.
+    """
+    control_commands = ("WAIT", "DONE", "FAIL")  # fixme: updates this part when we have more commands
+
+    whole = input_string.strip()
+    if whole in control_commands:
+        return [whole]
+
+    # Non-greedy body; DOTALL lets a fenced block span several lines.
+    fenced_blocks = re.findall(r"```(?:\w+\s+)?(.*?)```", input_string, re.DOTALL)
+
+    codes = []
+    for block in fenced_blocks:
+        snippet = block.strip()
+        *leading_lines, final_line = snippet.split("\n")
+        if snippet in control_commands:
+            codes.append(snippet)
+        elif final_line in control_commands:
+            # The snippet is not itself a command, so it has at least one line before the command.
+            codes.append("\n".join(leading_lines))
+            codes.append(final_line)
+        else:
+            codes.append(snippet)
+
+    return codes
+
+
+class AutoGLMAgent:
+    """Agent that prompts a model with the desktop state and grounds its reply into actions.
+
+    Each :meth:`predict` call builds a chat prompt from the current observation and the
+    (truncated) turn history, calls ``gen_func`` to obtain a response, and converts the
+    first code snippet of that response into an executable action.
+    """
+
+    def __init__(
+        self,
+        action_space="autoglm_computer_use",
+        observation_type="a11y_tree",
+        max_trajectory_length=3,
+        a11y_tree_max_items=300,
+        with_image: bool = True,
+        screen_size = (1920, 1080),
+        image_size=(1920, 1080),
+        with_atree: bool = False,
+        glm41v_format: bool = True,
+        relative_coordinate: bool = True,
+        client_password="password",
+        gen_func=None,
+        tool_in_sys_msg: bool = True,
+    ):
+        """Store the configuration and start with an empty turn history.
+
+        Args:
+            action_space: must be ``"autoglm_computer_use"``.
+            observation_type: must be ``"a11y_tree"``.
+            max_trajectory_length: stored on the instance; not read by this class.
+            a11y_tree_max_items: maximum number of accessibility-tree rows put in the prompt.
+            with_image: attach the (resized) screenshot to the user message.
+            screen_size: stored on the instance; not read by this class.
+            image_size: ``(width, height)`` the screenshot is resized to.
+            with_atree: include the linearized accessibility tree in the prompt.
+            glm41v_format: forwarded to ``Prompt.construct_procedural_memory``.
+            relative_coordinate: forwarded to the prompt builder and set on the grounding class.
+            client_password: forwarded to ``Prompt.construct_procedural_memory``.
+            gen_func: callable taking the message list and returning the model response.
+            tool_in_sys_msg: put the function definitions in the system message (True)
+                or append them to the user message (False).
+
+        Raises:
+            AssertionError: if ``action_space`` or ``observation_type`` is unsupported.
+        """
+        self.action_space = action_space
+        self.observation_type = observation_type
+        assert action_space in ["autoglm_computer_use"], "Invalid action space"
+        assert observation_type in ["a11y_tree"], "Invalid observation type"
+        self.max_trajectory_length = max_trajectory_length
+        self.a11y_tree_max_items = a11y_tree_max_items
+        self.with_image = with_image
+        self.screen_size = screen_size
+        self.image_size = image_size
+        self.with_atree = with_atree
+        self.glm41v_format = glm41v_format
+        self.relative_coordinate = relative_coordinate
+        self.client_password = client_password
+        self.gen_func = gen_func
+        self.tool_in_sys_msg = tool_in_sys_msg
+
+        # Maps a normalized application name to the tool class exposed for it.
+        self.tool_list = {
+            "libreoffice_calc": "CalcTools",
+            "libreoffice_impress": "ImpressTools",
+            "libreoffice_writer": "WriterTools",
+            "code": "CodeTools",
+            "vlc": "VLCTools",
+            "google_chrome": "BrowserTools",
+        }
+
+        # NOTE: this mutates the grounding class itself, so the setting is shared by
+        # every AutoGLMAgent in the process, not just this instance.
+        Agent.relative_coordinate = relative_coordinate
+
+        self.contents = []
+
+    @property
+    def turn_number(self):
+        """Number of turns recorded so far."""
+        return len(self.contents)
+
+    @staticmethod
+    def _truncate(text, limit):
+        """Return ``text`` cut to ``limit`` characters, with ``"..."`` appended when it was cut."""
+        return text[:limit] + "..." if len(text) > limit else text
+
+    @staticmethod
+    def _build_record(instruction, index, response, actions, obs):
+        """Build the history entry for one turn.
+
+        The observation is spread first so that the explicit bookkeeping fields
+        (notably ``exe_result``) win over same-named keys carried by ``obs``.
+        """
+        return {
+            **obs,
+            "instruction": instruction,
+            "index": index,
+            "response": response,
+            "action": "Parse error" if not actions else actions[0],
+            "exe_result": "Invalid action" if not actions else "",
+        }
+
+    def prepare(self, instruction: str, obs: Dict, history: List, last_result: str = "") -> List:
+        """Build the chat messages for the next model call.
+
+        Args:
+            instruction: the task description, appended to the system message.
+            obs: current observation; reads ``cur_app``, ``apps``, ``cur_window_id``,
+                ``app_info`` and, optionally, ``exe_result``, ``accessibility_tree``
+                and ``screenshot``.
+            history: prior messages, inserted after the system message.
+            last_result: result of the previous action; defaults to ``obs["exe_result"]``.
+
+        Returns:
+            List: system message, history, then one user message for the current state.
+        """
+        if "exe_result" in obs and not last_result:
+            last_result = obs["exe_result"]
+            if self.contents:
+                # Back-fill the previous turn with the result the environment reported.
+                self.contents[-1]["exe_result"] = last_result
+
+        cur_app = obs["cur_app"]
+        logger.info("current app is %s", cur_app)
+
+        tool_name = None
+        if cur_app:
+            candidate = cur_app.strip().lower().replace("-", "_")
+            if candidate in self.tool_list:
+                tool_name = candidate
+
+        setup_prompt, func_def_prompt, note_prompt = Prompt.construct_procedural_memory(
+            Agent,
+            app_name=tool_name,
+            client_password=self.client_password,
+            with_image=self.with_image,
+            with_atree=self.with_atree,
+            relative_coordinate=self.relative_coordinate,
+            glm41v_format=self.glm41v_format,
+        )
+        if self.tool_in_sys_msg:
+            system_message = setup_prompt + "\n\n" + func_def_prompt + "\n\n" + note_prompt
+        else:
+            system_message = setup_prompt + "\n\n" + note_prompt
+        system_message += "\n\n**IMPORTANT** You are asked to complete the following task: {}".format(instruction)
+
+        messages = [
+            {
+                "role": "system",
+                "content": system_message,
+            }
+        ]
+        messages.extend(history)
+
+        if obs["apps"]:
+            app_str = "Window ID App Name Title\n"
+            for window_id, app in obs["apps"].items():
+                app_str += f"{window_id} {app['app_name']} {app['title']}\n"
+        else:
+            app_str = "None"
+
+        last_result = last_result.strip() if last_result else "None"
+        last_result = self._truncate(last_result, 2000)
+
+        # Only linearize the tree when it is actually put in the prompt, and honour
+        # the configured row limit instead of a hard-coded one.
+        tree_section = ""
+        if self.with_atree:
+            tree = linearize_accessibility_tree(obs["accessibility_tree"], "Ubuntu")
+            tree = trim_accessibility_tree(tree, self.a11y_tree_max_items)
+            tree_section = '\n\n* A11y Tree: {}'.format(tree.strip())
+
+        app_info = obs["app_info"].strip() if obs["app_info"] else "None"
+        app_info = self._truncate(app_info, 5000)
+
+        # A missing or empty window id is reported as "None" rather than raising.
+        cur_window_id = obs["cur_window_id"]
+        if cur_window_id and cur_window_id in app_str:
+            current_window = cur_window_id.strip()
+        else:
+            current_window = "None"
+
+        prompt = "* Apps: {}\n\n* Current App: {}{}\n\n* App Info: {}\n\n* Previous Action Result: {}".format(
+            app_str.strip(),
+            current_window,
+            tree_section,
+            app_info,
+            last_result if last_result else "None",
+        )
+        if not self.tool_in_sys_msg:
+            prompt += "\n\n" + func_def_prompt
+
+        content = [{"type": "text", "text": prompt}]
+        if self.with_image and obs.get('screenshot'):
+            screenshot = resize_image(obs['screenshot'], self.image_size[0], self.image_size[1])
+            # The image goes before the text part of the user message.
+            content = [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/png;base64,{b64encode(screenshot).decode('utf-8')}",
+                        "detail": "high",
+                    },
+                }
+            ] + content
+
+        messages.append({"role": "user", "content": content})
+
+        return messages
+
+    def execute(self, response, obs):
+        """Ground the first snippet of ``response`` into a list with one executable action.
+
+        Returns:
+            list: a single-element list on success; ``[]`` when the response contains
+            no snippet or grounding fails (the failure is logged, not raised).
+        """
+        try:
+            snippets = parse_code_from_string(response)
+            if not snippets:
+                logger.error("Failed to parse action from response: no code snippet found")
+                return []
+
+            action = snippets[0]
+            logger.info(f"The pesudo action is {action}")
+
+            # TODO: special check for BrowserTools
+            if "Agent." in action or "BrowserTools." in action:
+                # SECURITY: this evaluates model-generated text as Python. It is kept
+                # because the response is expected to be a call on Agent/BrowserTools,
+                # but it must never be fed text from an untrusted source.
+                actions = [
+                    eval(action),
+                ]
+            else:
+                actions = Agent.tool_commands(action, obs["cur_app"].strip().replace("-", "_").lower())
+            logger.info(f"The grounded action is {actions[0]}")
+        except Exception as e:
+            # Best effort: any grounding failure is reported as "no action".
+            logger.error("Failed to parse action from response: %s", e)
+            actions = []
+
+        return actions
+
+    def format_history(self, max_turns=30):
+        """Render recorded turns as alternating user/assistant messages.
+
+        The environment state of past turns is omitted; from the second turn on, the
+        user message carries the previous turn's execution result. Only the last
+        ``max_turns`` turns are returned.
+        """
+        history = []
+        for ix, turn in enumerate(self.contents):
+            env_input = "**Environment State (Omitted)**"
+            if ix > 0:
+                env_input += f"\nPrevious Action Result: {self.contents[ix - 1]['exe_result']}"
+
+            env_input = self._truncate(env_input, 2000)
+            response = self._truncate(turn["response"], 1500)
+            history.append({"role": "user", "content": [{"type": "text", "text": env_input}]})
+            history.append({"role": "assistant", "content": [{"type": "text", "text": response}]})
+
+        return history[-max_turns * 2:]
+
+    def predict(self, instruction: str, obs: Dict) -> tuple:
+        """Run one turn: prompt the model, ground its reply and record the turn.
+
+        Returns:
+            tuple: ``(response, actions)`` where ``response`` is the raw model output
+            (``""`` if ``gen_func`` raised) and ``actions`` is the list from :meth:`execute`.
+
+        Raises:
+            AssertionError: if ``gen_func`` was never set.
+        """
+        history = self.format_history()
+        messages = self.prepare(instruction, obs, history)
+
+        assert self.gen_func is not None, "gen_func is not set"
+        try:
+            response = self.gen_func(messages)
+        except Exception as e:
+            # A failed model call is treated like an empty response.
+            logger.error("Failed to call gen_func, Error: %s", e)
+            response = ""
+
+        logger.info("RESPONSE: %s", response)
+
+        actions = self.execute(response, obs)
+
+        # update the contents
+        self.contents.append(self._build_record(instruction, len(self.contents), response, actions, obs))
+        return response, actions
+
+    def reset(self, _logger=None):
+        """Clear the turn history and rebind the module-level logger."""
+        global logger
+        # NOTE(review): the fallback logger name differs from the module default
+        # ("desktopenv.agent"); looks like a leftover from another agent - confirm.
+        logger = _logger if _logger is not None else logging.getLogger("desktopenv.aguvis_agent")
+
+        self.contents = []
diff --git a/mm_agents/autoglm_v/prompt/accessibility_tree_handle.py b/mm_agents/autoglm_v/prompt/accessibility_tree_handle.py
new file mode 100644
index 00000000..a9a392d2
--- /dev/null
+++ b/mm_agents/autoglm_v/prompt/accessibility_tree_handle.py
@@ -0,0 +1,329 @@
+import io
+import re
+import xml.etree.ElementTree as ET
+from typing import List, Tuple
+
+from PIL import Image, ImageDraw, ImageFont
+
+from .deduplicate_node import filter_similar_nodes
+
+# XML namespace URIs used by the accessibility-tree attributes, one set per platform.
+# The Ubuntu attributes namespace previously pointed at the Windows host, unlike
+# every other *_ubuntu constant; it now follows the same ubuntu/windows pattern.
+attributes_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/attributes"
+attributes_ns_windows = "https://accessibility.windows.example.org/ns/attributes"
+state_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/state"
+state_ns_windows = "https://accessibility.windows.example.org/ns/state"
+component_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/component"
+component_ns_windows = "https://accessibility.windows.example.org/ns/component"
+value_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/value"
+value_ns_windows = "https://accessibility.windows.example.org/ns/value"
+class_ns_windows = "https://accessibility.windows.example.org/ns/class"
+
+
+def find_leaf_nodes(xlm_file_str):
+    """Return every element without children from an XML document, in document order.
+
+    Args:
+        xlm_file_str (str): XML text; an empty or falsy value yields an empty list.
+
+    Returns:
+        list: the childless ``Element`` objects (the root itself if it has no children).
+    """
+    if not xlm_file_str:
+        return []
+
+    document_root = ET.fromstring(xlm_file_str)
+    # Element.iter() walks the tree depth-first in document order, which is the
+    # order a recursive pre-order traversal visits the nodes.
+    return [element for element in document_root.iter() if len(element) == 0]
+
+
+def judge_node(node: ET.Element, platform="Ubuntu", check_image=False) -> bool:
+    """Decide whether an accessibility node is worth keeping.
+
+    A node is kept when all of the following hold:
+      * its tag is one of the recognised element kinds;
+      * it is visible (and, on Ubuntu, also showing);
+      * it has a name, non-empty text, or (with ``check_image``) is flagged as an image;
+      * its screen coordinates are non-negative and its size is positive.
+
+    Args:
+        node: the element to inspect.
+        platform: ``"Ubuntu"`` or ``"Windows"``; selects the attribute namespaces.
+        check_image: also accept nodes whose ``image`` attribute is ``"true"``.
+
+    Returns:
+        bool: True if the node should be kept. Nodes whose coordinate or size
+        attribute cannot be parsed are rejected instead of raising.
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+    """
+    import ast  # local import: only needed for the safe literal parsing below
+
+    if platform == "Ubuntu":
+        _state_ns = state_ns_ubuntu
+        _component_ns = component_ns_ubuntu
+    elif platform == "Windows":
+        _state_ns = state_ns_windows
+        _component_ns = component_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    tag = node.tag
+    is_relevant_tag = (
+        tag.startswith("document")
+        or tag.endswith(
+            (
+                "item",
+                "button",
+                "heading",
+                "label",
+                "scrollbar",
+                "searchbox",
+                "textbox",
+                "link",
+                "tabelement",
+                "textfield",
+                "textarea",
+                "menu",
+            )
+        )
+        or tag
+        in {
+            "alert",
+            "canvas",
+            "check-box",
+            "combo-box",
+            "entry",
+            "icon",
+            "image",
+            "paragraph",
+            "scroll-bar",
+            "section",
+            "slider",
+            "static",
+            "table-cell",
+            "terminal",
+            "text",
+            "netuiribbontab",
+            "start",
+            "trayclockwclass",
+            "traydummysearchcontrol",
+            "uiimage",
+            "uiproperty",
+            "uiribboncommandbar",
+        }
+    )
+    if not is_relevant_tag:
+        return False
+
+    def state_is_true(flag):
+        return node.get(f"{{{_state_ns}}}{flag}", "false") == "true"
+
+    # Ubuntu nodes must be both showing and visible; Windows only exposes "visible".
+    if platform == "Ubuntu":
+        is_displayed = state_is_true("showing") and state_is_true("visible")
+    else:
+        is_displayed = state_is_true("visible")
+    if not is_displayed:
+        return False
+
+    has_content = (
+        node.get("name", "") != ""
+        or (node.text is not None and len(node.text) > 0)
+        or (check_image and node.get("image", "false") == "true")
+    )
+    if not has_content:
+        return False
+
+    # The geometry attributes are tuple literals such as "(10, 20)". Parse them
+    # with literal_eval so attribute text is never executed as code.
+    try:
+        x, y = ast.literal_eval(node.get(f"{{{_component_ns}}}screencoord", "(-1, -1)"))[:2]
+        width, height = ast.literal_eval(node.get(f"{{{_component_ns}}}size", "(-1, -1)"))[:2]
+        return x >= 0 and y >= 0 and width > 0 and height > 0
+    except (ValueError, SyntaxError, TypeError):
+        # Malformed or non-numeric geometry: the node cannot be placed on screen.
+        return False
+
+
+def filter_nodes(root: ET, platform="Ubuntu", check_image=False):
+    """Collect, in document order, every node under ``root`` (inclusive) accepted by ``judge_node``.
+
+    Args:
+        root: element whose subtree is scanned.
+        platform: forwarded to ``judge_node``.
+        check_image: forwarded to ``judge_node``.
+
+    Returns:
+        list: the accepted elements.
+    """
+    return [candidate for candidate in root.iter() if judge_node(candidate, platform, check_image)]
+
+
+# Draw a red, index-labelled bounding box on the screenshot for each node that has
+# parseable screen coordinates and size.
+#
+# Args:
+#   nodes: elements to draw; each is read for its screencoord/size attributes.
+#   image_file_content: encoded screenshot bytes.
+#   down_sampling_ratio: scale applied to the image and to the drawn coordinates.
+#   platform: "Ubuntu" or "Windows"; selects the attribute namespaces.
+#
+# Returns a 4-tuple:
+#   marks       - [x, y, w, h] per drawn node, in ORIGINAL (un-scaled) pixels
+#   drew_nodes  - the nodes that were actually drawn
+#   text table  - tab-separated "index/tag/name/text" rows joined by newlines
+#   image bytes - the annotated image encoded as PNG
+#
+# Raises ValueError for an unknown platform.
+def draw_bounding_boxes(nodes, image_file_content, down_sampling_ratio=1.0, platform="Ubuntu"):
+
+ if platform == "Ubuntu":
+ # _state_ns is assigned for symmetry but not read anywhere in this function.
+ _state_ns = state_ns_ubuntu
+ _component_ns = component_ns_ubuntu
+ _value_ns = value_ns_ubuntu
+ elif platform == "Windows":
+ _state_ns = state_ns_windows
+ _component_ns = component_ns_windows
+ _value_ns = value_ns_windows
+ else:
+ raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+ # Load the screenshot image
+ image_stream = io.BytesIO(image_file_content)
+ image = Image.open(image_stream)
+ if float(down_sampling_ratio) != 1.0:
+ image = image.resize((int(image.size[0] * down_sampling_ratio), int(image.size[1] * down_sampling_ratio)))
+ draw = ImageDraw.Draw(image)
+ marks = []
+ drew_nodes = []
+ # First row is the header of the tab-separated table returned to the caller.
+ text_informations: List[str] = ["index\ttag\tname\ttext"]
+
+ try:
+ # Adjust the path to the font file you have or use a default one
+ font = ImageFont.truetype("arial.ttf", 15)
+ except IOError:
+ # Fallback to a basic font if the specified font can't be loaded
+ font = ImageFont.load_default()
+
+ # Labels are 1-based and only advance for nodes that are actually drawn.
+ index = 1
+
+ # Loop over all the given nodes and draw their bounding boxes
+ for _node in nodes:
+ coords_str = _node.attrib.get("{{{:}}}screencoord".format(_component_ns))
+ size_str = _node.attrib.get("{{{:}}}size".format(_component_ns))
+
+ # Nodes missing either attribute are skipped silently.
+ if coords_str and size_str:
+ try:
+ # Parse the coordinates and size from the strings
+ coords = tuple(map(int, coords_str.strip("()").split(", ")))
+ size = tuple(map(int, size_str.strip("()").split(", ")))
+
+ import copy
+
+ # Keep the un-scaled geometry: marks are reported in original pixels.
+ original_coords = copy.deepcopy(coords)
+ original_size = copy.deepcopy(size)
+
+ if float(down_sampling_ratio) != 1.0:
+ # Downsample the coordinates and size
+ coords = tuple(int(coord * down_sampling_ratio) for coord in coords)
+ size = tuple(int(s * down_sampling_ratio) for s in size)
+
+ # Reject zero or negative sizes
+ if size[0] <= 0 or size[1] <= 0:
+ raise ValueError(f"Size must be positive, got: {size}")
+
+ # Calculate the bottom-right corner of the bounding box
+ bottom_right = (coords[0] + size[0], coords[1] + size[1])
+
+ # Check that bottom_right is not above/left of coords (x1 >= x0, y1 >= y0)
+ if bottom_right[0] < coords[0] or bottom_right[1] < coords[1]:
+ raise ValueError(f"Invalid coordinates or size, coords: {coords}, size: {size}")
+
+ # Skip the node if its area only contains one color
+ cropped_image = image.crop((*coords, *bottom_right))
+ if len(set(list(cropped_image.getdata()))) == 1:
+ continue
+
+ # Draw rectangle on image
+ draw.rectangle([coords, bottom_right], outline="red", width=1)
+
+ # Draw index number at the bottom left of the bounding box with black background
+ text_position = (coords[0], bottom_right[1]) # left x, bottom y; used with anchor "lb"
+ text_bbox: Tuple[int, int, int, int] = draw.textbbox(text_position, str(index), font=font, anchor="lb")
+ # offset: int = bottom_right[1]-text_bbox[3]
+ # text_bbox = (text_bbox[0], text_bbox[1]+offset, text_bbox[2], text_bbox[3]+offset)
+
+ # draw.rectangle([text_position, (text_position[0] + 25, text_position[1] + 18)], fill='black')
+ draw.rectangle(text_bbox, fill="black")
+ draw.text(text_position, str(index), font=font, anchor="lb", fill="white")
+
+ # each mark is an x, y, w, h tuple
+ marks.append([original_coords[0], original_coords[1], original_size[0], original_size[1]])
+ drew_nodes.append(_node)
+
+ # Text column: node text, else the value of an "EditWrapper"-class node, else "".
+ # Text containing a double quote is wrapped in quotes with inner quotes doubled.
+ if _node.text:
+ node_text = _node.text if '"' not in _node.text else '"{:}"'.format(_node.text.replace('"', '""'))
+ elif _node.get("{{{:}}}class".format(class_ns_windows), "").endswith("EditWrapper") and _node.get(
+ "{{{:}}}value".format(_value_ns)
+ ):
+ node_text = _node.get("{{{:}}}value".format(_value_ns), "")
+ node_text = node_text if '"' not in node_text else '"{:}"'.format(node_text.replace('"', '""'))
+ else:
+ node_text = '""'
+ text_information: str = "{:d}\t{:}\t{:}\t{:}".format(index, _node.tag, _node.get("name", ""), node_text)
+ text_informations.append(text_information)
+
+ index += 1
+
+ # Unparseable numbers and the explicit size/coordinate checks above all raise
+ # ValueError; such nodes are skipped without being drawn or reported.
+ except ValueError:
+ pass
+
+ output_image_stream = io.BytesIO()
+ image.save(output_image_stream, format="PNG")
+ image_content = output_image_stream.getvalue()
+
+ return marks, drew_nodes, "\n".join(text_informations), image_content
+
+
+def print_nodes_with_indent(nodes, indent=0):
+    """Print the tag and attributes of each node and, recursively, of its descendants.
+
+    Every nesting level is indented by two more spaces than its parent.
+
+    Args:
+        nodes: iterable of elements (an element itself iterates over its children).
+        indent (int): number of spaces printed before the first level.
+    """
+    padding = " " * indent
+    child_indent = indent + 2
+    for element in nodes:
+        print(padding, element.tag, element.attrib)
+        print_nodes_with_indent(element, child_indent)
+
+
+def find_active_applications(tree, state_ns):
+    """List the application names that should be kept in the accessibility tree.
+
+    An application is listed once for every direct child frame whose ``active``
+    state attribute is ``"true"``. ``"gnome-shell"`` is always appended; when no
+    frame is active the result is ``["gjs", "gnome-shell"]``.
+
+    Args:
+        tree: an ``ElementTree`` whose root's children are the applications.
+        state_ns (str): namespace URI of the state attributes.
+
+    Returns:
+        list: application names to keep.
+    """
+    active_attr = "{{{:}}}active".format(state_ns)
+    active_apps = [
+        application.attrib.get("name")
+        for application in tree.getroot()
+        for frame in application
+        if frame.attrib.get(active_attr, "false") == "true"
+    ]
+    if not active_apps:
+        return ["gjs", "gnome-shell"]
+    return active_apps + ["gnome-shell"]
+
+
+def linearize_accessibility_tree(accessibility_tree, platform="Ubuntu"):
+    """Flatten an accessibility-tree XML document into a tab-separated table.
+
+    Applications not returned by ``find_active_applications`` are dropped, the
+    remaining nodes are filtered with ``filter_nodes`` and written one per row as
+    ``tag / text / centre position / size``; near-duplicate rows are then removed
+    by ``filter_similar_nodes``.
+
+    Args:
+        accessibility_tree (str): the XML document.
+        platform (str): ``"Ubuntu"`` or ``"Windows"``; selects the attribute namespaces.
+
+    Returns:
+        str: the table (header row first), or ``""`` if the document could not be processed.
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+    """
+    if platform == "Ubuntu":
+        _state_ns = state_ns_ubuntu
+        _component_ns = component_ns_ubuntu
+    elif platform == "Windows":
+        _state_ns = state_ns_windows
+        _component_ns = component_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    # Raw string: "\(" and "\d" are regex escapes, not string escapes.
+    int_pair = re.compile(r"\((\d+), (\d+)\)")
+    coord_attr = "{{{:}}}screencoord".format(_component_ns)
+    size_attr = "{{{:}}}size".format(_component_ns)
+
+    try:
+        tree = ET.ElementTree(ET.fromstring(accessibility_tree))
+        root = tree.getroot()
+        keep_apps = find_active_applications(tree, _state_ns)
+
+        # Remove inactive applications (iterate over a copy while removing)
+        for application in list(root):
+            if application.get("name") not in keep_apps:
+                root.remove(application)
+
+        filtered_nodes = filter_nodes(root, platform, check_image=True)
+        rows = ["tag\ttext\tposition (center x & y)\tsize (w & h)"]
+
+        # Linearize the accessibility tree nodes into a table format
+        for node in filtered_nodes:
+            size = node.get(size_attr, "")
+            pos_match = int_pair.match(node.get(coord_attr, ""))
+            size_match = int_pair.match(size)
+            if pos_match is None or size_match is None:
+                # Geometry is missing or not "(int, int)": the row cannot be built.
+                continue
+
+            # Prefer the node text; fall back to, or prefix with, the node name.
+            text = (node.text if node.text is not None else "").strip()
+            name = node.get("name", "").strip()
+            if text == "":
+                text = name
+            elif name != "" and text != name:
+                text = f"{name} ({text})"
+            text = text.replace("\n", "\\n")
+
+            x, y = (int(value) for value in pos_match.groups())
+            w, h = (int(value) for value in size_match.groups())
+            x_mid, y_mid = x + w // 2, y + h // 2
+
+            rows.append("{:}\t{:}\t{:}\t{:}".format(node.tag, text, f"({x_mid}, {y_mid})", size))
+
+        # Filter out similar nodes
+        linearized_accessibility_tree = filter_similar_nodes("\n".join(rows))
+    except Exception as e:
+        # Best effort: a broken document yields an empty table instead of an error.
+        print(f"Error in linearize_accessibility_tree: {e}")
+        linearized_accessibility_tree = ""
+
+    return linearized_accessibility_tree
+
+
+def trim_accessibility_tree(linearized_accessibility_tree, max_items):
+    """Limit a linearized tree to ``max_items`` lines.
+
+    When the stripped text has more than ``max_items`` lines, only the first
+    ``max_items`` are kept and a final ``"..."`` line is appended. Otherwise the
+    input is returned unchanged (including any surrounding whitespace).
+
+    Args:
+        linearized_accessibility_tree (str): newline-separated rows.
+        max_items (int): maximum number of rows to keep.
+
+    Returns:
+        str: the possibly truncated text.
+    """
+    rows = linearized_accessibility_tree.strip().split("\n")
+    if len(rows) <= max_items:
+        return linearized_accessibility_tree
+    return "\n".join(rows[:max_items]) + "\n..."
diff --git a/mm_agents/autoglm_v/prompt/deduplicate_node.py b/mm_agents/autoglm_v/prompt/deduplicate_node.py
new file mode 100644
index 00000000..824a2e1f
--- /dev/null
+++ b/mm_agents/autoglm_v/prompt/deduplicate_node.py
@@ -0,0 +1,100 @@
+import re
+
+
+def parse_line(line):
+    """Parse one row of the linearized accessibility tree.
+
+    Expected shape, e.g.: ``label Google Chrome (191, 13) (104, 17)`` - a tag, the
+    text, the centre position and the size, separated by whitespace.
+
+    Args:
+        line (str): one table row.
+
+    Returns:
+        dict | None: the parsed fields (``type``, ``text``, ``bbox`` as
+        ``(x1, y1, x2, y2)``, ``center``, ``size``, ``raw``), or None when the row
+        does not match the expected shape.
+    """
+    matched = re.match(r"^(\S+)\s+(.+?)\s+\((\d+), (\d+)\)\s+\((\d+), (\d+)\)", line)
+    if matched is None:
+        return None
+
+    node_type, text = matched.group(1, 2)
+    cx, cy, w, h = (int(number) for number in matched.group(3, 4, 5, 6))
+
+    # The row stores the centre; recover the top-left corner, then add the size.
+    left = cx - w // 2
+    top = cy - h // 2
+    return {
+        "type": node_type,
+        "text": text.strip(),
+        "bbox": (left, top, left + w, top + h),
+        "center": (cx, cy),
+        "size": (w, h),
+        "raw": line,
+    }
+
+
+def iou(box1, box2):
+    """Intersection-over-union of two axis-aligned boxes.
+
+    Args:
+        box1: ``(x1, y1, x2, y2)``.
+        box2: ``(x1, y1, x2, y2)``.
+
+    Returns:
+        The overlap area divided by the area of the union, or 0 when the union is empty.
+    """
+    overlap_w = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
+    overlap_h = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
+    intersection = overlap_w * overlap_h
+
+    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+    union = first_area + second_area - intersection
+
+    if union == 0:
+        return 0
+    return intersection / union
+
+
+def norm_text(s):
+    """Normalize text for comparison: lower-case it and delete all whitespace."""
+    lowered = s.lower()
+    return re.sub(r"\s+", "", lowered)
+
+
+def text_similarity(a, b):
+    """Crude text similarity: 1.0 when the normalized texts are identical, otherwise 0."""
+    return 1.0 if norm_text(a) == norm_text(b) else 0
+
+
+def filter_similar_nodes(linearized_accessibility_tree, iou_thresh=0.2, text_thresh=0.9):
+    """Drop rows that duplicate an earlier row of the linearized accessibility tree.
+
+    Two rows are duplicates when their bounding boxes overlap with an IoU above
+    ``iou_thresh`` and their text similarity is above ``text_thresh``; the earlier
+    row is kept. Blank lines are discarded; rows that ``parse_line`` cannot parse
+    (such as the header) are always kept.
+
+    Args:
+        linearized_accessibility_tree (str): newline-separated table rows.
+        iou_thresh (float): minimum (exclusive) box overlap for a duplicate.
+        text_thresh (float): minimum (exclusive) text similarity for a duplicate.
+
+    Returns:
+        str: the surviving rows, in their original order, joined by newlines.
+    """
+    nodes = []
+    for ln in linearized_accessibility_tree.split("\n"):
+        if not ln.strip():
+            continue
+        parsed = parse_line(ln)
+        # Rows that cannot be parsed are kept as-is
+        nodes.append(parsed if parsed else {"raw": ln, "invalid": True})
+
+    removed = [False] * len(nodes)
+    kept = []
+    for i, current in enumerate(nodes):
+        if current.get("invalid"):
+            kept.append(current["raw"])
+            continue
+        if removed[i]:
+            continue
+        kept.append(current["raw"])
+
+        for j in range(i + 1, len(nodes)):
+            other = nodes[j]
+            # Already-removed rows need no further comparison.
+            if removed[j] or other.get("invalid"):
+                continue
+            if (
+                iou(current["bbox"], other["bbox"]) > iou_thresh
+                and text_similarity(current["text"], other["text"]) > text_thresh
+            ):
+                # The two rows are near-identical: drop the later one
+                removed[j] = True
+
+    return "\n".join(kept)
+
+
+# Example usage: de-duplicate a sample linearized tree and print the result.
+if __name__ == "__main__":
+ linearized_accessibility_tree = "tag\ttext\tposition (center x & y)\tsize (w & h)\nicon\t\t(1853, 1001)\t(64, 64)\nlabel\tHome\t(1853, 1045)\t(40, 17)\nlabel\tActivities\t(49, 13)\t(63, 17)\ntext\tActivities\t(49, 13)\t(63, 17)\nlabel\tApr 17 17∶04\t(995, 13)\t(117, 27)\ntext\tApr 17 17∶04\t(995, 13)\t(87, 18)\nmenu\tSystem\t(1867, 13)\t(106, 27)\npush-button\tGoogle Chrome\t(35, 65)\t(70, 64)\npush-button\tThunderbird Mail\t(35, 133)\t(70, 64)\npush-button\tVisual Studio Code\t(35, 201)\t(70, 64)\npush-button\tVLC media player\t(35, 269)\t(70, 64)\npush-button\tLibreOffice Writer\t(35, 337)\t(70, 64)\npush-button\tLibreOffice Calc\t(35, 405)\t(70, 64)\npush-button\tLibreOffice Impress\t(35, 473)\t(70, 64)\npush-button\tGNU Image Manipulation Program\t(35, 541)\t(70, 64)\npush-button\tFiles\t(35, 609)\t(70, 64)\npush-button\tUbuntu Software\t(35, 677)\t(70, 64)\npush-button\tHelp\t(35, 745)\t(70, 64)\npush-button\tTrash\t(35, 816)\t(70, 64)\ntoggle-button\tShow Applications\t(35, 1045)\t(70, 70)"
+ result = filter_similar_nodes(linearized_accessibility_tree)
+ print(result)
diff --git a/mm_agents/autoglm_v/prompt/grounding_agent.py b/mm_agents/autoglm_v/prompt/grounding_agent.py
new file mode 100644
index 00000000..e29c7513
--- /dev/null
+++ b/mm_agents/autoglm_v/prompt/grounding_agent.py
@@ -0,0 +1,260 @@
+import base64
+import json
+import logging
+import os
+import xml.etree.ElementTree as ET
+from typing import Dict, List, Optional, Tuple
+
+logger = logging.getLogger("desktopenv.agent")
+
+
+def agent_action(func):
+    """Decorator that tags ``func`` with ``is_agent_action = True`` and returns it unchanged."""
+    setattr(func, "is_agent_action", True)
+    return func
+
+
+# Python source template that focuses and maximizes a window via wmctrl.
+# Every occurrence of the placeholder WINDOW_ID is substituted by the caller.
+# `time` is imported explicitly because the template calls time.sleep().
+switch_window_code = """import subprocess;
+import time;
+import pyautogui;
+pyautogui.press('escape');
+time.sleep(0.5);
+subprocess.run(['wmctrl', '-ia', 'WINDOW_ID'])
+subprocess.run(['wmctrl', '-ir', 'WINDOW_ID', '-b', 'add,maximized_vert,maximized_horz'])
+print('Switch to WINDOW_ID')"""
+
+# Maps the lower-case application name accepted by GroundingAgent.open_app to the
+# shell command that launches it.
+launch_app_commands = {
+ # Web Browser
+ "chrome": "google-chrome --remote-debugging-port=1337",
+ # File Manager
+ "files": "nautilus",
+ # Terminal (exports the session D-Bus address before launching)
+ "terminal": 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" && gnome-terminal',
+ # Utilities
+ "gedit": "gedit",
+ # Office
+ "libreoffice writer": "libreoffice --writer",
+ "libreoffice calc": "libreoffice --calc",
+ "libreoffice impress": "libreoffice --impress",
+ # System (exports the session D-Bus address before launching)
+ "settings": 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" && gnome-control-center',
+ # Multimedia
+ "vlc": "vlc",
+ "gimp": "gimp",
+ # IDE
+ "vs code": "code",
+ # Email
+ "thunderbird": "thunderbird",
+}
+
+
+class GroundingAgent:
+    # Translates high-level agent actions into executable commands (mostly strings
+    # of Python source built around pyautogui).
+    #
+    # NOTE: the docstrings of the @agent_action methods are kept verbatim - they may
+    # be read at runtime to build the prompt, so treat them as program text.
+
+    # Maps a normalized application name to the tool class exposed for it.
+    tool_list = {
+        "libreoffice_calc": "CalcTools",
+        "libreoffice_impress": "ImpressTools",
+        "libreoffice_writer": "WriterTools",
+        "code": "CodeTools",
+        "vlc": "VLCTools",
+        "google_chrome": "BrowserTools",
+    }
+
+    relative_coordinate = True  # whether the coordinates are relative (0-1000) or absolute (e.g. 1920x1080)
+
+    # Screen resolution (width, height) that relative coordinates are scaled to.
+    screen_size = (1920, 1080)
+
+    @classmethod
+    def _to_screen(cls, coordinate):
+        # Convert an [x, y] pair to absolute pixels; relative values are on a 0-1000 scale.
+        x, y = coordinate
+        if cls.relative_coordinate:
+            width, height = cls.screen_size
+            x, y = round(x * width / 1000), round(y * height / 1000)
+        return x, y
+
+    @classmethod
+    def tool_commands(cls, code: str, tool_name: str):
+        # Wrap `code` so it runs with the tool module imported and prints the tool's
+        # result afterwards. Raises KeyError when `tool_name` is not in `tool_list`.
+        tool_class = cls.tool_list[tool_name]
+        command = f"from {tool_name} import *; {code}; {tool_class}.print_result()"
+
+        return [
+            command,
+        ]
+
+    @classmethod
+    @agent_action
+    def click(
+        cls,
+        coordinate: List,
+        num_clicks: int = 1,
+        button_type: str = "left",
+    ):
+        """
+        Click on the element
+
+        Args:
+            coordinate (List): [x, y], coordinate of the element to click on
+            num_clicks (int): number of times to click the element
+            button_type (str): which mouse button to press ("left", "middle", or "right")
+        """
+        x, y = cls._to_screen(coordinate)
+        # TODO: maximizing a window needs a single call
+        return f"""pyautogui.click({x}, {y}, clicks={num_clicks}, button={repr(button_type)}); print("Click Success")"""
+
+    @classmethod
+    @agent_action
+    def type(
+        cls,
+        coordinate: Optional[List] = None,
+        text: str = "",
+        overwrite: bool = False,
+        enter: bool = False,
+    ):
+        """
+        Type text into the element
+
+        Args:
+            coordinate (List): [x, y], coordinate of the element to type into. If None, typing starts at current cursor location
+            text (str): the text to type
+            overwrite (bool): True to overwrite existing text, False otherwise
+            enter (bool): True to press enter after typing, False otherwise
+        """
+
+        command = ""
+
+        if coordinate is not None:
+            # Focus the element first by clicking on it
+            x, y = cls._to_screen(coordinate)
+            command += f"pyautogui.click({x}, {y}); "
+
+        if overwrite:
+            # Select everything and delete it before typing
+            command += "pyautogui.hotkey('ctrl', 'a'); pyautogui.press('backspace'); "
+
+        command += f"pyautogui.write({repr(text)}); "
+
+        if enter:
+            command += "pyautogui.press('enter'); "
+
+        command += "print('Type Success')"
+
+        return command
+
+    @classmethod
+    @agent_action
+    def drag_and_drop(cls, drag_from_coordinate: List, drop_on_coordinate: List):
+        """
+        Drag element1 and drop it on element2
+
+        Args:
+            drag_from_coordinate (List): [x, y], coordinate of element to drag
+            drop_on_coordinate (List): [x, y], coordinate of element to drop on
+        """
+        x1, y1 = cls._to_screen(drag_from_coordinate)
+        x2, y2 = cls._to_screen(drop_on_coordinate)
+
+        command = f"pyautogui.moveTo({x1}, {y1}); "
+        # TODO: specified duration?
+        command += f"pyautogui.dragTo({x2}, {y2}, duration=1.); pyautogui.mouseUp(); "
+
+        command += "print('Drag and Drop Success')"
+
+        return command
+
+    @classmethod
+    @agent_action
+    def scroll(cls, coordinate: List, direction: str):
+        """
+        Scroll the element in the specified direction
+
+        Args:
+            coordinate (List): [x, y], coordinate of the element to scroll in
+            direction (str): the direction to scroll ("up" or "down")
+        """
+        x, y = cls._to_screen(coordinate)
+        # Anything other than "up" scrolls down.
+        amount = 100 if direction == "up" else -100
+        return f"import pyautogui; pyautogui.moveTo({x}, {y}); pyautogui.scroll({amount}); print('Scroll Success')"
+
+    @classmethod
+    @agent_action
+    def open_app(cls, app_name: str):
+        """
+        Open a specified application
+
+        Supported apps: chrome, files, terminal, gedit, libreoffice writer,
+        libreoffice calc, libreoffice impress, vs code, vlc, gimp, settings, thunderbird
+
+        Args:
+            app_name (str): name of the application to open
+        """
+
+        app_name = app_name.lower().strip()
+
+        if app_name not in launch_app_commands:
+            # repr() yields a valid string literal even when the name contains quotes.
+            message = f"{app_name} is not supported or recognized"
+            return f"print({message!r})"
+
+        # Known apps are returned as a structured action, not as Python source.
+        return {
+            "action_type": "OPEN_APP",
+            "parameters": {"launch_app_command": launch_app_commands[app_name], "app_name": app_name},
+        }
+
+    @classmethod
+    @agent_action
+    def switch_window(cls, window_id: str):
+        """
+        Switch to the window with the given window id
+
+        Args:
+            window_id (str): the window id to switch to from the provided list of open windows
+        """
+        return switch_window_code.replace("WINDOW_ID", window_id)
+
+    @classmethod
+    @agent_action
+    def hotkey(cls, keys: List):
+        """
+        Press a hotkey combination
+
+        Args:
+            keys (List): the keys to press in combination (e.g. ['ctrl', 'c'] for copy, ['prtsc'] for screenshot)
+        """
+        key_names = [str(key) for key in keys]
+        # repr() quotes each key safely for the generated call.
+        call_args = ", ".join(repr(name) for name in key_names)
+        # The printed message shows each key in single quotes, e.g. Press Hotkey: 'ctrl', 'c'
+        message = "Press Hotkey: " + ", ".join(f"'{name}'" for name in key_names)
+        return f"import pyautogui; pyautogui.hotkey({call_args}); print({message!r})"
+
+    @classmethod
+    @agent_action
+    def quote(cls, content: str):
+        """
+        Quote information from the current page for memory
+
+        Args:
+            content (str): text summarized or copied from the page for later operation
+        """
+        # repr() keeps quotes, backslashes and newlines in `content` from breaking
+        # (or being reinterpreted by) the generated print statement.
+        return f"print({str(content)!r})"
+
+    @classmethod
+    @agent_action
+    def wait(cls):
+        """
+        Wait for a while
+
+        """
+        return "WAIT"
+
+    @classmethod
+    @agent_action
+    def exit(cls, success: bool):
+        """
+        End the current task
+
+        Args:
+            success (bool): True if successfully finish a task, False otherwise
+        """
+        return "DONE" if success else "FAIL"
diff --git a/mm_agents/autoglm_v/prompt/procedural_memory.py b/mm_agents/autoglm_v/prompt/procedural_memory.py
new file mode 100644
index 00000000..2003b5a2
--- /dev/null
+++ b/mm_agents/autoglm_v/prompt/procedural_memory.py
@@ -0,0 +1,194 @@
+import inspect
+import json
+import os
+import textwrap
+
+# Absolute path of the directory one level above this file's own directory.
+# Tool API specs are looked up under <current_dir>/tools/apis/.
+current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def _render_stub(func, func_name, def_indent, doc_indent, leading_params):
+    """
+    Render one tool function as a signature line followed by a docstring (no body).
+
+    Args:
+        func (dict): the "function" entry of a tool spec; must provide "description"
+            and "parameters" -> "properties", and may provide "parameters" -> "required"
+        func_name (str): name to put after `def`
+        def_indent (str): text placed before `def`
+        doc_indent (str): text placed before every docstring line
+        leading_params (list): parameter names emitted before the spec's own (e.g. ["cls"])
+
+    Returns:
+        str: the stub text, terminated by one empty line
+
+    Raises:
+        KeyError: if a name in "required" is missing from "properties", or a
+            parameter spec has no "type"
+    """
+    description = func["description"]
+    params = func["parameters"]["properties"]
+    required = func["parameters"].get("required", [])
+    # Required parameters come first (in "required" order), then the remaining ones
+    # in "properties" order. No default values are emitted for optional parameters.
+    optional = [name for name in params if name not in required]
+
+    arg_names = [*leading_params, *(str(n) for n in required), *(str(n) for n in optional)]
+    pieces = [
+        f"{def_indent}def {func_name}({', '.join(arg_names)}):\n",
+        f'{doc_indent}"""\n{doc_indent}{description}\n\n{doc_indent}Args:\n',
+    ]
+
+    if not required and not optional:
+        pieces.append(f"{doc_indent}None\n")
+    for name in required:
+        param_type = params[name]["type"]
+        param_desc = params[name].get("description", "")
+        pieces.append(f"{doc_indent}{name} ({param_type}): {param_desc}\n")
+    for name in optional:
+        param_type = params[name]["type"]
+        param_desc = params[name].get("description", "")
+        pieces.append(f"{doc_indent}{name} ({param_type}, optional): {param_desc}\n")
+
+    pieces.append(f'{doc_indent}"""\n')
+    return "".join(pieces) + "\n"
+
+
+def generate_func(json_data):
+    """
+    Turn a list of tool specs into Python-style stub text for the prompt.
+
+    Entries whose "type" is not "function" are ignored. A function named
+    "Class.method" (exactly one dot) is rendered as a method of `class Class:` with
+    a leading `cls` parameter; any other name is rendered as a free function after
+    all classes.
+
+    Args:
+        json_data (list): tool spec entries, each a dict with "type" and "function"
+
+    Returns:
+        tuple: (stub text with surrounding whitespace stripped, name of the last
+        class rendered or "" when there is none)
+    """
+    # Group the entries: class name -> entries, plus the entries without a class.
+    class_funcs = {}
+    no_class_funcs = []
+    for item in json_data:
+        if item["type"] != "function":
+            continue
+        name_parts = item["function"]["name"].split(".")
+        if len(name_parts) == 2:
+            class_funcs.setdefault(name_parts[0], []).append(item)
+        else:
+            no_class_funcs.append(item)
+
+    chunks = []
+    cls_name = ""
+
+    # Methods grouped under their class. The indent strings reproduce the existing
+    # output format exactly.
+    for class_name, items in class_funcs.items():
+        cls_name = class_name
+        chunks.append(f"class {class_name}:\n")
+        for item in items:
+            func = item["function"]
+            chunks.append(_render_stub(func, func["name"].split(".")[-1], " ", " ", ["cls"]))
+        chunks.append("\n")
+
+    # Free functions keep their full name.
+    for item in no_class_funcs:
+        func = item["function"]
+        chunks.append(_render_stub(func, func["name"], "", " ", []))
+
+    return "".join(chunks).strip(), cls_name
+
+
+# Opening section of the prompt. Placeholders {observation_list} and {format_hint}
+# are filled with str.format() in Prompt.construct_procedural_memory.
+setup_prompt = """You are a GUI operation agent. You will be given a task and your action history, with current observation ({observation_list}). You should help me control the computer, output the best action step by step to accomplish the task.
+You should first generate a plan, reflect on the current observation, then generate actions to complete the task in python-style pseudo code using the predefined functions.
+
+* Output Format:
+{format_hint}"""
+
+# Wrapper for the list of callable functions; {class_content} receives the rendered
+# Agent actions plus, when an app is given, that app's tool stubs.
+func_def_template = """* Available Functions:
+```python
+{class_content}
+```"""
+
+# Closing notes. {relative_coordinate_hint} is either one bullet line ending in a
+# newline or an empty string, which is why it sits directly in front of the next
+# bullet. {client_password} is the password shown to the agent.
+note_prompt = """* Note:
+- Your code should only be wrapped in ```python```.
+- Only **ONE-LINE-OF-CODE** at a time.
+- Each code block is context independent, and variables from the previous round cannot be used in the next round.
+{relative_coordinate_hint}- Return with `Agent.exit(success=True)` immediately after the task is completed.
+- The computer's environment is Linux, e.g., Desktop path is '/home/user/Desktop'
+- My computer's password is '{client_password}', feel free to use it when you need sudo rights"""
+
+
+class Prompt:
+    """Builds the text sections of the agent prompt from the module-level templates."""
+
+    @staticmethod
+    def construct_procedural_memory(agent_class, app_name=None, client_password="password", with_image=True, with_atree=False, relative_coordinate=True, glm41v_format=True):
+        """
+        Assemble the setup, function-definition and note sections of the prompt.
+
+        Args:
+            agent_class: class whose callable attributes carrying an `is_agent_action`
+                attribute are listed (name, signature and __doc__) as available actions
+            app_name (str, optional): when given, the tool spec
+                <current_dir>/tools/apis/<app_name lower-cased>.json is loaded and its
+                stubs are appended to the action list
+            client_password (str): password inserted into the note section
+            with_image (bool): list "screenshot" among the observations
+            with_atree (bool): list the a11y tree among the observations
+            relative_coordinate (bool): include the 0-1000 coordinate hint in the notes
+            glm41v_format (bool): selects between the two output-format hints (they
+                differ by one blank line before the code fence)
+
+        Returns:
+            tuple: (setup prompt, function definition prompt, note prompt) as strings
+
+        Raises:
+            OSError: if the tool spec for `app_name` cannot be opened
+        """
+        agent_class_content = "Class Agent:"
+        for attr_name in dir(agent_class):
+            attr = getattr(agent_class, attr_name)
+            if not (callable(attr) and hasattr(attr, "is_agent_action")):
+                continue
+            # inspect.signature gives the full parameter list as written on the action.
+            signature = inspect.signature(attr)
+            agent_class_content += f"\n def {attr_name}{signature}:\n '''{attr.__doc__}'''\n "
+
+        if app_name is not None:
+            tool_path = os.path.join(current_dir, "tools", "apis", f"{app_name.lower()}.json")
+            # JSON is read as UTF-8 explicitly so the result does not depend on the
+            # platform's default encoding.
+            with open(tool_path, "r", encoding="utf-8") as f:
+                json_data = json.load(f)
+
+            # Only the stub text is needed here; the returned class name is ignored.
+            tool_class_content, _ = generate_func(json_data)
+            agent_class_content += "\n\n{}".format(tool_class_content)
+
+        func_def_prompt = func_def_template.format(class_content=agent_class_content.strip())
+
+        # Observation list shown in the setup section; order matters for the wording.
+        obs_items = []
+        if with_image:
+            obs_items.append("screenshot")
+        obs_items.append("current app name")
+        if with_atree:
+            obs_items.append("a11y tree (based on AT-SPI library)")
+        obs_items.extend(["app info", "last action result"])
+        observation_list = ", ".join(obs_items)
+
+        if glm41v_format:
+            format_hint = "\n{**YOUR-PLAN-AND-THINKING**}\n```python\n{**ONE-LINE-OF-CODE**}\n```"
+        else:
+            format_hint = "\n{**YOUR-PLAN-AND-THINKING**}\n\n```python\n{**ONE-LINE-OF-CODE**}\n```"
+        setup_prompt_formatted = setup_prompt.format(
+            observation_list=observation_list,
+            format_hint=format_hint,
+        )
+
+        if relative_coordinate:
+            relative_coordinate_hint = "- The coordinate [x, y] should be normalized to 0-1000, which usually should be the center of a specific target element.\n"
+        else:
+            relative_coordinate_hint = ""
+        note_prompt_formatted = note_prompt.format(
+            relative_coordinate_hint=relative_coordinate_hint,
+            client_password=client_password,
+        )
+
+        return setup_prompt_formatted, func_def_prompt, note_prompt_formatted
+
+
+# Manual check when the file is run as a script: print the three prompt sections
+# built for GroundingAgent together with the "vlc" tool spec.
+if __name__ == "__main__":
+ from grounding_agent import GroundingAgent
+
+ print(Prompt.construct_procedural_memory(GroundingAgent, "vlc"))
diff --git a/mm_agents/autoglm_v/tools/apis/__init__.py b/mm_agents/autoglm_v/tools/apis/__init__.py
new file mode 100644
index 00000000..a43137a8
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/__init__.py
@@ -0,0 +1,3 @@
+# Re-export the stub generator so it can be imported from this package directly.
+from .func import generate_func
+
+__all__ = ["generate_func"]
diff --git a/mm_agents/autoglm_v/tools/apis/code.json b/mm_agents/autoglm_v/tools/apis/code.json
new file mode 100644
index 00000000..8af9510a
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/code.json
@@ -0,0 +1,236 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.launch_vscode",
+ "description": "Launch VS Code with specified path",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "File path or directory to open"
+ }
+ },
+ "required": ["path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.compare_files",
+ "description": "Compare two files in VS Code",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file1": {
+ "type": "string",
+ "description": "First file path"
+ },
+ "file2": {
+ "type": "string",
+ "description": "Second file path"
+ }
+ },
+ "required": ["file1", "file2"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.add_folder",
+ "description": "Add folder to active VS Code window",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "folder": {
+ "type": "string",
+ "description": "Folder path to add"
+ }
+ },
+ "required": ["folder"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.goto_file",
+ "description": "Open file at specific position",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "File path to open"
+ },
+ "line": {
+ "type": "integer",
+ "description": "Line number",
+ "default": 1
+ },
+ "character": {
+ "type": "integer",
+ "description": "Character position",
+ "default": 1
+ }
+ },
+ "required": ["file_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.perform_merge",
+ "description": "Perform three-way merge",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path1": {
+ "type": "string",
+ "description": "First version file path"
+ },
+ "path2": {
+ "type": "string",
+ "description": "Second version file path"
+ },
+ "base": {
+ "type": "string",
+ "description": "Base version file path"
+ },
+ "result": {
+ "type": "string",
+ "description": "Output file path"
+ }
+ },
+ "required": ["path1", "path2", "base", "result"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.remove_folder",
+ "description": "Remove folder from active VS Code window",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "folder": {
+ "type": "string",
+ "description": "Folder path to remove"
+ }
+ },
+ "required": ["folder"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.install_extension",
+ "description": "Install or update VS Code extension",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "extension_id": {
+ "type": "string",
+ "description": "Extension identifier"
+ },
+ "pre_release": {
+ "type": "boolean",
+ "description": "Install pre-release version",
+ "default": false
+ }
+ },
+ "required": ["extension_id"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.uninstall_extension",
+ "description": "Uninstall VS Code extension",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "extension_id": {
+ "type": "string",
+ "description": "Extension identifier"
+ }
+ },
+ "required": ["extension_id"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.list_extensions",
+ "description": "List installed extensions",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "show_versions": {
+ "type": "boolean",
+ "description": "Show extension versions",
+ "default": false
+ },
+ "category": {
+ "type": "string",
+ "description": "Filter by category"
+ }
+ }
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.update_extensions",
+ "description": "Update all extensions to latest version",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.disable_extension",
+ "description": "Disable extension for next VS Code instance",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "extension_id": {
+ "type": "string",
+ "description": "Extension identifier"
+ }
+ },
+ "required": ["extension_id"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CodeTools.toggle_sync",
+ "description": "Toggle VS Code synchronization",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "state": {
+ "type": "string",
+ "description": "Sync state",
+ "enum": ["on", "off"]
+ }
+ },
+ "required": ["state"]
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm_v/tools/apis/func.py b/mm_agents/autoglm_v/tools/apis/func.py
new file mode 100644
index 00000000..84ee5480
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/func.py
@@ -0,0 +1,117 @@
+def _format_stub(func, func_name, def_indent, doc_indent, leading_params):
+    """
+    Format one tool function as a `def` line plus docstring (no body).
+
+    Args:
+        func (dict): the "function" entry of a tool spec; must provide "description"
+            and "parameters" -> "properties", and may provide "parameters" -> "required"
+        func_name (str): name to put after `def`
+        def_indent (str): text placed before `def`
+        doc_indent (str): text placed before every docstring line
+        leading_params (list): parameter names emitted before the spec's own (e.g. ["cls"])
+
+    Returns:
+        str: the stub text, terminated by one empty line
+
+    Raises:
+        KeyError: if a name in "required" is missing from "properties", or a
+            parameter spec has no "type"
+    """
+    description = func["description"]
+    params = func["parameters"]["properties"]
+    required = func["parameters"].get("required", [])
+    # Required names first (in "required" order), then the rest in "properties"
+    # order. Optional parameters are listed without a default value.
+    optional = [name for name in params if name not in required]
+
+    signature = ", ".join([*leading_params, *(str(n) for n in required), *(str(n) for n in optional)])
+    out = [
+        f"{def_indent}def {func_name}({signature}):\n",
+        f'{doc_indent}"""\n{doc_indent}{description}\n\n{doc_indent}Args:\n',
+    ]
+
+    if not required and not optional:
+        out.append(f"{doc_indent}None\n")
+    for name, suffix in [(n, "") for n in required] + [(n, ", optional") for n in optional]:
+        param_type = params[name]["type"]
+        param_desc = params[name].get("description", "")
+        out.append(f"{doc_indent}{name} ({param_type}{suffix}): {param_desc}\n")
+
+    out.append(f'{doc_indent}"""\n')
+    out.append("\n")
+    return "".join(out)
+
+
+def generate_func(json_data):
+    """
+    Turn a list of tool specs into Python-style stub text.
+
+    Entries whose "type" is not "function" are ignored. A function named
+    "Class.method" (exactly one dot) is emitted as a method of `class Class:` with a
+    leading `cls` parameter; any other name is emitted as a free function after all
+    classes.
+
+    Args:
+        json_data (list): tool spec entries, each a dict with "type" and "function"
+
+    Returns:
+        str: the stub text with surrounding whitespace stripped
+    """
+    # Partition the entries: class name -> entries, plus entries without a class.
+    class_funcs = {}
+    no_class_funcs = []
+    for item in json_data:
+        if item["type"] != "function":
+            continue
+        name_parts = item["function"]["name"].split(".")
+        if len(name_parts) == 2:
+            class_funcs.setdefault(name_parts[0], []).append(item)
+        else:
+            no_class_funcs.append(item)
+
+    code = []
+
+    # Class methods. The indent strings reproduce the existing output format exactly.
+    for class_name, items in class_funcs.items():
+        code.append(f"class {class_name}:\n")
+        for item in items:
+            func = item["function"]
+            code.append(_format_stub(func, func["name"].split(".")[-1], " ", " ", ["cls"]))
+        code.append("\n")
+
+    # Free functions keep their full name.
+    for item in no_class_funcs:
+        func = item["function"]
+        code.append(_format_stub(func, func["name"], "", " ", []))
+
+    return "".join(code).strip()
+
+
+# Manual check when the file is run as a script: print the stubs generated from the
+# LibreOffice Calc tool spec.
+if __name__ == "__main__":
+    import json
+    import os
+
+    # Resolve the spec next to this module so the check works from any working
+    # directory, and read it as UTF-8 regardless of the platform default.
+    sample_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "libreoffice_calc.json")
+    with open(sample_path, "r", encoding="utf-8") as f:
+        json_data = json.load(f)
+    print(generate_func(json_data))
diff --git a/mm_agents/autoglm_v/tools/apis/google_chrome.json b/mm_agents/autoglm_v/tools/apis/google_chrome.json
new file mode 100644
index 00000000..f7ae26a6
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/google_chrome.json
@@ -0,0 +1,134 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_profile_settings",
+ "description": "Opens profile settings page.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_password_settings",
+ "description": "Opens password/autofill settings page.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_privacy_settings",
+ "description": "Opens privacy settings page.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_appearance_settings",
+ "description": "Opens appearance settings page.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_search_engine_settings",
+ "description": "Opens search engine settings page.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.bring_back_last_tab",
+ "description": "Restores last-closed tab (Ctrl+Shift+T).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.print",
+ "description": "Opens print dialog (Ctrl+P).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.delete_browsing_data",
+ "description": "Opens clear browsing data dialog (Ctrl+Shift+Del).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_extensions",
+ "description": "Opens extensions management page.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.bookmark_page",
+ "description": "Bookmarks current page (Ctrl+D).",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "BrowserTools.open_bookmarks",
+ "description": "Opens bookmarks page.",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json b/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json
new file mode 100644
index 00000000..90fe4e79
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json
@@ -0,0 +1,634 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.get_workbook_info",
+ "description": "Get workbook info: file path, name, sheets, and active sheet",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.save",
+ "description": "Save workbook to current location",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.get_column_data",
+ "description": "Get all data from specified column",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "Column name (e.g. 'A', 'B')"
+ }
+ },
+ "required": [
+ "column_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.switch_active_sheet",
+ "description": "Switch to sheet (creates if not exists)",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "sheet_name": {
+ "type": "string",
+ "description": "Sheet name"
+ }
+ },
+ "required": [
+ "sheet_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_column_values",
+ "description": "Set values to column (values only, not formulas)",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "Column name (e.g. 'A', 'B')"
+ },
+ "data": {
+ "type": "array",
+ "description": "Values to write"
+ },
+ "start_index": {
+ "type": "integer",
+ "description": "First row index (default: 2)"
+ }
+ },
+ "required": [
+ "column_name",
+ "data"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.highlight_range",
+ "description": "Highlight range with color",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Range (e.g. 'A1:B10')"
+ },
+ "color": {
+ "type": "integer",
+ "description": "Color value (default: 0xFF0000)"
+ }
+ },
+ "required": [
+ "range_str"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.transpose_range",
+ "description": "Transpose range and paste to target cell",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "source_range": {
+ "type": "string",
+ "description": "Source range (e.g. 'A1:B10')"
+ },
+ "target_cell": {
+ "type": "string",
+ "description": "Target cell (e.g. 'A1')"
+ }
+ },
+ "required": [
+ "source_range",
+ "target_cell"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.export_to_csv",
+ "description": "Export to CSV with same path/name",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.sort_column",
+ "description": "Sort column data",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "Column name (e.g. 'A', 'B')"
+ },
+ "ascending": {
+ "type": "boolean",
+ "description": "Sort ascending (default: true)"
+ },
+ "start_index": {
+ "type": "integer",
+ "description": "First row index (default: 2)"
+ }
+ },
+ "required": [
+ "column_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_validation_list",
+ "description": "Set validation list for column",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_name": {
+ "type": "string",
+ "description": "Column name (e.g. 'A', 'B')"
+ },
+ "values": {
+ "type": "array",
+ "description": "Validation values"
+ }
+ },
+ "required": [
+ "column_name",
+ "values"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.hide_row_data",
+ "description": "Hide rows containing value",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "value": {
+ "type": "string",
+ "description": "Value to hide (default: 'N/A')"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.reorder_columns",
+ "description": "Reorder columns by specified order",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "column_order": {
+ "type": "array",
+ "description": "Column names in desired order (e.g. ['A', 'B', 'C'])"
+ }
+ },
+ "required": [
+ "column_order"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.create_pivot_table",
+ "description": "Create pivot table from source sheet",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "source_sheet": {
+ "type": "string",
+ "description": "Source sheet name"
+ },
+ "table_name": {
+ "type": "string",
+ "description": "Pivot table name"
+ },
+ "row_fields": {
+ "type": "array",
+ "description": "Row labels (e.g. ['A', 'B'])"
+ },
+ "col_fields": {
+ "type": "array",
+ "description": "Column labels (e.g. ['A', 'B'])"
+ },
+ "value_fields": {
+ "type": "array",
+ "description": "Value fields (e.g. ['A', 'B'])"
+ },
+ "aggregation_function": {
+ "type": "string",
+ "description": "Aggregation function (sum, count, average, min, max)"
+ },
+ "target_cell": {
+ "type": "string",
+ "description": "Target cell (default: 'A1')"
+ }
+ },
+ "required": [
+ "source_sheet",
+ "table_name",
+ "value_fields"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.merge_cells",
+ "description": "Merge cells in range",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Cell range (e.g. 'A1:B10')"
+ }
+ },
+ "required": [
+ "range_str"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_cell_value",
+ "description": "Set cell value",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "cell": {
+ "type": "string",
+ "description": "Cell reference (e.g. 'A1')"
+ },
+ "value": {
+ "type": "string",
+ "description": "Cell value"
+ }
+ },
+ "required": [
+ "cell",
+ "value"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.format_range",
+ "description": "Apply formatting to range",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Range (e.g. 'A1:B10')"
+ },
+ "background_color": {
+ "type": "string",
+ "description": "Background color (e.g. '#0000ff')"
+ },
+ "font_color": {
+ "type": "string",
+ "description": "Font color (e.g. '#ffffff')"
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Bold text"
+ },
+ "alignment": {
+ "type": "string",
+ "description": "Text alignment (left, center, right)"
+ }
+ },
+ "required": [
+ "range_str"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.create_chart",
+ "description": "Create chart from data range",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "chart_type": {
+ "type": "string",
+ "description": "Chart type (bar, column, line, pie, scatter, area)"
+ },
+ "data_range": {
+ "type": "string",
+ "description": "Data range (e.g. 'A1:B10')"
+ },
+ "title": {
+ "type": "string",
+ "description": "Chart title"
+ },
+ "x_axis_title": {
+ "type": "string",
+ "description": "X axis title"
+ },
+ "y_axis_title": {
+ "type": "string",
+ "description": "Y axis title"
+ }
+ },
+ "required": [
+ "chart_type",
+ "data_range"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.freeze_panes",
+ "description": "Freeze rows/columns",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "rows": {
+ "type": "integer",
+ "description": "Rows to freeze from top"
+ },
+ "columns": {
+ "type": "integer",
+ "description": "Columns to freeze from left"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.rename_sheet",
+ "description": "Rename worksheet",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "old_name": {
+ "type": "string",
+ "description": "Current sheet name"
+ },
+ "new_name": {
+ "type": "string",
+ "description": "New sheet name"
+ }
+ },
+ "required": [
+ "old_name",
+ "new_name"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.copy_sheet",
+ "description": "Copy worksheet",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "source_sheet": {
+ "type": "string",
+ "description": "Source sheet name"
+ },
+ "new_sheet_name": {
+ "type": "string",
+ "description": "New sheet name (optional)"
+ }
+ },
+ "required": [
+ "source_sheet"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.reorder_sheets",
+ "description": "Change sheet order",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "sheet_name": {
+ "type": "string",
+ "description": "Sheet to move"
+ },
+ "position": {
+ "type": "integer",
+ "description": "New position (0-based)"
+ }
+ },
+ "required": [
+ "sheet_name",
+ "position"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_chart_legend_position",
+ "description": "Set chart legend position",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "position": {
+ "type": "string",
+ "description": "Legend position (top, bottom, left, right, none)"
+ }
+ },
+ "required": [
+ "position"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_number_format",
+ "description": "Apply number format to range",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "range_str": {
+ "type": "string",
+ "description": "Range (e.g. 'A1:B10')"
+ },
+ "format_type": {
+ "type": "string",
+ "description": "Format type (general, number, currency, accounting, date, time, percentage, fraction, scientific, text)"
+ },
+ "decimal_places": {
+ "type": "integer",
+ "description": "Decimal places (optional)"
+ }
+ },
+ "required": [
+ "range_str",
+ "format_type"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.adjust_column_width",
+ "description": "Adjust column width",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "columns": {
+ "type": "string",
+ "description": "Column range (e.g. 'A:C')"
+ },
+ "width": {
+ "type": "number",
+ "description": "Width in characters"
+ },
+ "autofit": {
+ "type": "boolean",
+ "description": "Autofit to content"
+ }
+ },
+ "required": [
+ "columns"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.adjust_row_height",
+ "description": "Adjust row height",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "rows": {
+ "type": "string",
+ "description": "Row range (e.g. '1:10')"
+ },
+ "height": {
+ "type": "number",
+ "description": "Height in points"
+ },
+ "autofit": {
+ "type": "boolean",
+ "description": "Autofit to content"
+ }
+ },
+ "required": [
+ "rows"
+ ]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.export_to_pdf",
+ "description": "Export to PDF",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "PDF save path (default: same as original)"
+ },
+ "sheets": {
+ "type": "array",
+ "description": "Sheets to include (default: all)"
+ },
+ "open_after_export": {
+ "type": "boolean",
+ "description": "Open PDF after export (default: false)"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "CalcTools.set_zoom_level",
+ "description": "Set worksheet zoom level",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "zoom_percentage": {
+ "type": "integer",
+ "description": "Zoom percentage (10-400)"
+ }
+ },
+ "required": [
+ "zoom_percentage"
+ ]
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json b/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json
new file mode 100644
index 00000000..524e87b8
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json
@@ -0,0 +1,559 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.save",
+ "description": "Save current presentation",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.go_to_slide",
+ "description": "Navigate to specific slide",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ }
+ },
+ "required": ["slide_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.get_slide_count",
+ "description": "Get total slide count",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.duplicate_slide",
+ "description": "Duplicate slide and place at end",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index to duplicate (1-based)"
+ }
+ },
+ "required": ["slide_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_font",
+ "description": "Set font for all text in slide",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "font_name": {
+ "type": "string",
+ "description": "Font name (e.g., 'Arial', 'Times New Roman')"
+ }
+ },
+ "required": ["slide_index", "font_name"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.write_text",
+ "description": "Write text to textbox",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "content": {
+ "type": "string",
+ "description": "Text content"
+ },
+ "page_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Textbox index (0-based)"
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Bold text (default: false)"
+ },
+ "italic": {
+ "type": "boolean",
+ "description": "Italic text (default: false)"
+ },
+ "size": {
+ "type": "integer",
+ "description": "Font size"
+ },
+ "append": {
+ "type": "boolean",
+ "description": "Append to existing text (default: false)"
+ }
+ },
+ "required": ["content", "page_index", "box_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_style",
+ "description": "Set text style for textbox",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Textbox index (0-based)"
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Bold text"
+ },
+ "italic": {
+ "type": "boolean",
+ "description": "Italic text"
+ },
+ "underline": {
+ "type": "boolean",
+ "description": "Underline text"
+ }
+ },
+ "required": ["slide_index", "box_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.configure_auto_save",
+ "description": "Configure auto-save settings",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "enabled": {
+ "type": "boolean",
+ "description": "Enable auto-save"
+ },
+ "interval_minutes": {
+ "type": "number",
+ "description": "Auto-save interval in minutes (min: 1)"
+ }
+ },
+ "required": ["enabled", "interval_minutes"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_background_color",
+ "description": "Set textbox background color",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Textbox index (0-based)"
+ },
+ "color": {
+ "type": "string",
+ "description": "Color name or hex code"
+ }
+ },
+ "required": ["slide_index", "box_index", "color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_text_color",
+ "description": "Set text color for textbox",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Textbox index (0-based)"
+ },
+ "color": {
+ "type": "string",
+ "description": "Color name or hex code"
+ }
+ },
+ "required": ["slide_index", "box_index", "color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.delete_content",
+ "description": "Delete textbox from slide",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Textbox index (0-based)"
+ }
+ },
+ "required": ["slide_index", "box_index"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_orientation",
+ "description": "Set slide orientation",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "orientation": {
+ "type": "string",
+ "description": "Slide orientation",
+ "enum": ["portrait", "landscape"]
+ }
+ },
+ "required": ["orientation"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.position_box",
+ "description": "Position textbox or image on slide",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Box index (0-based)"
+ },
+ "position": {
+ "type": "string",
+ "description": "Position on slide",
+ "enum": ["left", "right", "center", "top", "bottom", "top-left", "top-right", "bottom-left", "bottom-right"]
+ }
+ },
+ "required": ["slide_index", "box_index", "position"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.insert_file",
+ "description": "Insert video or audio file",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "File path"
+ },
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "position": {
+ "type": "object",
+ "description": "Position coordinates",
+ "properties": {
+ "x": {
+ "type": "number",
+ "description": "X position (% of slide width)"
+ },
+ "y": {
+ "type": "number",
+ "description": "Y position (% of slide height)"
+ }
+ }
+ },
+ "size": {
+ "type": "object",
+ "description": "Size dimensions",
+ "properties": {
+ "width": {
+ "type": "number",
+ "description": "Width (% of slide width)"
+ },
+ "height": {
+ "type": "number",
+ "description": "Height (% of slide height)"
+ }
+ }
+ },
+ "autoplay": {
+ "type": "boolean",
+ "description": "Auto-play media"
+ }
+ },
+ "required": ["file_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_background",
+ "description": "Set slide background color or image",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based). If not provided, applies to all slides"
+ },
+ "color": {
+ "type": "string",
+ "description": "Background color"
+ },
+ "image_path": {
+ "type": "string",
+ "description": "Background image path (overrides color)"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.save_as",
+ "description": "Save document to specified location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "File save path with filename and extension"
+ },
+ "overwrite": {
+ "type": "boolean",
+ "description": "Overwrite existing file (default: false)"
+ }
+ },
+ "required": ["file_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.insert_image",
+ "description": "Insert image to slide",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "image_path": {
+ "type": "string",
+ "description": "Image file path"
+ },
+ "width": {
+ "type": "number",
+ "description": "Image width in cm"
+ },
+ "height": {
+ "type": "number",
+ "description": "Image height in cm"
+ },
+ "position": {
+ "type": "object",
+ "description": "Position coordinates",
+ "properties": {
+ "x": {
+ "type": "number",
+ "description": "X position (% of slide width)"
+ },
+ "y": {
+ "type": "number",
+ "description": "Y position (% of slide height)"
+ }
+ }
+ }
+ },
+ "required": ["slide_index", "image_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.configure_display_settings",
+ "description": "Configure presentation display settings",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "use_presenter_view": {
+ "type": "boolean",
+ "description": "Use presenter view"
+ },
+ "primary_monitor_only": {
+ "type": "boolean",
+ "description": "Use primary monitor only"
+ },
+ "monitor_for_presentation": {
+ "type": "integer",
+ "description": "Monitor number for presentation"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_slide_number_color",
+ "description": "Set slide number color",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "color": {
+ "type": "string",
+ "description": "Color name or hex code"
+ }
+ },
+ "required": ["color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_text_strikethrough",
+ "description": "Apply strikethrough formatting to text",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Textbox index (0-based)"
+ },
+ "line_numbers": {
+ "type": "array",
+ "items": {
+ "type": "integer"
+ },
+ "description": "Line numbers for strikethrough (1-based)"
+ },
+ "apply": {
+ "type": "boolean",
+ "description": "Apply or remove strikethrough"
+ }
+ },
+ "required": ["slide_index", "box_index", "line_numbers", "apply"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.set_textbox_alignment",
+ "description": "Set text alignment for textbox",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "slide_index": {
+ "type": "integer",
+ "description": "Slide index (1-based)"
+ },
+ "box_index": {
+ "type": "integer",
+ "description": "Textbox index (0-based)"
+ },
+ "alignment": {
+ "type": "string",
+ "description": "Text alignment",
+ "enum": ["left", "center", "right", "justify"]
+ }
+ },
+ "required": ["slide_index", "box_index", "alignment"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "ImpressTools.export_to_image",
+ "description": "Export presentation or slide to image",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "file_path": {
+ "type": "string",
+ "description": "Image save path with filename and extension"
+ },
+ "format": {
+ "type": "string",
+ "description": "Image format",
+ "enum": ["png", "jpeg", "jpg", "gif", "bmp", "tiff"]
+ },
+ "slide_index": {
+ "type": "integer",
+ "description": "Specific slide index (1-based). If not provided, exports all slides"
+ }
+ },
+ "required": ["file_path", "format"]
+ }
+ }
+ }
+]
diff --git a/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json b/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json
new file mode 100644
index 00000000..d23b78e7
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json
@@ -0,0 +1,412 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.save",
+ "description": "Save document to current location",
+ "parameters": {
+ "type": "object",
+ "properties": {},
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.write_text",
+ "description": "Write text at cursor position",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "text": {
+ "type": "string",
+ "description": "Text to write"
+ },
+ "bold": {
+ "type": "boolean",
+ "description": "Apply bold formatting"
+ },
+ "italic": {
+ "type": "boolean",
+ "description": "Apply italic formatting"
+ },
+ "size": {
+ "type": "number",
+ "description": "Font size"
+ }
+ },
+ "required": ["text"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_color",
+ "description": "Change text color using regex pattern",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "pattern": {
+ "type": "string",
+ "description": "Regex pattern to match"
+ },
+ "color": {
+ "type": "number",
+ "description": "Hex color code (e.g., 0x000000)"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["pattern", "color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.find_and_replace",
+ "description": "Find and replace text using regex",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "pattern": {
+ "type": "string",
+ "description": "Regex pattern to find"
+ },
+ "replacement": {
+ "type": "string",
+ "description": "Replacement text"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["pattern", "replacement"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_font",
+ "description": "Change font family",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "font_name": {
+ "type": "string",
+ "description": "Font name (e.g., 'Arial', 'Times New Roman')"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["font_name"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_line_spacing",
+ "description": "Set line spacing",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "spacing_value": {
+ "type": "number",
+ "description": "Spacing value (1.0=single, 2.0=double)"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["spacing_value"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.remove_highlighting",
+ "description": "Remove text highlighting",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.find_highlighted_text",
+ "description": "Find text with specific highlight color",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "highlight_color": {
+ "type": "string",
+ "description": "Color name (e.g., 'yellow') or hex code"
+ }
+ },
+ "required": ["highlight_color"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.insert_formula_at_cursor",
+ "description": "Insert formula at cursor",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "formula": {
+ "type": "string",
+ "description": "Formula to insert"
+ }
+ },
+ "required": ["formula"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.insert_image_at_cursor",
+ "description": "Insert image at cursor",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "image_path": {
+ "type": "string",
+ "description": "Full path to image file"
+ },
+ "width": {
+ "type": "integer",
+ "description": "Display width in pixels"
+ },
+ "height": {
+ "type": "integer",
+ "description": "Display height in pixels"
+ }
+ },
+ "required": ["image_path"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_strikethrough",
+ "description": "Apply strikethrough formatting",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "pattern": {
+ "type": "string",
+ "description": "Regex pattern to match"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["pattern"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_font_size",
+ "description": "Change font size",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "font_size": {
+ "type": "number",
+ "description": "Font size in points"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Regex pattern to match"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["font_size", "pattern"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.export_to_pdf",
+ "description": "Export document to PDF",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "output_path": {
+ "type": "string",
+ "description": "PDF save path"
+ },
+ "output_filename": {
+ "type": "string",
+ "description": "PDF filename"
+ },
+ "include_comments": {
+ "type": "boolean",
+ "description": "Include comments in PDF"
+ },
+ "quality": {
+ "type": "string",
+ "description": "Export quality ('standard', 'high', 'print')"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_paragraph_alignment",
+ "description": "Set paragraph alignment",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "alignment": {
+ "type": "string",
+ "description": "Alignment type ('left', 'center', 'right', 'justify')"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["alignment"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.capitalize_words",
+ "description": "Capitalize first letter of each word",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.set_default_font",
+ "description": "Set default font for new text",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "font_name": {
+ "type": "string",
+ "description": "Default font name"
+ },
+ "font_size": {
+ "type": "number",
+ "description": "Default font size in points"
+ }
+ },
+ "required": ["font_name"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.add_page_numbers",
+ "description": "Add page numbers",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "position": {
+ "type": "string",
+ "description": "Position ('bottom_left', 'bottom_center', 'bottom_right', 'top_left', 'top_center', 'top_right')"
+ },
+ "start_number": {
+ "type": "integer",
+ "description": "Starting page number"
+ },
+ "format": {
+ "type": "string",
+ "description": "Number format (e.g., '1', 'Page 1', '1 of N')"
+ }
+ },
+ "required": ["position"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.insert_page_break",
+ "description": "Insert page break",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "position": {
+ "type": "string",
+ "description": "Insert location ('at_cursor', 'end_of_document')"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "WriterTools.change_text_case",
+ "description": "Change text case",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "case_type": {
+ "type": "string",
+ "description": "Case type ('lowercase', 'uppercase')"
+ },
+ "pattern": {
+ "type": "string",
+ "description": "Regex pattern to match"
+ },
+ "paragraph_indices": {
+ "type": "array",
+ "description": "Target paragraph indices (0-based). Applies to all if omitted"
+ }
+ },
+ "required": ["case_type", "pattern"]
+ }
+ }
+ }
+]
diff --git a/mm_agents/autoglm_v/tools/apis/vlc.json b/mm_agents/autoglm_v/tools/apis/vlc.json
new file mode 100644
index 00000000..f90e41db
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/apis/vlc.json
@@ -0,0 +1,166 @@
+[
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_playlist",
+ "description": "Get current playlist with track info",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.play",
+ "description": "Start playing current media",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.pause",
+ "description": "Pause current media",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.next",
+ "description": "Switch to next track",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.previous",
+ "description": "Switch to previous track",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.add_to_playlist",
+ "description": "Add media file to playlist",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "uri": {
+ "type": "string",
+ "description": "Media file URI (file:// or https://)"
+ }
+ },
+ "required": ["uri"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_current_time",
+ "description": "Get current playback position in seconds",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_media_duration",
+ "description": "Get media duration in seconds",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.toggle_fullscreen",
+ "description": "Toggle or set fullscreen mode",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "enable": {
+ "type": "boolean",
+ "description": "Force fullscreen on/off, omit to toggle"
+ }
+ },
+ "required": []
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_settings",
+ "description": "Get VLC settings",
+ "parameters": {
+ "type": "object",
+ "properties": {}
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.set_settings",
+ "description": "Set VLC settings",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "field": {
+ "type": "string",
+ "description": "Setting name (e.g. qt-max-volume, qt-minimal-view)"
+ },
+ "value": {
+ "type": "string",
+ "description": "Setting value (use 0/1 for booleans)"
+ }
+ },
+ "required": ["field", "value"]
+ }
+ }
+ },
+ {
+ "type": "function",
+ "function": {
+ "name": "VLCTools.get_media_files",
+ "description": "Get media files from path",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "path": {
+ "type": "string",
+ "description": "Directory path"
+ },
+ "suffix": {
+ "type": "array",
+ "description": "File extensions, default: ['mp4','avi','mkv','mov','mp3','m4a','wav']"
+ }
+ },
+ "required": ["path"]
+ }
+ }
+ }
+]
\ No newline at end of file
diff --git a/mm_agents/autoglm_v/tools/package/code.py b/mm_agents/autoglm_v/tools/package/code.py
new file mode 100644
index 00000000..e580d55f
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/package/code.py
@@ -0,0 +1,260 @@
+import json
+import os
+import subprocess
+from pathlib import Path
+
+
+class CodeTools:
+ ret = ""
+
+ @classmethod
+ def print_result(cls):
+ """打印执行结果"""
+ print(cls.ret)
+
+ @classmethod
+ def launch_vscode(cls, path):
+ """
+ Launches Visual Studio Code with the specified file path or directory.
+ 在存在的窗口中打开一个文件或目录。
+
+ Args:
+ path (str): 文件路径或目录。
+ """
+ try:
+ subprocess.run(["code", "-r", path], check=True)
+ cls.ret = "Successfully launched VS Code"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error launching VS Code: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def env_info(cls):
+ cls.ret = "None"
+
+ @classmethod
+ def compare_files(cls, file1, file2):
+ """
+ Compares two files in VSCode.
+ 在VSCode中比较两个文件。
+
+ Args:
+ file1 (str): 第一个文件的路径。
+ file2 (str): 第二个文件的路径。
+ """
+ try:
+ # 获取compare结果
+ subprocess.run(["code", "-d", file1, file2], check=True)
+ cls.ret = "The compared files are opened in VSCode"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error comparing files: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def add_folder(cls, folder):
+ """
+ Adds a folder to the last active window in VSCode.
+ 向VSCode的最后一个活动窗口添加文件夹。
+
+ Args:
+ folder (str): 文件夹路径。
+ """
+ try:
+ subprocess.run(["code", "-a", folder], check=True)
+ cls.ret = "Successfully added folder"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error adding folder: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def goto_file(cls, file_path, line=1, character=1):
+ """
+ Opens a file at a specific line and character position.
+ 在特定行和字符的位置打开文件。
+
+ Args:
+ file_path (str): 文件路径。
+ line (int): 行号。
+ character (int): 字符位置。
+ """
+ try:
+ command = f"{file_path}:{line}:{character}"
+ subprocess.run(["code", "-g", command], check=True)
+ cls.ret = "Successfully opened file, line: {}, character: {}".format(line, character)
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error going to file: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def perform_merge(cls, path1, path2, base, result):
+ """
+ Perform a three-way merge.
+ 执行三方合并。
+
+ Args:
+ path1 (str): 第一版本文件路径。
+ path2 (str): 第二版本文件路径。
+ base (str): 基础版本文件路径。
+ result (str): 结果文件的保存路径。
+ """
+ try:
+ subprocess.run(["code", "-m", path1, path2, base, result], check=True)
+ cls.ret = "Successfully performed merge"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error performing merge: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def remove_folder(cls, folder):
+ """
+ Removes a folder from the last active window in VSCode.
+ 在VSCode的最后一个活动窗口中移除文件夹。
+
+ Args:
+ folder (str): 文件夹路径。
+ """
+ try:
+ subprocess.run(["code", "--remove", folder], check=True)
+ cls.ret = "Successfully removed folder"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error removing folder: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def install_extension(cls, extension_id, pre_release=False):
+ """
+ Installs an extension or updates it in VSCode.
+ 安装或更新VSCode中的扩展。
+
+ Args:
+ extension_id (str): 扩展的标识符。
+ pre_release (bool): 是否安装预发布版本。
+ """
+ try:
+ command = ["code", "--install-extension", extension_id]
+ if pre_release:
+ command.append("--pre-release")
+ subprocess.run(command, check=True)
+ cls.ret = "Successfully installed extension"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error installing extension: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def uninstall_extension(cls, extension_id):
+ """
+ Uninstalls an extension from VSCode.
+ 从VSCode中卸载扩展。
+
+ Args:
+ extension_id (str): 扩展的标识符。
+ """
+ try:
+ subprocess.run(["code", "--uninstall-extension", extension_id], check=True)
+ cls.ret = "Successfully uninstalled extension"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error uninstalling extension: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def list_extensions(cls, show_versions=False, category=None):
+ """
+ Lists installed extensions in VSCode.
+ 列出VSCode中安装的扩展。
+
+ Args:
+ show_versions (bool): 是否显示扩展的版本。
+ category (str): 按类别筛选扩展。
+ """
+ try:
+ command = ["code", "--list-extensions"]
+ if show_versions:
+ command.append("--show-versions")
+ if category:
+ command.extend(["--category", category])
+ cls.ret = subprocess.run(command, check=True, capture_output=True, text=True).stdout
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error listing extensions: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def update_extensions(cls):
+ """
+ Updates all installed extensions in VSCode to the latest version.
+ 更新VSCode中所有安装的扩展到最新版本。
+ """
+ try:
+ subprocess.run(["code", "--update-extensions"], check=True)
+ cls.ret = "Successfully updated extensions"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error updating extensions: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def disable_extension(cls, extension_id):
+ """
+ Disables a specific extension for the next instance of VSCode.
+ 禁用在下一个VSCode窗口中的指定扩展。
+
+ Args:
+ extension_id (str): 扩展的标识符。
+ """
+ try:
+ subprocess.run(["code", "--disable-extension", extension_id], check=True)
+ cls.ret = "Successfully disabled extension"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error disabling extension: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
+
+ @classmethod
+ def toggle_sync(cls, state):
+ """
+ Toggles synchronization on or off in VSCode.
+ 在VSCode中开启或关闭同步。
+
+ Args:
+ state (str): 'on' 或 'off' 表示开启或关闭。
+ """
+ try:
+ command = ["code", "--sync", state]
+ subprocess.run(command, check=True)
+ cls.ret = "Successfully toggled sync"
+ except subprocess.CalledProcessError as e:
+ cls.ret = f"Error toggling sync: {e}"
+ except Exception as e:
+ cls.ret = f"Unexpected error: {e}"
+
+ return cls.ret
diff --git a/mm_agents/autoglm_v/tools/package/google_chrome.py b/mm_agents/autoglm_v/tools/package/google_chrome.py
new file mode 100644
index 00000000..68afa4c0
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/package/google_chrome.py
@@ -0,0 +1,107 @@
+class BrowserTools:
+ ret = ""
+
+ @classmethod
+ def print_result(cls):
+ print(cls.ret)
+
+ @classmethod
+ def env_info(cls):
+ cls.ret = "None"
+
+ # @classmethod
+ # def show_all_tabs(cls):
+ # cls.ret = "Browser not found"
+ # for attempt in range(3):
+ # with sync_playwright() as p:
+ # try:
+ # browser = p.chromium.connect_over_cdp(cls.remote_debugging_url)
+ # if not browser:
+ # continue
+ # context = browser.contexts[0]
+ # # 获取所有窗口名称
+ # cls.ret = 'Browser Tabs: '
+ # for idx, page in enumerate(context.pages):
+ # cls.ret += f"{idx}. {page.title()} ({page.url})" + '\n'
+ # return cls.ret
+ # except TimeoutError:
+ # cls.ret = 'Failed to get browser tabs'
+ # return None
+ # return None
+
+ @classmethod
+ def open_profile_settings(cls):
+ """
+ Open the profile settings page in the browser.
+ """
+ return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/people"]}}
+
+ @classmethod
+ def open_password_settings(cls):
+ """
+ Open the password settings page in the browser.
+ """
+ return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/autofill"]}}
+
+ @classmethod
+ def open_privacy_settings(cls):
+ """
+ Open the privacy settings page in the browser.
+ """
+ return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/privacy"]}}
+
+ @classmethod
+ def open_appearance_settings(cls):
+ """
+ Open the appearance settings page in the browser.
+ """
+ return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/appearance"]}}
+
+ @classmethod
+ def open_search_engine_settings(cls):
+ """
+ Open the search engine settings page in the browser.
+ """
+ return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/search"]}}
+
+ @classmethod
+ def bring_back_last_tab(cls):
+ """
+ Bring back the last tab in the browser.
+ """
+ return f"import pyautogui; pyautogui.hotkey('ctrl', 'shift', 't'); print('Brought back last tab')"
+
+ @classmethod
+ def print(cls):
+ """
+ Open the print option in current page.
+ """
+ return f"import pyautogui; pyautogui.hotkey('ctrl', 'p'); print('Opened print option')"
+
+ @classmethod
+ def delete_browsing_data(cls):
+ """
+ Delete browsing data in the browser.
+ """
+ return f"import pyautogui; pyautogui.hotkey('ctrl', 'shift', 'del'); print('Deleted browsing data')"
+
+ @classmethod
+ def open_extensions(cls):
+ """
+ open the extensions page in the browser.
+ """
+ return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://extensions"]}}
+
+ @classmethod
+ def bookmark_page(cls):
+ """
+ Bookmark the current page in the browser.
+ """
+ return f"import pyautogui; pyautogui.hotkey('ctrl', 'd'); print('Bookmarked page')"
+
+ @classmethod
+ def open_bookmarks(cls):
+ """
+ Open the bookmarks page in the browser.
+ """
+ return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://bookmarks"]}}
diff --git a/mm_agents/autoglm_v/tools/package/libreoffice_calc.py b/mm_agents/autoglm_v/tools/package/libreoffice_calc.py
new file mode 100644
index 00000000..540da7f9
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/package/libreoffice_calc.py
@@ -0,0 +1,1322 @@
+import json
+import os
+import subprocess
+import sys
+
+import uno
+from com.sun.star.beans import PropertyValue
+
+
+class CalcTools:
+ # Class-level state shared by every classmethod of this class. These statements
+ # run once, when the class body is executed, and resolve a component context
+ # over the UNO socket at localhost:2002.
+ localContext = uno.getComponentContext()
+ resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+ ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext")
+ desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
+ # Component that is current on the desktop at class-definition time.
+ doc = desktop.getCurrentComponent()
+ # Active sheet of that document; switch_active_sheet() rebinds this attribute.
+ sheet = doc.CurrentController.ActiveSheet
+ # Text result of the most recent tool call; print_result() prints it.
+ ret = ""
+
+ @classmethod
+ def close_other_window(cls):
+ """关闭除当前文档外的所有文档"""
+ # 获取所有打开的文档
+ components = cls.desktop.getComponents().createEnumeration()
+ current_url = cls.doc.getURL()
+
+ while components.hasMoreElements():
+ doc = components.nextElement()
+ if doc.getURL() != current_url: # 如果不是当前文档
+ doc.close(True) # True 表示保存更改
+
+ @classmethod
+ def maximize_window(cls):
+ """
+ 将窗口设置为工作区最大尺寸
+ 使用工作区域大小(考虑任务栏等)
+ """
+ window = cls.doc.getCurrentController().getFrame().getContainerWindow()
+ toolkit = window.getToolkit()
+ device = toolkit.createScreenCompatibleDevice(0, 0)
+
+ # 获取工作区域(排除任务栏等)
+ workarea = toolkit.getWorkArea()
+
+ # 设置窗口位置和大小为工作区域
+ window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15)
+
+ @classmethod
+ def print_result(cls):
+ print(cls.ret)
+
+ @classmethod
+ def save(cls):
+ """
+ Save the current workbook to its current location
+
+ Returns:
+ bool: True if save successful, False otherwise
+ """
+ try:
+ # Just save the document
+ cls.doc.store()
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def _get_column_index(cls, column_name, sheet=None):
+ """
+ Get the index of a column by its name (A, B, C, ...)
+
+ Args:
+ column_name (str): Name of the column
+
+ Returns:
+ int: Index of the column
+ """
+ try:
+ return ord(column_name[0]) - ord("A")
+ except ValueError:
+ return None
+
+ @classmethod
+ def _get_last_used_column(cls):
+ """
+ Get the last used column index
+
+ Args:
+ None
+
+ Returns:
+ int: Index of the last used column
+ """
+ cursor = cls.sheet.createCursor()
+ cursor.gotoEndOfUsedArea(False)
+ return cursor.RangeAddress.EndColumn
+
+ @classmethod
+ def _get_last_used_row(cls):
+ """
+ Get the last used row index
+
+ Args:
+ None
+
+ Returns:
+ int: Index of the last used row
+ """
+ cursor = cls.sheet.createCursor()
+ cursor.gotoEndOfUsedArea(False)
+ return cursor.RangeAddress.EndRow
+
+ @classmethod
+ def _column_name_to_index(cls, column_name):
+ """
+ 将列名转换为列索引
+
+ Args:
+ column_name (str): 列名,如 'A', 'AB'
+
+ Returns:
+ int: 列索引(从0开始)
+ """
+ column_name = column_name.upper()
+ result = 0
+ for char in column_name:
+ result = result * 26 + (ord(char) - ord("A") + 1)
+ return result - 1
+
+ @classmethod
+ def get_workbook_info(cls):
+ """
+ Get workbook information
+
+ Args:
+ None
+
+ Returns:
+ dict: Workbook information, including file path, file name, sheets and active sheet
+ """
+ try:
+ info = {
+ "file_path": cls.doc.getLocation(),
+ "file_title": cls.doc.getTitle(),
+ "sheets": [],
+ "active_sheet": cls.sheet.Name,
+ }
+
+ # Get sheets information
+ sheets = cls.doc.getSheets()
+ info["sheet_count"] = sheets.getCount()
+
+ # Get all sheet names and info
+ for i in range(sheets.getCount()):
+ sheet = sheets.getByIndex(i)
+ cursor = sheet.createCursor()
+ cursor.gotoEndOfUsedArea(False)
+ end_col = cursor.getRangeAddress().EndColumn
+ end_row = cursor.getRangeAddress().EndRow
+
+ sheet_info = {
+ "name": sheet.getName(),
+ "index": i,
+ "visible": sheet.IsVisible,
+ "row_count": end_row + 1,
+ "column_count": end_col + 1,
+ }
+ info["sheets"].append(sheet_info)
+
+ # Check if this is the active sheet
+ if sheet == cls.sheet:
+ info["active_sheet"] = sheet_info
+
+ cls.ret = json.dumps(info, ensure_ascii=False)
+ return info
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+
+ @classmethod
+ def env_info(cls, sheet_name=None):
+ """
+ Get content of the specified or active sheet
+
+ Args:
+ sheet_name (str, optional): Name of the sheet to read. If None, uses active sheet
+
+ Returns:
+ dict: Sheet information including name, headers and data
+ """
+ try:
+ # Get the target sheet
+ if sheet_name is not None:
+ sheet = cls.doc.getSheets().getByName(sheet_name)
+ else:
+ sheet = cls.sheet
+
+ # Create cursor to find used range
+ cursor = sheet.createCursor()
+ cursor.gotoEndOfUsedArea(False)
+ end_col = cursor.getRangeAddress().EndColumn
+ end_row = cursor.getRangeAddress().EndRow
+
+ # Generate column headers (A, B, C, ...)
+ col_headers = [chr(65 + i) for i in range(end_col + 1)]
+
+ # Get displayed values from cells
+ data_array = []
+ for row in range(end_row + 1):
+ row_data = []
+ for col in range(end_col + 1):
+ cell = sheet.getCellByPosition(col, row)
+ row_data.append(cell.getString())
+ data_array.append(row_data)
+
+ # Calculate maximum width for each column
+ col_widths = [len(header) for header in col_headers] # Initialize with header lengths
+ for row in data_array:
+ for i, cell in enumerate(row):
+ col_widths[i] = max(col_widths[i], len(str(cell)))
+
+ # Format the header row
+ header_row = " | " + " | ".join(f"{h:<{w}}" for h, w in zip(col_headers, col_widths)) + " |"
+ separator = "--|-" + "-|-".join("-" * w for w in col_widths) + "-|"
+
+ # Format data rows with row numbers
+ formatted_rows = []
+ for row_idx, row in enumerate(data_array, 1):
+ row_str = f"{row_idx:<2}| " + " | ".join(f"{cell:<{w}}" for cell, w in zip(row, col_widths)) + " |"
+ formatted_rows.append(row_str)
+
+ # Combine all parts
+ formated_data = header_row + "\n" + separator + "\n" + "\n".join(formatted_rows)
+
+ # Get sheet properties
+ sheet_info = {
+ "name": sheet.getName(),
+ "data": formated_data,
+ "row_count": end_row + 1,
+ "column_count": end_col + 1,
+ }
+
+ cls.ret = json.dumps(sheet_info, ensure_ascii=False)
+ return sheet_info
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+
+ @classmethod
+ def get_column_data(cls, column_name):
+ """
+ Get data from the specified column
+
+ Args:
+ column_name (str): Name of the column to read
+
+ Returns:
+ list: List of values in the specified column
+ """
+ column_index = cls._get_column_index(column_name)
+ if column_index is None:
+ return "Column not found"
+ last_row = cls._get_last_used_row()
+ _range = cls.sheet.getCellRangeByPosition(column_index, 0, column_index, last_row)
+ # 获取数据数组并展平
+ cls.ret = json.dumps([row[0] for row in _range.getDataArray()], ensure_ascii=False)
+ return [row[0] for row in _range.getDataArray()]
+
+ @classmethod
+ def switch_active_sheet(cls, sheet_name):
+ """
+ Switch to the specified sheet and make it active, create if not exist
+
+ Args:
+ sheet_name (str): Name of the sheet to switch to or create
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取所有工作表
+ sheets = cls.doc.getSheets()
+
+ # 检查工作表是否存在
+ if not sheets.hasByName(sheet_name):
+ # 创建新工作表
+ new_sheet = cls.doc.createInstance("com.sun.star.sheet.Spreadsheet")
+ sheets.insertByName(sheet_name, new_sheet)
+
+ # 获取目标工作表
+ sheet = sheets.getByName(sheet_name)
+
+ # 切换到目标工作表
+ cls.doc.getCurrentController().setActiveSheet(sheet)
+
+ # 更新当前工作表引用
+ cls.sheet = sheet
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def set_column_values(cls, column_name, data, start_index=2):
+ """
+ Set data to the specified column
+
+ Args:
+ column_name (str): Name of the column to write
+ data (list): List of values to write to the column
+ start_index (int): The index of the first row to write to, default is 2 (skip the first row)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ # 获取列的索引
+ column_index = cls._get_column_index(column_name)
+ if column_index is None:
+ cls.ret = "Column not found"
+ return False
+ for i, value in enumerate(data):
+ cell = cls.sheet.getCellByPosition(column_index, i + start_index - 1)
+ if type(value) == float and value.is_integer():
+ cell.setNumber(int(value))
+ else:
+ cell.setString(str(value))
+ cls.ret = "Success"
+ return True
+
+ @classmethod
+ def highlight_range(cls, range_str, color=0xFF0000):
+ """
+ highlight the specified range with the specified color
+
+ Args:
+ range_str (str): Range to highlight, in the format of "A1:B10"
+ color (str): Color to highlight with, default is '0xFF0000' (red)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ _range = cls.sheet.getCellRangeByName(range_str)
+ _range.CellBackColor = color
+ cls.ret = "Success"
+ return True
+ except:
+ cls.ret = "False"
+ return False
+
+ @classmethod
+ def transpose_range(cls, source_range, target_cell):
+ """
+ Transpose the specified range and paste it to the target cell
+
+ Args:
+ source_range (str): Range to transpose, in the format of "A1:B10"
+ target_cell (str): Target cell to paste the transposed data, in the format of "A1"
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ source = cls.sheet.getCellRangeByName(source_range)
+ target = cls.sheet.getCellRangeByName(target_cell)
+
+ data = source.getDataArray()
+ # 转置数据
+ transposed_data = list(map(list, zip(*data)))
+
+ # 设置转置后的数据
+ target_range = cls.sheet.getCellRangeByPosition(
+ target.CellAddress.Column,
+ target.CellAddress.Row,
+ target.CellAddress.Column + len(transposed_data[0]) - 1,
+ target.CellAddress.Row + len(transposed_data) - 1,
+ )
+ target_range.setDataArray(transposed_data)
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def export_to_csv(cls):
+ """
+ Export the current document to a CSV file
+
+ Args:
+ None
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取当前文档的URL
+ doc_url = cls.doc.getURL()
+ if not doc_url:
+ raise ValueError("Document must be saved first")
+
+ # 构造CSV文件路径
+ if doc_url.startswith("file://"):
+ base_path = doc_url[7:] # 移除 'file://' 前缀
+ else:
+ base_path = doc_url
+
+ # 获取基本路径和文件名
+ csv_path = os.path.splitext(base_path)[0] + ".csv"
+
+ # 确保路径是绝对路径
+ csv_path = os.path.abspath(csv_path)
+
+ # 转换为 LibreOffice URL 格式
+ csv_url = uno.systemPathToFileUrl(csv_path)
+
+ # 设置CSV导出选项
+ props = (
+ PropertyValue(Name="FilterName", Value="Text - txt - csv (StarCalc)"),
+ PropertyValue(
+ Name="FilterOptions", Value="44,0,76,0"
+ ), # 44=comma, 34=quote, 76=UTF-8, 1=first row as header
+ )
+
+ # 导出文件
+ cls.doc.storeToURL(csv_url, props)
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def sort_column(cls, column_name, ascending=True, start_index=2):
+ """
+ Sorts the data in the specified column in ascending or descending order
+
+ Args:
+ column_name (str): The name of the column to sort (e.g. 'A') or the title
+ ascending (bool): Whether to sort in ascending order (default True)
+ start_index (int): The index of the first row to sort, default is 1
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+
+ try:
+ column_data = cls.get_column_data(column_name)[start_index - 1 :]
+ column_data = sorted(column_data, key=lambda x: float(x), reverse=not ascending)
+ except:
+ cls.ret = "Error: Invalid column name or data type"
+ return False
+
+ return cls.set_column_values(column_name, column_data, start_index)
+
+ @classmethod
+ def set_validation_list(cls, column_name, values):
+ """
+ Set a validation list for the specified column
+
+ Args:
+ column_name (str): The name of the column to set the validation list for
+ values (list): The list of values to use for the validation list
+
+ Returns:
+ None
+ """
+ try:
+ column_index = cls._get_column_index(column_name)
+ last_row = cls._get_last_used_row()
+ cell_range = cls.sheet.getCellRangeByPosition(column_index, 1, column_index, last_row)
+
+ # 获取现有的验证对象
+ validation = cell_range.getPropertyValue("Validation")
+
+ # 设置基本验证类型
+ validation.Type = uno.Enum("com.sun.star.sheet.ValidationType", "LIST")
+ validation.Operator = uno.Enum("com.sun.star.sheet.ConditionOperator", "EQUAL")
+
+ # 设置下拉列表
+ validation.ShowList = True
+ values_str = ";".join(str(val) for val in values)
+ validation.Formula1 = values_str
+
+ # 应用验证设置回单元格范围
+ cell_range.setPropertyValue("Validation", validation)
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def hide_row_data(cls, value="N/A"):
+ """
+ Hide rows that contain the specified value
+
+ Args:
+ value (str): The value to hide rows for, default is 'N/A'
+
+ Returns:
+ None
+ """
+ last_row = cls._get_last_used_row()
+ last_col = cls._get_last_used_column()
+
+ for row in range(1, last_row + 1):
+ has_value = False
+ for col in range(last_col + 1):
+ cell = cls.sheet.getCellByPosition(col, row)
+ if cell.getString() == value:
+ has_value = True
+ break
+ row_range = cls.sheet.getRows().getByIndex(row)
+ row_range.IsVisible = not has_value
+
+ cls.ret = "Success"
+ return True
+
+ @classmethod
+ def reorder_columns(cls, column_order):
+ """
+ Reorder the columns in the sheet according to the specified order
+
+ Args:
+ column_order (list): A list of column names in the desired order
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取新的列索引
+ new_indices = [cls._get_column_index(col) for col in column_order]
+
+ # 创建新的列顺序
+ for new_index, old_index in enumerate(new_indices):
+ if new_index != old_index:
+ cls.sheet.Columns.insertByIndex(new_index, 1)
+ source = cls.sheet.Columns[old_index + (old_index > new_index)]
+ target = cls.sheet.Columns[new_index]
+ target.setDataArray(source.getDataArray())
+ cls.sheet.Columns.removeByIndex(old_index + (old_index > new_index), 1)
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def create_pivot_table(
+ cls,
+ source_sheet,
+ table_name,
+ row_fields=None,
+ col_fields=None,
+ value_fields=None,
+ aggregation_function="sum",
+ target_cell="A1",
+ ):
+ """
+ Create a pivot table in the active worksheet based on data from the active sheet.
+ """
+ try:
+ source = cls.doc.getSheets().getByName(source_sheet)
+
+ # 获取数据范围
+ cursor = source.createCursor()
+ cursor.gotoEndOfUsedArea(False)
+ end_col = cursor.getRangeAddress().EndColumn
+ end_row = cursor.getRangeAddress().EndRow
+
+ # 获取完整的数据范围
+ source_range = source.getCellRangeByPosition(0, 0, end_col, end_row)
+
+ # 获取数据透视表集合
+ dp_tables = cls.sheet.getDataPilotTables()
+
+ # 创建数据透视表描述符
+ dp_descriptor = dp_tables.createDataPilotDescriptor()
+
+ # 设置数据源
+ dp_descriptor.setSourceRange(source_range.getRangeAddress())
+
+ # 设置行字段
+ if row_fields:
+ for field in row_fields:
+ field_index = cls._get_column_index(field)
+ dimension = dp_descriptor.getDataPilotFields().getByIndex(field_index)
+ dimension.Orientation = uno.Enum("com.sun.star.sheet.DataPilotFieldOrientation", "ROW")
+
+ # 设置列字段
+ if col_fields:
+ for field in col_fields:
+ field_index = cls._get_column_index(field)
+ dimension = dp_descriptor.getDataPilotFields().getByIndex(field_index)
+ dimension.Orientation = uno.Enum("com.sun.star.sheet.DataPilotFieldOrientation", "COLUMN")
+
+ # 设置数据字段
+ for field in value_fields:
+ field_index = cls._get_column_index(field)
+ dimension = dp_descriptor.getDataPilotFields().getByIndex(field_index)
+ dimension.Orientation = uno.Enum("com.sun.star.sheet.DataPilotFieldOrientation", "DATA")
+
+ # 设置聚合函数
+ function_map = {"Count": "COUNT", "Sum": "SUM", "Average": "AVERAGE", "Min": "MIN", "Max": "MAX"}
+
+ if aggregation_function in function_map:
+ dimension.Function = uno.Enum(
+ "com.sun.star.sheet.GeneralFunction", function_map[aggregation_function]
+ )
+
+ # 在当前工作表中创建数据透视表
+ dp_tables.insertNewByName(
+ table_name, # 透视表名称
+ cls.sheet.getCellRangeByName(target_cell).CellAddress, # 目标位置
+ dp_descriptor, # 描述符
+ )
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def merge_cells(cls, range_str):
+ """
+ 合并活动工作表中指定范围的单元格
+
+ Args:
+ range_str (str): 要合并的单元格范围,格式为'A1:B10'
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取当前活动工作表
+ sheet = cls.sheet
+
+ # 获取单元格范围
+ cell_range = sheet.getCellRangeByName(range_str)
+
+ # 获取单元格范围的属性
+ range_props = cell_range.getIsMerged()
+
+ # 如果单元格范围尚未合并,则进行合并
+ if not range_props:
+ cell_range.merge(True)
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def set_cell_value(cls, cell, value):
+ """
+ Set a value to a specific cell in the active worksheet.
+
+ Args:
+ cell (str): Cell reference (e.g., 'A1')
+ value (str): Value to set in the cell
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取单元格对象
+ cell_obj = cls.sheet.getCellRangeByName(cell)
+
+ if isinstance(value, str) and value.startswith("="):
+ # 设置公式
+ cell_obj.Formula = value
+ cls.ret = "Success"
+ return True
+
+ # 尝试将值转换为数字
+ try:
+ # 尝试转换为整数
+ int_value = int(value)
+ cell_obj.Value = int_value
+ except ValueError:
+ try:
+ # 尝试转换为浮点数
+ float_value = float(value)
+ cell_obj.Value = float_value
+ except ValueError:
+ # 如果不是数字,则设置为字符串
+ cell_obj.String = value
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def format_range(cls, range_str, background_color=None, font_color=None, bold=None, alignment=None):
+ """
+ Apply formatting to the specified range in the active worksheet
+
+ Args:
+ range_str (str): Range to format, in the format of 'A1:B10'
+ background_color (str, optional): Background color in hex format (e.g., '#0000ff')
+ font_color (str, optional): Font color in hex format (e.g., '#ffffff')
+ bold (bool, optional): Whether to make the text bold
+ italic (bool, optional): Whether to make the text italic
+ alignment (str, optional): Text alignment (left, center, right)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取指定范围
+ cell_range = cls.sheet.getCellRangeByName(range_str)
+
+ # 设置背景颜色
+ if background_color:
+ # 将十六进制颜色转换为整数
+ bg_color_int = int(background_color.replace("#", ""), 16)
+ cell_range.CellBackColor = bg_color_int
+
+ # 设置字体颜色
+ if font_color:
+ # 将十六进制颜色转换为整数
+ font_color_int = int(font_color.replace("#", ""), 16)
+ cell_range.CharColor = font_color_int
+
+ # 设置粗体
+ if bold is not None:
+ cell_range.CharWeight = 150.0 if bold else 100.0 # 150.0 是粗体,100.0 是正常
+
+ # 设置对齐方式
+ if alignment:
+ # 设置水平对齐方式
+ struct = cell_range.getPropertyValue("HoriJustify")
+ if alignment == "left":
+ struct.value = "LEFT"
+ elif alignment == "center":
+ struct.value = "CENTER"
+ elif alignment == "right":
+ struct.value = "RIGHT"
+ cell_range.setPropertyValue("HoriJustify", struct)
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def create_chart(cls, chart_type, data_range, title=None, x_axis_title=None, y_axis_title=None):
+ """
+ Create a chart in the active worksheet based on the specified data range.
+
+ Args:
+ chart_type (str): Type of chart to create (bar, column, line, pie, scatter, area)
+ data_range (str): Range containing the data for the chart, in the format of 'A1:B10'
+ title (str, optional): Title for the chart
+ x_axis_title (str, optional): Title for the X axis
+ y_axis_title (str, optional): Title for the Y axis
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ # 将图表类型映射到LibreOffice的图表类型常量
+ try:
+ chart_type_map = {
+ "bar": "com.sun.star.chart.BarDiagram",
+ "column": "com.sun.star.chart.ColumnDiagram",
+ "line": "com.sun.star.chart.LineDiagram",
+ "pie": "com.sun.star.chart.PieDiagram",
+ "scatter": "com.sun.star.chart.ScatterDiagram",
+ "area": "com.sun.star.chart.AreaDiagram",
+ }
+
+ # 获取数据范围
+ cell_range_address = cls.sheet.getCellRangeByName(data_range).getRangeAddress()
+
+ # 创建图表
+ charts = cls.sheet.getCharts()
+ rect = uno.createUnoStruct("com.sun.star.awt.Rectangle")
+ rect.Width = 10000 # 默认宽度
+ rect.Height = 7000 # 默认高度
+
+ # 添加图表到工作表
+ charts.addNewByName("MyChart", rect, (cell_range_address,), False, False)
+
+ # 获取图表
+ chart = charts.getByName("MyChart")
+ chart_doc = chart.getEmbeddedObject()
+
+ # 设置图表类型
+ diagram = chart_doc.createInstance(chart_type_map[chart_type])
+ chart_doc.setDiagram(diagram)
+
+ # 设置图表标题
+ if title:
+ chart_doc.Title.String = title
+
+ # 设置X轴标题
+ if x_axis_title:
+ chart_doc.Diagram.XAxis.AxisTitle.String = x_axis_title
+
+ # 设置Y轴标题
+ if y_axis_title:
+ chart_doc.Diagram.YAxis.AxisTitle.String = y_axis_title
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def freeze_panes(cls, rows=0, columns=0):
+ """
+ 冻结活动工作表中的行和/或列
+
+ Args:
+ rows (int): 从顶部开始冻结的行数
+ columns (int): 从左侧开始冻结的列数
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取当前视图
+ view = cls.doc.getCurrentController()
+
+ # 设置冻结窗格
+ view.freezeAtPosition(columns, rows)
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def rename_sheet(cls, old_name, new_name):
+ """
+ 重命名工作表
+
+ Args:
+ old_name (str): 要重命名的工作表的当前名称
+ new_name (str): 工作表的新名称
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取所有工作表
+ sheets = cls.doc.getSheets()
+
+ # 检查原工作表是否存在
+ if not sheets.hasByName(old_name):
+ return False
+
+ # 检查新名称是否已存在
+ if sheets.hasByName(new_name):
+ return False
+
+ # 获取要重命名的工作表
+ sheet = sheets.getByName(old_name)
+
+ # 重命名工作表
+ sheet.setName(new_name)
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def copy_sheet(cls, source_sheet, new_sheet_name=None):
+ """
+ 创建工作簿中现有工作表的副本
+
+ Args:
+ source_sheet (str): 要复制的工作表名称
+ new_sheet_name (str, optional): 新工作表副本的名称,如果不提供则自动生成
+
+ Returns:
+ str: 新创建的工作表名称,如果失败则返回None
+ """
+ try:
+ # 获取所有工作表
+ sheets = cls.doc.getSheets()
+
+ # 检查源工作表是否存在
+ if not sheets.hasByName(source_sheet):
+ return None
+
+ # 如果没有提供新名称,则生成一个
+ if not new_sheet_name:
+ # 生成类似 "Sheet1 (2)" 的名称
+ base_name = source_sheet
+ counter = 1
+ new_sheet_name = f"{base_name} ({counter})"
+
+ # 确保名称不重复
+ while sheets.hasByName(new_sheet_name):
+ counter += 1
+ new_sheet_name = f"{base_name} ({counter})"
+
+ # 检查新名称是否已存在
+ if sheets.hasByName(new_sheet_name):
+ return None # 名称已存在,无法创建
+
+ # 获取源工作表的索引
+ source_index = -1
+ for i in range(sheets.getCount()):
+ if sheets.getByIndex(i).getName() == source_sheet:
+ source_index = i
+ break
+
+ if source_index == -1:
+ return None
+
+ # 复制工作表
+ sheets.copyByName(source_sheet, new_sheet_name, source_index + 1)
+
+ cls.ret = f"New sheet created: {new_sheet_name}"
+ return new_sheet_name
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return None
+
+ @classmethod
+ def reorder_sheets(cls, sheet_name, position):
+ """
+ 重新排序工作表在工作簿中的位置
+
+ Args:
+ sheet_name (str): 要移动的工作表名称
+ position (int): 要移动到的位置(基于0的索引)
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取所有工作表
+ sheets = cls.doc.getSheets()
+
+ # 检查工作表是否存在
+ if not sheets.hasByName(sheet_name):
+ return False
+
+ # 获取工作表总数
+ sheet_count = sheets.getCount()
+
+ # 检查位置是否有效
+ if position < 0 or position >= sheet_count:
+ return False
+
+ # 获取要移动的工作表
+ sheet = sheets.getByName(sheet_name)
+
+ # 获取工作表当前索引
+ current_index = -1
+ for i in range(sheet_count):
+ if sheets.getByIndex(i).Name == sheet_name:
+ current_index = i
+ break
+
+ if current_index == -1:
+ return False
+
+ # 移动工作表到指定位置
+ sheets.moveByName(sheet_name, position)
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def set_chart_legend_position(cls, position):
+ """
+ Set the position of the legend in a chart in the active worksheet.
+
+ Args:
+ position (str): Position of the legend ('top', 'bottom', 'left', 'right', 'none')
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取当前工作表中的所有图表
+ charts = cls.sheet.getCharts()
+ if charts.getCount() == 0:
+ return False
+
+ # 获取第一个图表(假设我们要修改的是第一个图表)
+ chart = charts.getByIndex(0)
+ chart_obj = chart.getEmbeddedObject()
+
+ # 获取图表的图例
+ diagram = chart_obj.getDiagram()
+ legend = chart_obj.getLegend()
+
+ # 根据指定的位置设置图例位置
+ if position == "none":
+ # 如果选择"none",则隐藏图例
+ chart_obj.HasLegend = False
+ else:
+ # 确保图例可见
+ chart_obj.HasLegend = True
+
+ import inspect
+
+ print(inspect.getmembers(legend))
+
+ # 设置图例位置
+ if position == "top":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "TOP")
+ elif position == "bottom":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "BOTTOM")
+ elif position == "left":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "LEFT")
+ elif position == "right":
+ pos = uno.Enum("com.sun.star.chart.ChartLegendPosition", "RIGHT")
+
+ legend.Alignment = pos
+
+ cls.ret = "Success"
+ return True
+ except Exception:
+ cls.ret = "Error"
+ return False
+
+ @classmethod
+ def set_number_format(cls, range_str, format_type, decimal_places=None):
+ """
+ Apply a specific number format to a range of cells in the active worksheet.
+
+ Args:
+ range_str (str): Range to format, in the format of 'A1:B10'
+ format_type (str): Type of number format to apply
+ decimal_places (int, optional): Number of decimal places to display
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ # 获取单元格范围
+ cell_range = cls.sheet.getCellRangeByName(range_str)
+
+ # 获取数字格式化服务
+ number_formats = cls.doc.NumberFormats
+ locale = cls.doc.CharLocale
+
+ # 根据格式类型设置格式字符串
+ format_string = ""
+
+ if format_type == "general":
+ format_string = "General"
+ elif format_type == "number":
+ if decimal_places is not None:
+ format_string = f"0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}"
+ else:
+ format_string = "0"
+ elif format_type == "currency":
+ if decimal_places is not None:
+ format_string = f"[$¥-804]#,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}"
+ else:
+ format_string = "[$¥-804]#,##0.00"
+ elif format_type == "accounting":
+ if decimal_places is not None:
+ format_string = f"_-[$¥-804]* #,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}_-;-[$¥-804]* #,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}_-;_-[$¥-804]* \"-\"_-;_-@_-"
+ else:
+ format_string = '_-[$¥-804]* #,##0.00_-;-[$¥-804]* #,##0.00_-;_-[$¥-804]* "-"??_-;_-@_-'
+ elif format_type == "date":
+ format_string = "YYYY/MM/DD"
+ elif format_type == "time":
+ format_string = "HH:MM:SS"
+ elif format_type == "percentage":
+ if decimal_places is not None:
+ format_string = f"0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}%"
+ else:
+ format_string = "0.00%"
+ elif format_type == "fraction":
+ format_string = "# ?/?"
+ elif format_type == "scientific":
+ if decimal_places is not None:
+ format_string = f"0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}E+00"
+ else:
+ format_string = "0.00E+00"
+ elif format_type == "text":
+ format_string = "@"
+
+ # 获取格式键
+ format_key = number_formats.queryKey(format_string, locale, True)
+
+ # 如果格式不存在,则添加
+ if format_key == -1:
+ format_key = number_formats.addNew(format_string, locale)
+
+ # 应用格式
+ cell_range.NumberFormat = format_key
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def adjust_column_width(cls, columns, width=None, autofit=False):
+ """
+ 调整活动工作表中指定列的宽度
+
+ Args:
+ columns (str): 要调整的列范围,例如 'A:C' 表示从A列到C列
+ width (float, optional): 要设置的宽度(以字符为单位)
+ autofit (bool, optional): 是否自动调整列宽以适应内容
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 解析列范围
+ col_range = columns.split(":")
+ start_col = cls._column_name_to_index(col_range[0])
+
+ if len(col_range) > 1:
+ end_col = cls._column_name_to_index(col_range[1])
+ else:
+ end_col = start_col
+
+ # 获取列对象
+ columns_obj = cls.sheet.getColumns()
+
+ # 遍历指定的列范围
+ for col_idx in range(start_col, end_col + 1):
+ column = columns_obj.getByIndex(col_idx)
+
+ if autofit:
+ # 自动调整列宽
+ column.OptimalWidth = True
+ elif width is not None:
+ # 设置指定宽度(转换为1/100毫米)
+ # 大约一个字符宽度为256 (1/100 mm)
+ column.Width = int(width * 256)
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def adjust_row_height(cls, rows, height=None, autofit=False):
+ """
+ 调整活动工作表中指定行的高度
+
+ Args:
+ rows (str): 要调整的行范围,例如 '1:10' 表示第1行到第10行
+ height (float, optional): 要设置的高度(以点为单位)
+ autofit (bool, optional): 是否自动调整行高以适应内容
+
+ Returns:
+ bool: 操作成功返回True,否则返回False
+ """
+ try:
+ # 解析行范围
+ row_range = rows.split(":")
+ start_row = int(row_range[0])
+ end_row = int(row_range[1]) if len(row_range) > 1 else start_row
+
+ # 获取行对象
+ for row_index in range(start_row, end_row + 1):
+ row = cls.sheet.getRows().getByIndex(row_index - 1) # 索引从0开始
+
+ if autofit:
+ # 自动调整行高以适应内容
+ row.OptimalHeight = True
+ elif height is not None:
+ # 设置指定高度(将点转换为1/100毫米,LibreOffice使用的单位)
+ # 1点 ≈ 35.28 1/100毫米
+ row.Height = int(height * 35.28)
+ row.OptimalHeight = False
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def export_to_pdf(cls, file_path=None, sheets=None, open_after_export=False):
+ """
+ 将当前文档或指定工作表导出为PDF文件
+
+ Args:
+ file_path (str, optional): PDF文件保存路径,如果不指定则使用当前文档路径
+ sheets (list, optional): 要包含在PDF中的工作表名称列表,如果不指定则包含所有工作表
+ open_after_export (bool, optional): 导出后是否打开PDF文件
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 如果未指定文件路径,则使用当前文档路径并更改扩展名为.pdf
+ if not file_path:
+ if cls.doc.hasLocation():
+ url = cls.doc.getLocation()
+ file_path = uno.fileUrlToSystemPath(url)
+ file_path = os.path.splitext(file_path)[0] + ".pdf"
+ else:
+ # 如果文档尚未保存,则在用户桌面创建临时文件
+ desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
+ file_path = os.path.join(desktop_path, "LibreOffice_Export.pdf")
+
+ # 确保文件路径是系统路径,然后转换为URL
+ pdf_url = uno.systemPathToFileUrl(os.path.abspath(file_path))
+
+ # 创建导出属性
+ export_props = []
+
+ # 设置过滤器名称
+ export_props.append(PropertyValue(Name="FilterName", Value="calc_pdf_Export"))
+
+ # 如果指定了特定工作表,则只导出这些工作表
+ if sheets and isinstance(sheets, list) and len(sheets) > 0:
+ # 获取所有工作表
+ all_sheets = cls.doc.getSheets()
+ selection = []
+
+ # 查找指定的工作表
+ for sheet_name in sheets:
+ if all_sheets.hasByName(sheet_name):
+ sheet = all_sheets.getByName(sheet_name)
+ selection.append(sheet)
+
+ # 如果找到了指定的工作表,则设置导出选择
+ if selection:
+ export_props.append(PropertyValue(Name="Selection", Value=tuple(selection)))
+
+ # 导出PDF
+ cls.doc.storeToURL(pdf_url, tuple(export_props))
+
+ # 如果需要,导出后打开PDF
+ if open_after_export:
+ if sys.platform.startswith("darwin"): # macOS
+ subprocess.call(("open", file_path))
+ elif os.name == "nt": # Windows
+ os.startfile(file_path)
+ elif os.name == "posix": # Linux
+ subprocess.call(("xdg-open", file_path))
+
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+ @classmethod
+ def set_zoom_level(cls, zoom_percentage):
+ """
+ 调整当前工作表的缩放级别,使单元格看起来更大或更小
+
+ Args:
+ zoom_percentage (int): 缩放级别的百分比(例如,75表示75%,100表示正常大小,150表示放大)。
+ 有效范围通常为10-400。
+
+ Returns:
+ bool: 成功返回True,失败返回False
+ """
+ try:
+ # 获取当前控制器
+ controller = cls.doc.getCurrentController()
+
+ # 设置缩放值
+ # 确保缩放值在合理范围内
+ if zoom_percentage < 10:
+ zoom_percentage = 10
+ elif zoom_percentage > 400:
+ zoom_percentage = 400
+
+ # 应用缩放值
+ controller.ZoomValue = zoom_percentage
+ cls.ret = "Success"
+ return True
+
+ except Exception as e:
+ cls.ret = f"Error: {e}"
+ return False
+
+
+if __name__ == "__main__":
+ print(CalcTools._get_column_index("A"))
+ print(CalcTools.get_workbook_info())
+ print(CalcTools.get_content())
+ CalcTools.switch_active_sheet("Sheet2")
+ # helper.set_column_values('A', [1, 2, 3, 4, 5])
+ # helper.highlight_range('A1:A3', 'Red')
+ # helper.transpose_range('A1:D5', 'B8')
+ print(CalcTools.get_column_data("A"))
+ CalcTools.sort_column("A", True)
+ CalcTools.hide_row_data("N/A")
+ CalcTools.reorder_columns(["B", "A", "C"])
+ CalcTools.freeze_panes(1, 1)
+ # helper.set_validation_list('C', ['Pass', 'Fail', 'Held'])
+ CalcTools.export_to_csv()
diff --git a/mm_agents/autoglm_v/tools/package/libreoffice_impress.py b/mm_agents/autoglm_v/tools/package/libreoffice_impress.py
new file mode 100644
index 00000000..0b8ba172
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/package/libreoffice_impress.py
@@ -0,0 +1,1424 @@
+import json
+import os
+
+import uno
+from com.sun.star.awt.FontSlant import ITALIC, NONE
+from com.sun.star.awt.FontWeight import BOLD, NORMAL
+from com.sun.star.beans import PropertyValue
+from com.sun.star.drawing.TextHorizontalAdjust import CENTER, LEFT, RIGHT
+
+
+class ImpressTools:
+ # Class-level UNO handles. They are resolved once, when the class body runs,
+ # and every classmethod below reads them through `cls`.
+ localContext = uno.getComponentContext()
+ resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+ # Connect to the office instance listening on localhost:2002 (urp socket).
+ ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext")
+ desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
+ # The component that was current at class-definition time; the tools act on it.
+ doc = desktop.getCurrentComponent()
+ # Result or status message of the most recent tool call (see print_result).
+ ret = ""
+
+ @classmethod
+    def close_other_window(cls):
+        """Close every open document whose URL differs from the current document's."""
+        open_docs = cls.desktop.getComponents().createEnumeration()
+        keep_url = cls.doc.getURL()
+        while open_docs.hasMoreElements():
+            candidate = open_docs.nextElement()
+            if candidate.getURL() == keep_url:
+                continue
+            candidate.close(True)
+
+ @classmethod
+    def save(cls):
+        """
+        Save the document to its current location.
+
+        Returns:
+            bool: True if the document was stored; False if it has no location
+                yet or the store failed. The outcome message is left in cls.ret.
+        """
+        try:
+            if not cls.doc.hasLocation():
+                # Nothing was written, so report failure instead of success.
+                cls.ret = "Error: Document has no save location"
+                return False
+            cls.doc.store()
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {e}"
+            return False
+
+ @classmethod
+    def maximize_window(cls):
+        """
+        Resize the document window to fill the toolkit's work area.
+
+        The work area reported by the toolkit is used (rather than a fixed
+        size), so space taken by task bars and similar is respected.
+        """
+        window = cls.doc.getCurrentController().getFrame().getContainerWindow()
+        workarea = window.getToolkit().getWorkArea()
+        # 15 == com.sun.star.awt.PosSize.POSSIZE: apply X, Y, width and height at once.
+        window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15)
+
+ @classmethod
+ def print_result(cls):
+ # Write the result/status stored by the most recent tool call to stdout.
+ print(cls.ret)
+
+ @classmethod
+    def env_info(cls, page_indices=None):
+        """
+        Describe the text boxes of the requested slides.
+
+        :param page_indices: iterable of 0-based slide indices; None means every
+            slide. Indices outside the presentation are skipped.
+        :return: a string with one "Slide N:" section per slide followed by the
+            1-based current slide index; an empty list if an error occurs
+        """
+        try:
+            pages = cls.doc.getDrawPages()
+            sections = []
+            if page_indices is None:
+                page_indices = range(pages.getCount())
+            for page_index in page_indices:
+                if not 0 <= page_index < pages.getCount():
+                    continue
+                page = pages.getByIndex(page_index)
+                box_lines = []
+                for box_no in range(page.getCount()):
+                    shape = page.getByIndex(box_no)
+                    if not hasattr(shape, "getText"):
+                        continue
+                    text = shape.getText()
+                    if text:
+                        box_lines.append(f"- Box {box_no}: {text.getString().strip()}")
+                joined = "\n".join(box_lines)
+                sections.append(f"Slide {page_index + 1}:\n{joined}\n\n")
+
+            current = cls.get_current_slide_index() + 1
+            summary = "".join(sections) + f"Current Slide Index: {current}"
+            cls.ret = summary
+            return summary
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return []
+
+ @classmethod
+    def get_current_slide_index(cls):
+        """
+        Find the position of the slide currently shown by the controller.
+
+        :return: 0-based index of the active slide, or -1 when it cannot be
+            found or an error occurs (cls.ret then holds the message)
+        """
+        try:
+            active_page = cls.doc.getCurrentController().getCurrentPage()
+            slides = cls.doc.getDrawPages()
+            found = -1
+            for idx in range(slides.getCount()):
+                if slides.getByIndex(idx) == active_page:
+                    found = idx
+                    break
+            if found < 0:
+                cls.ret = "Current slide not found"
+            else:
+                cls.ret = found
+            return found
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return -1
+
+ @classmethod
+    def go_to_slide(cls, slide_index):
+        """
+        Make the given slide the current page of the document controller.
+
+        Args:
+            slide_index (int): Slide to show, counted from 1
+
+        Returns:
+            bool: True when the controller switched to the slide, False when the
+                controller is missing, the index is out of range, or a call fails
+        """
+        try:
+            target = slide_index - 1
+            view = cls.doc.getCurrentController()
+            if not view:
+                cls.ret = "Error: Could not get document controller"
+                return False
+            slides = cls.doc.getDrawPages()
+            if not 0 <= target < slides.getCount():
+                cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1-{slides.getCount()}"
+                return False
+            view.setCurrentPage(slides.getByIndex(target))
+            cls.ret = f"Successfully navigated to slide {slide_index}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error navigating to slide: {str(e)}"
+            return False
+
+ @classmethod
+    def get_slide_count(cls):
+        """
+        Report how many slides the current presentation contains.
+
+        :return: the slide count as an integer, or 0 if it cannot be read
+        """
+        try:
+            total = cls.doc.getDrawPages().getCount()
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return 0
+        cls.ret = total
+        return total
+
+ @classmethod
+ def duplicate_slide(cls, slide_index):
+ """
+ Creates a duplicate of a specific slide and places it at the end of the presentation.
+
+ :param slide_index: The index of the slide to duplicate (1-based indexing)
+ :return: True if successful, False otherwise
+ """
+ try:
+ zero_based_index = slide_index - 1
+ draw_pages = cls.doc.getDrawPages()
+ if zero_based_index < 0 or zero_based_index >= draw_pages.getCount():
+ cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {draw_pages.getCount()}"
+ return False
+ # The dispatch commands act on the controller's current page, so select the source slide first.
+ controller = cls.doc.getCurrentController()
+ controller.setCurrentPage(draw_pages.getByIndex(zero_based_index))
+ dispatcher = cls.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", cls.ctx)
+ frame = controller.getFrame()
+ dispatcher.executeDispatch(frame, ".uno:DuplicatePage", "", 0, ())
+ # Assumes the copy is inserted directly after the source slide -- TODO confirm.
+ duplicated_slide_index = zero_based_index + 1
+ # Count is read after the duplication, so it already includes the new slide.
+ slide_count = draw_pages.getCount()
+ # Only move when the copy is not already the last slide.
+ if duplicated_slide_index < slide_count - 1:
+ controller.setCurrentPage(draw_pages.getByIndex(duplicated_slide_index))
+ # One MovePageDown per slide that still follows the copy.
+ moves_needed = slide_count - duplicated_slide_index - 1
+ for _ in range(moves_needed):
+ dispatcher.executeDispatch(frame, ".uno:MovePageDown", "", 0, ())
+ cls.ret = f"Slide {slide_index} duplicated successfully and moved to the end"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+    def set_slide_font(cls, slide_index, font_name):
+        """
+        Apply one font to the text of every text-capable shape on a slide.
+
+        Args:
+            slide_index (int): Slide to modify, counted from 1
+            font_name (str): Font family to apply (e.g. 'Arial', 'Times New Roman')
+
+        Returns:
+            bool: True if the font was applied, False on a bad index or error
+        """
+        try:
+            target = slide_index - 1
+            slides = cls.doc.getDrawPages()
+            if not 0 <= target < slides.getCount():
+                cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1 to {slides.getCount()}."
+                return False
+            slide = slides.getByIndex(target)
+            for shape_no in range(slide.getCount()):
+                shape = slide.getByIndex(shape_no)
+                text = shape.getText() if hasattr(shape, "getText") else None
+                if not text:
+                    continue
+                # Select the whole text of the shape, then set the font on the selection.
+                selection = text.createTextCursor()
+                selection.gotoStart(False)
+                selection.gotoEnd(True)
+                selection.setPropertyValue("CharFontName", font_name)
+            cls.ret = f"Successfully set font to '{font_name}' for all text elements in slide {slide_index}."
+            return True
+        except Exception as e:
+            cls.ret = f"Error setting font: {str(e)}"
+            return False
+
+ @classmethod
+    def write_text(cls, content, page_index, box_index, bold=False, italic=False, size=None, append=False):
+        """
+        Put text into one shape of a slide, replacing or extending its content.
+
+        :param content: Text to write
+        :param page_index: Slide number, counted from 1
+        :param box_index: Shape position on the slide, counted from 0
+        :param bold: Bold weight when True, normal weight otherwise (default False)
+        :param italic: Italic posture when True, upright otherwise (default False)
+        :param size: Character height to set; None leaves the height untouched
+        :param append: Append to the existing text instead of replacing it (default False)
+        :return: True if successful, False otherwise
+        """
+        try:
+            target_page = page_index - 1
+            pages = cls.doc.getDrawPages()
+            if not 0 <= target_page < pages.getCount():
+                cls.ret = f"Error: Page index {page_index} is out of range"
+                return False
+            page = pages.getByIndex(target_page)
+            if not 0 <= box_index < page.getCount():
+                cls.ret = f"Error: Box index {box_index} is out of range"
+                return False
+            shape = page.getByIndex(box_index)
+            if not hasattr(shape, "String"):
+                cls.ret = f"Error: The shape at index {box_index} cannot contain text"
+                return False
+            shape.String = shape.String + content if append else content
+            # Formatting is only applied when the shape exposes getCharacterProperties.
+            if hasattr(shape, "getCharacterProperties"):
+                formatting = shape.getCharacterProperties()
+                formatting.CharWeight = BOLD if bold else NORMAL
+                formatting.CharPosture = ITALIC if italic else NONE
+                if size is not None:
+                    formatting.CharHeight = size
+
+            cls.ret = f"Text successfully written to page {page_index}, box {box_index}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+ @classmethod
+    def set_style(cls, slide_index, box_index, bold=None, italic=None, underline=None):
+        """
+        Toggle bold / italic / underline on the whole text of one shape.
+
+        :param slide_index: Slide number, counted from 1
+        :param box_index: Shape position on the slide, counted from 0
+        :param bold: True/False to set or clear bold; None leaves it unchanged
+        :param italic: True/False to set or clear italic; None leaves it unchanged
+        :param underline: True/False to set or clear underline; None leaves it unchanged
+        :return: True if successful, False otherwise
+        """
+        try:
+            pages = cls.doc.getDrawPages()
+            if not 1 <= slide_index <= pages.getCount():
+                cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {pages.getCount()}"
+                return False
+            page = pages.getByIndex(slide_index - 1)
+            if not 0 <= box_index < page.getCount():
+                cls.ret = f"Error: Invalid box index {box_index}. Valid range is 0 to {page.getCount() - 1}"
+                return False
+            shape = page.getByIndex(box_index)
+            if not hasattr(shape, "getText"):
+                cls.ret = "Error: The specified shape does not contain text"
+                return False
+            selection = shape.getText().createTextCursor()
+            selection.gotoStart(False)
+            selection.gotoEnd(True)
+            # (requested flag, property name, value when on, value when off)
+            toggles = (
+                (bold, "CharWeight", BOLD, NORMAL),
+                (italic, "CharPosture", ITALIC, NONE),
+                (underline, "CharUnderline", 1, 0),
+            )
+            for flag, prop_name, on_value, off_value in toggles:
+                if flag is not None:
+                    selection.setPropertyValue(prop_name, on_value if flag else off_value)
+            cls.ret = "Style applied successfully"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+ @classmethod
+    def configure_auto_save(cls, enabled, interval_minutes):
+        """
+        Write the auto-save switch and interval into the office configuration.
+
+        :param enabled: True to enable auto-save, False to disable it
+        :param interval_minutes: Minutes between auto-saves; values below 1 are raised to 1
+        :return: True if the configuration was committed, False otherwise
+        """
+        try:
+            interval_minutes = max(interval_minutes, 1)
+            provider = cls.ctx.ServiceManager.createInstanceWithContext(
+                "com.sun.star.configuration.ConfigurationProvider", cls.ctx
+            )
+            node = PropertyValue()
+            node.Name, node.Value = "nodepath", "/org.openoffice.Office.Common/Save/Document"
+            settings = provider.createInstanceWithArguments(
+                "com.sun.star.configuration.ConfigurationUpdateAccess", (node,)
+            )
+            for key, value in (("AutoSave", enabled), ("AutoSaveTimeIntervall", interval_minutes)):
+                settings.setPropertyValue(key, value)
+            settings.commitChanges()
+            state = "enabled" if enabled else "disabled"
+            cls.ret = f"Auto-save {state} with interval of {interval_minutes} minutes"
+            return True
+        except Exception as e:
+            cls.ret = f"Error configuring auto-save: {str(e)}"
+            return False
+
+ @classmethod
+    def set_background_color(cls, slide_index, box_index, color):
+        """
+        Give one shape on a slide a solid fill colour.
+
+        Args:
+            slide_index (int): Slide number, counted from 1
+            box_index (int): Shape position on the slide, counted from 0
+            color (str): A supported colour name (case-insensitive) or a
+                '#RRGGBB' hex code
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            target = slide_index - 1
+            slides = cls.doc.getDrawPages()
+            if not 0 <= target < slides.getCount():
+                cls.ret = f"Error: Slide index {slide_index} is out of range"
+                return False
+            slide = slides.getByIndex(target)
+            if not 0 <= box_index < slide.getCount():
+                cls.ret = f"Error: Box index {box_index} is out of range"
+                return False
+            shape = slide.getByIndex(box_index)
+            # Colour names mapped to 0xRRGGBB integers.
+            named_colors = {
+                "red": 16711680,
+                "green": 65280,
+                "blue": 255,
+                "yellow": 16776960,
+                "black": 0,
+                "white": 16777215,
+                "purple": 8388736,
+                "orange": 16753920,
+                "pink": 16761035,
+                "gray": 8421504,
+                "brown": 10824234,
+                "cyan": 65535,
+                "magenta": 16711935,
+            }
+            fill = named_colors.get(color.lower())
+            if fill is None:
+                if not (color.startswith("#") and len(color) == 7):
+                    cls.ret = f"Error: Invalid color format: {color}"
+                    return False
+                fill = int(color[1:], 16)
+            shape.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "SOLID")
+            shape.FillColor = fill
+            cls.ret = f"Background color of textbox {box_index} on slide {slide_index} set to {color}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+ @classmethod
+    def set_text_color(cls, slide_index, box_index, color):
+        """
+        Colour the whole text of one shape on a slide.
+
+        Args:
+            slide_index (int): Slide number, counted from 1
+            box_index (int): Shape position on the slide, counted from 0
+            color (str): A supported colour name (case-insensitive) or a hex
+                code introduced by '#'
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            target = slide_index - 1
+            slides = cls.doc.getDrawPages()
+            if not 0 <= target < slides.getCount():
+                cls.ret = f"Error: Slide index {slide_index} is out of range"
+                return False
+            slide = slides.getByIndex(target)
+            if not 0 <= box_index < slide.getCount():
+                cls.ret = f"Error: Box index {box_index} is out of range"
+                return False
+            shape = slide.getByIndex(box_index)
+            if not hasattr(shape, "getText"):
+                cls.ret = f"Error: Shape at index {box_index} does not contain text"
+                return False
+            if color.startswith("#"):
+                char_color = int(color[1:], 16)
+            else:
+                # Colour names mapped to 0xRRGGBB integers.
+                named_colors = {
+                    "red": 16711680,
+                    "green": 43315,
+                    "blue": 255,
+                    "black": 0,
+                    "white": 16777215,
+                    "yellow": 16776960,
+                    "cyan": 65535,
+                    "magenta": 16711935,
+                    "gray": 8421504,
+                }
+                char_color = named_colors.get(color.lower())
+                if char_color is None:
+                    cls.ret = f"Error: Unsupported color '{color}'"
+                    return False
+            selection = shape.getText().createTextCursor()
+            selection.gotoStart(False)
+            selection.gotoEnd(True)
+            selection.setPropertyValue("CharColor", char_color)
+            cls.ret = f"Successfully set text color to {color} for textbox {box_index} on slide {slide_index}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+ @classmethod
+    def delete_content(cls, slide_index, box_index):
+        """
+        Remove one shape from a slide.
+
+        :param slide_index: Slide number, counted from 1
+        :param box_index: Shape position on the slide, counted from 0
+        :return: True if the shape was removed, False on a bad index or error
+        """
+        try:
+            pages = cls.doc.getDrawPages()
+            target = slide_index - 1
+            if not 0 <= target < pages.getCount():
+                cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {pages.getCount()}"
+                return False
+            slide = pages.getByIndex(target)
+            if not 0 <= box_index < slide.getCount():
+                cls.ret = f"Error: Invalid box index {box_index}. Valid range is 0 to {slide.getCount() - 1}"
+                return False
+            slide.remove(slide.getByIndex(box_index))
+            cls.ret = f"Successfully deleted textbox {box_index} from slide {slide_index}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+ @classmethod
+    def set_slide_orientation(cls, orientation):
+        """
+        Switch all slides between portrait and landscape by swapping width and height.
+
+        The first slide decides whether a swap is needed; every slide is then
+        given the first slide's swapped dimensions.
+
+        :param orientation: Desired orientation, 'portrait' or 'landscape'
+        :return: True if the slides have (or already had) the requested
+            orientation, False for an unknown orientation or on error
+        """
+        try:
+            # Reject unknown values instead of reporting them as "already in ... orientation".
+            if orientation not in ("portrait", "landscape"):
+                cls.ret = f"Error: Invalid orientation '{orientation}'. Use 'portrait' or 'landscape'"
+                return False
+            draw_pages = cls.doc.getDrawPages()
+            first_page = draw_pages.getByIndex(0)
+            current_width = first_page.Width
+            current_height = first_page.Height
+            needs_swap = (
+                current_width > current_height
+                if orientation == "portrait"
+                else current_width < current_height
+            )
+            if not needs_swap:
+                cls.ret = f"Slides are already in {orientation} orientation"
+                return True
+            for i in range(draw_pages.getCount()):
+                page = draw_pages.getByIndex(i)
+                page.Width = current_height
+                page.Height = current_width
+            cls.ret = f"Changed slide orientation to {orientation}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error changing slide orientation: {str(e)}"
+            return False
+
+ @classmethod
+    def position_box(cls, slide_index, box_index, position):
+        """
+        Move a shape to one of nine predefined spots on its slide.
+
+        The target coordinates are derived from the slide's own Width/Height
+        and the shape's size, keeping a fixed margin from the slide edges.
+
+        :param slide_index: Slide number, counted from 1
+        :param box_index: Shape position on the slide, counted from 0
+        :param position: One of 'left', 'right', 'center', 'top', 'bottom',
+            'top-left', 'top-right', 'bottom-left', 'bottom-right'
+        :return: True if successful, False otherwise
+        """
+        try:
+            pages = cls.doc.getDrawPages()
+            if slide_index < 1 or slide_index > pages.getCount():
+                cls.ret = f"Error: Invalid slide index {slide_index}"
+                return False
+            page = pages.getByIndex(slide_index - 1)
+            if box_index < 0 or box_index >= page.getCount():
+                cls.ret = f"Error: Invalid box index {box_index}"
+                return False
+            shape = page.getByIndex(box_index)
+
+            margin = 500  # same units as shape.Size / page.Width
+            free_width = page.Width - shape.Size.Width
+            free_height = page.Height - shape.Size.Height
+            x_for = {"left": margin, "center": int(free_width / 2), "right": free_width - margin}
+            y_for = {"top": margin, "middle": int(free_height / 2), "bottom": free_height - margin}
+            # position name -> (horizontal anchor, vertical anchor)
+            anchors = {
+                "left": ("left", "middle"),
+                "right": ("right", "middle"),
+                "center": ("center", "middle"),
+                "top": ("center", "top"),
+                "bottom": ("center", "bottom"),
+                "top-left": ("left", "top"),
+                "top-right": ("right", "top"),
+                "bottom-left": ("left", "bottom"),
+                "bottom-right": ("right", "bottom"),
+            }
+            if position not in anchors:
+                cls.ret = f"Error: Invalid position '{position}'"
+                return False
+            horizontal, vertical = anchors[position]
+            new_x = x_for[horizontal]
+            new_y = y_for[vertical]
+
+            # shape.Position hands back a struct copy, so assigning to its X/Y
+            # does not move the shape; a complete Point must be passed back.
+            point = uno.createUnoStruct("com.sun.star.awt.Point")
+            point.X = new_x
+            point.Y = new_y
+            shape.setPosition(point)
+            cls.ret = f"Box positioned at {position} (X: {new_x}, Y: {new_y})"
+            return True
+        except Exception as e:
+            cls.ret = f"Error positioning box: {str(e)}"
+            return False
+
+ @classmethod
+    def insert_file(cls, file_path, slide_index=None, position=None, size=None, autoplay=False):
+        """
+        Insert a video file as a media shape on the current or a given slide.
+
+        Args:
+            file_path (str): Path to the video file; '~' is expanded
+            slide_index (int, optional): Slide number, counted from 1. When
+                omitted, the controller's current slide is used.
+            position (dict, optional): {'x': float, 'y': float} as percentages
+                of the slide's width/height. Defaults to 10% / 10%.
+            size (dict, optional): {'width': float, 'height': float} as
+                percentages of the slide's width/height. Defaults to 80% / 60%.
+            autoplay (bool, optional): Try to mark the media for automatic playback
+
+        Returns:
+            bool: True if the shape was inserted, False otherwise
+        """
+        try:
+            expanded_file_path = os.path.expanduser(file_path)
+            if not os.path.exists(expanded_file_path):
+                cls.ret = f"Error: File not found: {expanded_file_path}"
+                return False
+            file_url = uno.systemPathToFileUrl(os.path.abspath(expanded_file_path))
+            pages = cls.doc.getDrawPages()
+            if slide_index is not None:
+                zero_based_index = slide_index - 1
+                if zero_based_index < 0 or zero_based_index >= pages.getCount():
+                    cls.ret = f"Error: Invalid slide index: {slide_index}"
+                    return False
+                slide = pages.getByIndex(zero_based_index)
+            else:
+                slide = cls.doc.getCurrentController().getCurrentPage()
+
+            # Percentages are resolved against the real slide size, not a fixed guess.
+            slide_width = slide.Width
+            slide_height = slide.Height
+            if position is None:
+                position = {"x": 10, "y": 10}
+            if size is None:
+                size = {"width": 80, "height": 60}
+            x = int(position["x"] * slide_width / 100)
+            y = int(position["y"] * slide_height / 100)
+            width = int(size["width"] * slide_width / 100)
+            height = int(size["height"] * slide_height / 100)
+
+            media_shape = cls.doc.createInstance("com.sun.star.presentation.MediaShape")
+            slide.add(media_shape)
+            media_shape.setPosition(uno.createUnoStruct("com.sun.star.awt.Point", x, y))
+            media_shape.setSize(uno.createUnoStruct("com.sun.star.awt.Size", width, height))
+            media_shape.setPropertyValue("MediaURL", file_url)
+
+            autoplay_note = ""
+            if autoplay:
+                # Autoplay is best effort: the video stays inserted even if the
+                # property cannot be set, but the failure is reported.
+                try:
+                    media_shape.setPropertyValue("MediaIsAutoPlay", True)
+                except Exception as e:
+                    autoplay_note = f" (autoplay could not be enabled: {e})"
+            cls.ret = f"Video inserted successfully from {expanded_file_path}{autoplay_note}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error inserting video: {str(e)}"
+            return False
+
+ @classmethod
+    def set_slide_background(cls, slide_index=None, color=None, image_path=None):
+        """
+        Set a solid colour or a stretched image as the background of one or all slides.
+
+        Args:
+            slide_index (int, optional): Slide number, counted from 1. When
+                omitted, every slide is changed.
+            color (str, optional): A supported colour name (case-insensitive)
+                or a hex code introduced by '#'
+            image_path (str, optional): Image file to use. An existing image
+                takes precedence over color; if the file is missing, color is
+                used as a fallback when it was given.
+
+        Returns:
+            bool: True if the background was set, False for missing or invalid
+                arguments or on error
+        """
+        try:
+            if not color and not image_path:
+                cls.ret = "Error: Either color or image_path must be provided"
+                return False
+            pages = cls.doc.getDrawPages()
+            page_count = pages.getCount()
+
+            rgb_color = None
+            if color:
+                if color.startswith("#"):
+                    rgb_color = int(color.lstrip("#"), 16)
+                else:
+                    color_map = {
+                        "red": 16711680,
+                        "green": 43315,
+                        "blue": 255,
+                        "black": 0,
+                        "white": 16777215,
+                        "yellow": 16776960,
+                        "cyan": 65535,
+                        "magenta": 16711935,
+                        "gray": 8421504,
+                    }
+                    # None marks an unknown name; it must not silently become black.
+                    rgb_color = color_map.get(color.lower())
+
+            file_url = None
+            if image_path and os.path.exists(image_path):
+                file_url = uno.systemPathToFileUrl(os.path.abspath(image_path))
+
+            if file_url is None and rgb_color is None:
+                # Nothing usable was supplied: report it instead of claiming success.
+                if image_path and not color:
+                    cls.ret = f"Error: Image file not found at {image_path}"
+                else:
+                    cls.ret = f"Error: Unsupported color '{color}'"
+                return False
+
+            if slide_index is not None:
+                zero_based_index = slide_index - 1
+                if zero_based_index < 0 or zero_based_index >= page_count:
+                    cls.ret = f"Error: Slide index {slide_index} is out of range (1-{page_count})"
+                    return False
+                slides_to_modify = [pages.getByIndex(zero_based_index)]
+            else:
+                slides_to_modify = [pages.getByIndex(i) for i in range(page_count)]
+
+            for slide in slides_to_modify:
+                # The background object comes from the document's own factory.
+                background = cls.doc.createInstance("com.sun.star.drawing.Background")
+                if file_url is not None:
+                    background.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "BITMAP")
+                    background.FillBitmapURL = file_url
+                    background.FillBitmapMode = uno.Enum("com.sun.star.drawing.BitmapMode", "STRETCH")
+                else:
+                    background.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "SOLID")
+                    background.FillColor = rgb_color
+                slide.setPropertyValue("Background", background)
+            cls.ret = "Background set successfully"
+            return True
+        except Exception as e:
+            cls.ret = f"Error setting background: {str(e)}"
+            return False
+
+ @classmethod
+    def save_as(cls, file_path, overwrite=False):
+        """
+        Store the current document under a new path, picking the filter from the extension.
+
+        Supported extensions are .odp, .ppt, .pptx and .pdf; anything else is
+        written with the native Impress filter. PDF is an export-only format,
+        so it is written with storeToURL and the document keeps its location.
+
+        :param file_path: Destination path including file name and extension
+        :param overwrite: Whether an existing file may be replaced (default: False)
+        :return: True if successful, False otherwise
+        """
+        try:
+            if os.path.exists(file_path) and not overwrite:
+                cls.ret = f"File already exists and overwrite is set to False: {file_path}"
+                return False
+            # Let UNO build the URL so spaces and special characters are escaped.
+            url = uno.systemPathToFileUrl(os.path.abspath(file_path))
+
+            filters = {
+                ".odp": "impress8",
+                ".ppt": "MS PowerPoint 97",
+                ".pptx": "Impress MS PowerPoint 2007 XML",
+                ".pdf": "impress_pdf_Export",
+            }
+            extension = os.path.splitext(file_path)[1].lower()
+            filter_name = filters.get(extension, "impress8")
+
+            overwrite_prop = PropertyValue()
+            overwrite_prop.Name = "Overwrite"
+            overwrite_prop.Value = overwrite
+            filter_prop = PropertyValue()
+            filter_prop.Name = "FilterName"
+            filter_prop.Value = filter_name
+            properties = (overwrite_prop, filter_prop)
+
+            if extension == ".pdf":
+                cls.doc.storeToURL(url, properties)
+            else:
+                cls.doc.storeAsURL(url, properties)
+            cls.ret = f"Document saved successfully to {file_path}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error saving document: {str(e)}"
+            return False
+
+ @classmethod
+    def insert_image(cls, slide_index, image_path, width=None, height=None, position=None):
+        """
+        Insert an image as a graphic shape on a slide.
+
+        Args:
+            slide_index (int): Slide number, counted from 1
+            image_path (str): Path to the image file; '~' is expanded
+            width (float, optional): Width in centimeters; the shape's own
+                width is kept when omitted
+            height (float, optional): Height in centimeters; the shape's own
+                height is kept when omitted
+            position (dict, optional): {'x': float, 'y': float} as percentages
+                of the slide width / height. Missing keys default to 0.
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            image_path = os.path.expanduser(image_path)
+            if not os.path.exists(image_path):
+                cls.ret = f"Error: Image file not found at {image_path}"
+                return False
+            zero_based_index = slide_index - 1
+            slides = cls.doc.getDrawPages()
+            if zero_based_index < 0 or zero_based_index >= slides.getCount():
+                cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1 to {slides.getCount()}"
+                return False
+            slide = slides.getByIndex(zero_based_index)
+
+            image_url = uno.systemPathToFileUrl(os.path.abspath(image_path))
+            shape = cls.doc.createInstance("com.sun.star.drawing.GraphicObjectShape")
+            shape.setPropertyValue("GraphicURL", image_url)
+            slide.add(shape)
+
+            x_pos = 0
+            y_pos = 0
+            if position:
+                if "x" in position:
+                    x_pos = int(position["x"] / 100 * slide.Width)
+                if "y" in position:
+                    y_pos = int(position["y"] / 100 * slide.Height)
+
+            # Centimeters are scaled by 1000 to match the shape's size units.
+            size = uno.createUnoStruct("com.sun.star.awt.Size")
+            size.Width = int(width * 1000) if width is not None else shape.Size.Width
+            size.Height = int(height * 1000) if height is not None else shape.Size.Height
+            point = uno.createUnoStruct("com.sun.star.awt.Point")
+            point.X = x_pos
+            point.Y = y_pos
+            shape.Size = size
+            shape.Position = point
+            cls.ret = f"Image inserted successfully on slide {slide_index}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error inserting image: {str(e)}"
+            return False
+
+ @classmethod
+    def configure_display_settings(
+        cls, use_presenter_view=None, primary_monitor_only=None, monitor_for_presentation=None
+    ):
+        """
+        Apply presentation display settings through the document controller.
+
+        Each setting is attempted independently. A setting that cannot be
+        applied no longer gets hidden behind the success message: all warnings
+        are collected into cls.ret and False is returned.
+
+        Args:
+            use_presenter_view (bool, optional): Enable or disable presenter view
+            primary_monitor_only (bool, optional): Use only the primary monitor
+            monitor_for_presentation (int, optional): Monitor number, 1 for
+                primary, 2 for secondary, etc. (stored zero-based)
+
+        Returns:
+            bool: True if every requested setting was applied, False otherwise
+        """
+        try:
+            controller = cls.doc.getCurrentController()
+            if not hasattr(controller, "getPropertyValue"):
+                cls.ret = "Error: Not an Impress presentation or controller not available"
+                return False
+
+            # (property name, value to set or None to skip, label used in the warning)
+            requested = (
+                ("IsPresentationViewEnabled", use_presenter_view, "presenter view"),
+                ("UsePrimaryMonitorOnly", primary_monitor_only, "primary monitor usage"),
+                (
+                    "MonitorForPresentation",
+                    None if monitor_for_presentation is None else monitor_for_presentation - 1,
+                    "presentation monitor",
+                ),
+            )
+            warnings = []
+            for prop_name, value, label in requested:
+                if value is None:
+                    continue
+                try:
+                    controller.setPropertyValue(prop_name, value)
+                except Exception as e:
+                    warnings.append(f"Warning: Could not set {label}: {str(e)}")
+
+            if warnings:
+                cls.ret = "; ".join(warnings)
+                return False
+            cls.ret = "Display settings configured successfully"
+            return True
+        except Exception as e:
+            cls.ret = f"Error configuring display settings: {str(e)}"
+            return False
+
+ @classmethod
+    def set_text_strikethrough(cls, slide_index, box_index, line_numbers, apply):
+        """
+        Apply or remove strike-through on selected lines of a shape's text.
+
+        Lines are the pieces of the shape's text separated by newline
+        characters. Line numbers outside the text are skipped.
+
+        Args:
+            slide_index (int): Slide number, counted from 1
+            box_index (int): Shape position on the slide, counted from 0
+            line_numbers (list): Line numbers to change, counted from 1
+            apply (bool): True to strike the lines through, False to clear it
+
+        Returns:
+            bool: True if successful, False on a bad index or error
+        """
+        try:
+            slides = cls.doc.getDrawPages()
+            if slide_index < 1 or slide_index > slides.getCount():
+                cls.ret = f"Error: Slide index {slide_index} is out of range"
+                return False
+            slide = slides.getByIndex(slide_index - 1)
+            if box_index < 0 or box_index >= slide.getCount():
+                cls.ret = f"Error: Box index {box_index} is out of range"
+                return False
+            shape = slide.getByIndex(box_index)
+            if not hasattr(shape, "getText"):
+                cls.ret = f"Error: Shape at index {box_index} does not contain text"
+                return False
+            text = shape.getText()
+            lines = text.getString().split("\n")
+
+            # Start offset of every line, computed once; +1 skips the line break.
+            offsets = []
+            position = 0
+            for line in lines:
+                offsets.append(position)
+                position += len(line) + 1
+
+            # Explicit numeric value (1 = single line, 0 = none), as set_style does for underline.
+            strikeout = 1 if apply else 0
+            cursor = text.createTextCursor()
+            for line_number in line_numbers:
+                if not 1 <= line_number <= len(lines):
+                    continue
+                cursor.gotoStart(False)
+                cursor.goRight(offsets[line_number - 1], False)
+                cursor.goRight(len(lines[line_number - 1]), True)
+                cursor.CharStrikeout = strikeout
+            cls.ret = f"Strike-through {'applied' if apply else 'removed'} successfully"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+ @classmethod
+    def set_textbox_alignment(cls, slide_index, box_index, alignment):
+        """
+        Set the horizontal text alignment of one shape on a slide.
+
+        'left', 'center' and 'right' set the shape's TextHorizontalAdjust;
+        'justify' sets the paragraph adjustment of the whole text to BLOCK.
+
+        :param slide_index: Slide number, counted from 1
+        :param box_index: Shape position on the slide, counted from 0
+        :param alignment: 'left', 'center', 'right', or 'justify'
+        :return: True if successful, False otherwise
+        """
+        try:
+            zero_based_slide_index = slide_index - 1
+            slides = cls.doc.getDrawPages()
+            if zero_based_slide_index < 0 or zero_based_slide_index >= slides.getCount():
+                cls.ret = f"Error: Slide index {slide_index} out of range"
+                return False
+            slide = slides.getByIndex(zero_based_slide_index)
+            if box_index < 0 or box_index >= slide.getCount():
+                cls.ret = f"Error: Box index {box_index} out of range"
+                return False
+            shape = slide.getByIndex(box_index)
+            if not hasattr(shape, "getText"):
+                cls.ret = "Error: Selected shape does not support text"
+                return False
+
+            horizontal_adjust = {"left": LEFT, "center": CENTER, "right": RIGHT}
+            if alignment in horizontal_adjust:
+                shape.TextHorizontalAdjust = horizontal_adjust[alignment]
+            elif alignment == "justify":
+                # com.sun.star.style.ParagraphAdjust: LEFT=0, RIGHT=1, BLOCK=2,
+                # CENTER=3. Justified text is BLOCK; 3 would center it.
+                paragraph_adjust_block = 2
+                cursor = shape.getText().createTextCursor()
+                cursor.gotoStart(False)
+                cursor.gotoEnd(True)
+                cursor.ParaAdjust = paragraph_adjust_block
+            else:
+                cls.ret = f"Error: Invalid alignment value: {alignment}"
+                return False
+            cls.ret = f"Successfully set text alignment to {alignment} for textbox {box_index} on slide {slide_index}"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+ @classmethod
+ def set_slide_number_properties(
+ cls, color=None, font_size=None, visible=None, position=None, apply_to="all", slide_indices=None
+ ):
+ """
+ Modifies the properties of slide numbers in the presentation.
+
+ Args:
+ color (str, optional): The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code)
+ font_size (float, optional): The font size for slide numbers (in points)
+ visible (bool, optional): Whether slide numbers should be visible or hidden
+ position (str, optional): The position of slide numbers ('bottom-left', 'bottom-center', 'bottom-right',
+ 'top-left', 'top-center', 'top-right')
+ apply_to (str, optional): Whether to apply changes to 'all', 'current', or 'selected' slides
+ slide_indices (list, optional): Indices of specific slides to change (1-based indexing)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ draw_pages = cls.doc.getDrawPages()
+ master_pages = cls.doc.getMasterPages()
+ pages_to_modify = []
+ if apply_to == "all":
+ for i in range(draw_pages.getCount()):
+ pages_to_modify.append(draw_pages.getByIndex(i))
+ elif apply_to == "current":
+ current_page = cls.doc.getCurrentController().getCurrentPage()
+ pages_to_modify.append(current_page)
+ elif apply_to == "selected" and slide_indices:
+ for idx in slide_indices:
+ if 1 <= idx <= draw_pages.getCount():
+ pages_to_modify.append(draw_pages.getByIndex(idx - 1))
+ for i in range(master_pages.getCount()):
+ master_page = master_pages.getByIndex(i)
+ page_number_shape = None
+ for j in range(master_page.getCount()):
+ shape = master_page.getByIndex(j)
+ if hasattr(shape, "TextType"):
+ try:
+ if shape.TextType == 5:
+ page_number_shape = shape
+ break
+ except:
+ pass
+ if hasattr(shape, "getText"):
+ try:
+ text = shape.getText()
+ if text and text.getTextFields().getCount() > 0:
+ fields = text.getTextFields().createEnumeration()
+ while fields.hasMoreElements():
+ field = fields.nextElement()
+ if "PageNumber" in field.getImplementationName():
+ page_number_shape = shape
+ break
+ if page_number_shape:
+ break
+ except:
+ pass
+ if page_number_shape:
+ if color is not None:
+ color_int = 0
+ if color.startswith("#"):
+ color_int = int(color[1:], 16)
+ elif color == "red":
+ color_int = 16711680
+ elif color == "green":
+ color_int = 65280
+ elif color == "blue":
+ color_int = 255
+ elif color == "black":
+ color_int = 0
+ text = page_number_shape.getText()
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharColor = color_int
+ if font_size is not None:
+ text = page_number_shape.getText()
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharHeight = font_size
+ if position is not None:
+ page_width = master_page.Width
+ page_height = master_page.Height
+ width = page_number_shape.Size.Width
+ height = page_number_shape.Size.Height
+ new_x = 0
+ new_y = 0
+ if position.startswith("bottom"):
+ new_y = page_height - height - 100
+ elif position.startswith("top"):
+ new_y = 100
+ if position.endswith("left"):
+ new_x = 100
+ elif position.endswith("center"):
+ new_x = (page_width - width) / 2
+ elif position.endswith("right"):
+ new_x = page_width - width - 100
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", new_x, new_y)
+ if position.endswith("left"):
+ page_number_shape.ParaAdjust = LEFT
+ elif position.endswith("center"):
+ page_number_shape.ParaAdjust = CENTER
+ elif position.endswith("right"):
+ page_number_shape.ParaAdjust = RIGHT
+ if visible is not None:
+ try:
+ page_number_shape.Visible = visible
+ except:
+ if not visible:
+ page_number_shape.Size = uno.createUnoStruct("com.sun.star.awt.Size", 1, 1)
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", -1000, -1000)
+ elif (
+ visible is True
+ or visible is None
+ and (color is not None or font_size is not None or position is not None)
+ ):
+ page_number_shape = cls.doc.createInstance("com.sun.star.drawing.TextShape")
+ master_page.add(page_number_shape)
+ default_width = 2000
+ default_height = 400
+ page_number_shape.Size = uno.createUnoStruct("com.sun.star.awt.Size", default_width, default_height)
+ page_width = master_page.Width
+ page_height = master_page.Height
+ pos_x = page_width - default_width - 100
+ pos_y = page_height - default_height - 100
+ if position is not None:
+ if position.startswith("bottom"):
+ pos_y = page_height - default_height - 100
+ elif position.startswith("top"):
+ pos_y = 100
+ if position.endswith("left"):
+ pos_x = 100
+ page_number_shape.ParaAdjust = LEFT
+ elif position.endswith("center"):
+ pos_x = (page_width - default_width) / 2
+ page_number_shape.ParaAdjust = CENTER
+ elif position.endswith("right"):
+ pos_x = page_width - default_width - 100
+ page_number_shape.ParaAdjust = RIGHT
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", pos_x, pos_y)
+ text = page_number_shape.getText()
+ cursor = text.createTextCursor()
+ try:
+ page_field = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+ text.insertTextContent(cursor, page_field, False)
+ except:
+ text.setString("<#>")
+ if color is not None:
+ color_int = 0
+ if color.startswith("#"):
+ color_int = int(color[1:], 16)
+ elif color == "red":
+ color_int = 16711680
+ elif color == "green":
+ color_int = 65280
+ elif color == "blue":
+ color_int = 255
+ elif color == "black":
+ color_int = 0
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharColor = color_int
+ if font_size is not None:
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharHeight = font_size
+ if visible is not None:
+ try:
+ page_number_shape.Visible = visible
+ except:
+ if not visible:
+ page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", -1000, -1000)
+ else:
+ try:
+ page_number_shape.Visible = True
+ except:
+ pass
+ try:
+ controller = cls.doc.getCurrentController()
+ view_data = controller.getViewData()
+ controller.restoreViewData(view_data)
+ except:
+ pass
+ cls.ret = "Slide number properties updated successfully"
+ return True
+ except Exception as e:
+ cls.ret = f"Error setting slide number properties: {str(e)}"
+ return False
+
+ @classmethod
+ def set_slide_number(cls, color=None, font_size=None, visible=None, position=None):
+ """
+ Sets the slide number in the presentation.
+
+ :param color: The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code)
+ :param font_size: The font size for slide numbers (in points)
+ :param visible: Whether slide numbers should be visible or hidden
+ :param position: The position of slide numbers on the slides (bottom-left, bottom-center, bottom-right, top-left, top-center, top-right)
+ :return: True if successful, False otherwise
+ """
+ try:
+ controller = cls.doc.getCurrentController()
+ dispatcher = cls.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", cls.ctx)
+ if visible is False:
+ pages = cls.doc.getDrawPages()
+ for i in range(pages.getCount()):
+ page = pages.getByIndex(i)
+ for j in range(page.getCount()):
+ try:
+ shape = page.getByIndex(j)
+ if hasattr(shape, "Presentation") and shape.Presentation == "Number":
+ page.remove(shape)
+ except:
+ pass
+ master_pages = cls.doc.getMasterPages()
+ for i in range(master_pages.getCount()):
+ master_page = master_pages.getByIndex(i)
+ for j in range(master_page.getCount()):
+ try:
+ shape = master_page.getByIndex(j)
+ if hasattr(shape, "Presentation") and shape.Presentation == "Number":
+ master_page.remove(shape)
+ except:
+ pass
+ cls.ret = "Slide numbers hidden successfully"
+ return True
+ if visible is True or color is not None or font_size is not None or position is not None:
+ current_slide = controller.getCurrentPage()
+ master_pages = cls.doc.getMasterPages()
+ if master_pages.getCount() == 0:
+ cls.ret = "No master pages found"
+ return False
+ master_page = master_pages.getByIndex(0)
+ slide_number_shape = cls.doc.createInstance("com.sun.star.drawing.TextShape")
+ slide_number_shape.setSize(uno.createUnoStruct("com.sun.star.awt.Size", 2000, 500))
+ pos = position or "bottom-right"
+ page_width = master_page.Width
+ page_height = master_page.Height
+ x, y = 0, 0
+ if "bottom" in pos:
+ y = page_height - 1000
+ elif "top" in pos:
+ y = 500
+ if "left" in pos:
+ x = 500
+ elif "center" in pos:
+ x = (page_width - 2000) / 2
+ elif "right" in pos:
+ x = page_width - 2500
+ slide_number_shape.setPosition(uno.createUnoStruct("com.sun.star.awt.Point", x, y))
+ master_page.add(slide_number_shape)
+ text = slide_number_shape.getText()
+ cursor = text.createTextCursor()
+ page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+ text.insertTextContent(cursor, page_number, False)
+ if "center" in pos:
+ slide_number_shape.setPropertyValue("TextHorizontalAdjust", CENTER)
+ elif "right" in pos:
+ slide_number_shape.setPropertyValue("TextHorizontalAdjust", RIGHT)
+ elif "left" in pos:
+ slide_number_shape.setPropertyValue("TextHorizontalAdjust", LEFT)
+ if font_size is not None:
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.setPropertyValue("CharHeight", font_size)
+ if color is not None:
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ if color.startswith("#") and len(color) == 7:
+ r = int(color[1:3], 16)
+ g = int(color[3:5], 16)
+ b = int(color[5:7], 16)
+ cursor.setPropertyValue("CharColor", (r << 16) + (g << 8) + b)
+ else:
+ color_map = {
+ "red": 16711680,
+ "green": 65280,
+ "blue": 255,
+ "black": 0,
+ "white": 16777215,
+ "yellow": 16776960,
+ "cyan": 65535,
+ "magenta": 16711935,
+ "gray": 8421504,
+ }
+ if color.lower() in color_map:
+ cursor.setPropertyValue("CharColor", color_map[color.lower()])
+ cls.ret = "Slide numbers added and configured successfully"
+ return True
+ except Exception as e:
+ cls.ret = f"Error setting slide number: {str(e)}"
+ return False
+
+ @classmethod
+ def set_slide_number_color(cls, color):
+ """
+ Sets the color of the slide number in the presentation.
+
+ Args:
+ color (str): The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code)
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ color_map = {
+ "black": 0,
+ "white": 16777215,
+ "red": 16711680,
+ "green": 65280,
+ "blue": 255,
+ "yellow": 16776960,
+ "cyan": 65535,
+ "magenta": 16711935,
+ "gray": 8421504,
+ "orange": 16753920,
+ "purple": 8388736,
+ }
+ if color.lower() in color_map:
+ rgb_color = color_map[color.lower()]
+ else:
+ if color.startswith("#"):
+ color = color[1:]
+ try:
+ if len(color) == 6:
+ rgb_color = int(color, 16)
+ else:
+ rgb_color = 0
+ except ValueError:
+ rgb_color = 0
+ found = False
+ master_pages = cls.doc.getMasterPages()
+ for i in range(master_pages.getCount()):
+ master_page = master_pages.getByIndex(i)
+ for j in range(master_page.getCount()):
+ shape = master_page.getByIndex(j)
+ if hasattr(shape, "getText") and shape.getText() is not None:
+ text = shape.getText()
+ try:
+ enum = text.createEnumeration()
+ while enum.hasMoreElements():
+ para = enum.nextElement()
+ if hasattr(para, "createEnumeration"):
+ para_enum = para.createEnumeration()
+ while para_enum.hasMoreElements():
+ portion = para_enum.nextElement()
+ if (
+ hasattr(portion, "TextPortionType")
+ and portion.TextPortionType == "TextField"
+ ):
+ if hasattr(portion, "TextField") and portion.TextField is not None:
+ field = portion.TextField
+ if hasattr(field, "supportsService") and (
+ field.supportsService(
+ "com.sun.star.presentation.TextField.PageNumber"
+ )
+ or field.supportsService("com.sun.star.text.TextField.PageNumber")
+ ):
+ portion.CharColor = rgb_color
+ found = True
+ except Exception as e:
+ continue
+ draw_pages = cls.doc.getDrawPages()
+ for i in range(draw_pages.getCount()):
+ page = draw_pages.getByIndex(i)
+ for j in range(page.getCount()):
+ shape = page.getByIndex(j)
+ if hasattr(shape, "getText") and shape.getText() is not None:
+ text = shape.getText()
+ try:
+ enum = text.createEnumeration()
+ while enum.hasMoreElements():
+ para = enum.nextElement()
+ if hasattr(para, "createEnumeration"):
+ para_enum = para.createEnumeration()
+ while para_enum.hasMoreElements():
+ portion = para_enum.nextElement()
+ if (
+ hasattr(portion, "TextPortionType")
+ and portion.TextPortionType == "TextField"
+ ):
+ if hasattr(portion, "TextField") and portion.TextField is not None:
+ field = portion.TextField
+ if hasattr(field, "supportsService") and (
+ field.supportsService(
+ "com.sun.star.presentation.TextField.PageNumber"
+ )
+ or field.supportsService("com.sun.star.text.TextField.PageNumber")
+ ):
+ portion.CharColor = rgb_color
+ found = True
+ except Exception as e:
+ continue
+ for i in range(draw_pages.getCount()):
+ page = draw_pages.getByIndex(i)
+ for j in range(page.getCount()):
+ shape = page.getByIndex(j)
+ if hasattr(shape, "getText") and shape.getText() is not None:
+ text = shape.getText()
+ text_string = text.getString()
+ if text_string.isdigit() and len(text_string) <= 3:
+ try:
+ cursor = text.createTextCursor()
+ cursor.gotoStart(False)
+ cursor.gotoEnd(True)
+ cursor.CharColor = rgb_color
+ found = True
+ except Exception as e:
+ continue
+ if found:
+ cls.ret = f"Slide number color set to {color}"
+ return True
+ else:
+ cls.ret = "Could not find slide numbers to change color"
+ return False
+ except Exception as e:
+ cls.ret = f"Error setting slide number color: {str(e)}"
+ return False
+
+ @classmethod
+ def export_to_image(cls, file_path, format, slide_index=None):
+ """
+ Exports the current presentation or a specific slide to an image file format.
+
+ Args:
+ file_path (str): The full path where the image file should be saved, including the filename and extension
+ format (str): The image format to export to (e.g., 'png', 'jpeg', 'gif')
+ slide_index (int, optional): The index of the specific slide to export (1-based indexing).
+ If not provided, exports the entire presentation as a series of images.
+
+ Returns:
+ bool: True if export was successful, False otherwise
+ """
+ try:
+ format = format.lower()
+ valid_formats = ["png", "jpeg", "jpg", "gif", "bmp", "tiff"]
+ if format not in valid_formats:
+ cls.ret = f"Error: Invalid format '{format}'. Valid formats are: {', '.join(valid_formats)}"
+ return False
+ if format == "jpg":
+ format = "jpeg"
+ pages = cls.doc.getDrawPages()
+ page_count = pages.getCount()
+ if slide_index is not None:
+ slide_index = slide_index - 1
+ if slide_index < 0 or slide_index >= page_count:
+ cls.ret = f"Error: Invalid slide index {slide_index + 1}. Valid range is 1 to {page_count}"
+ return False
+ controller = cls.doc.getCurrentController()
+ filter_name = f"draw_{format}_Export"
+ filter_data = PropertyValue(Name="FilterData", Value=())
+ if slide_index is not None:
+ controller.setCurrentPage(pages.getByIndex(slide_index))
+ props = PropertyValue(Name="FilterName", Value=filter_name), filter_data
+ cls.doc.storeToURL(uno.systemPathToFileUrl(file_path), props)
+ cls.ret = f"Successfully exported slide {slide_index + 1} to {file_path}"
+ return True
+ else:
+ base_name, ext = os.path.splitext(file_path)
+ for i in range(page_count):
+ controller.setCurrentPage(pages.getByIndex(i))
+ if page_count == 1:
+ current_file = f"{base_name}.{format}"
+ else:
+ current_file = f"{base_name}_{i + 1}.{format}"
+ props = PropertyValue(Name="FilterName", Value=filter_name), filter_data
+ cls.doc.storeToURL(uno.systemPathToFileUrl(current_file), props)
+
+ if page_count == 1:
+ cls.ret = f"Successfully exported {page_count} slides to {base_name}.{format}"
+ else:
+ cls.ret = f"Successfully exported {page_count} slides to {base_name}_[1-{page_count}].{format}"
+ return True
+ except Exception as e:
+ cls.ret = f"Error exporting to image: {str(e)}"
+ return False
diff --git a/mm_agents/autoglm_v/tools/package/libreoffice_writer.py b/mm_agents/autoglm_v/tools/package/libreoffice_writer.py
new file mode 100644
index 00000000..35095c85
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/package/libreoffice_writer.py
@@ -0,0 +1,753 @@
+import os
+import re
+
+import uno
+from com.sun.star.awt.FontSlant import ITALIC, NONE, OBLIQUE
+from com.sun.star.awt.FontWeight import BOLD, NORMAL
+from com.sun.star.beans import PropertyValue
+from com.sun.star.style.ParagraphAdjust import CENTER, LEFT, RIGHT
+from com.sun.star.text.ControlCharacter import PARAGRAPH_BREAK
+from com.sun.star.text.TextContentAnchorType import AS_CHARACTER
+
+
+class WriterTools:
+ localContext = uno.getComponentContext()
+ resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+ ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext")
+ desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
+ doc = desktop.getCurrentComponent()
+ text = doc.Text
+ cursor = text.createTextCursor()
+ ret = ""
+
+ @classmethod
+ def close_other_window(cls):
+ """关闭除当前文档外的所有文档"""
+ components = cls.desktop.getComponents().createEnumeration()
+ current_url = cls.doc.getURL()
+ while components.hasMoreElements():
+ doc = components.nextElement()
+ if doc.getURL() != current_url:
+ doc.close(True)
+
+ @classmethod
+ def save(cls):
+ """保存文档到当前位置"""
+ try:
+ if cls.doc.hasLocation():
+ cls.doc.store()
+ else:
+ raise Exception("文档没有保存位置,请使用另存为功能")
+ return True
+ except Exception as e:
+ return False
+
+ @classmethod
+ def maximize_window(cls):
+ """
+ 将窗口设置为工作区最大尺寸
+ 使用工作区域大小(考虑任务栏等)
+ """
+ window = cls.doc.getCurrentController().getFrame().getContainerWindow()
+ toolkit = window.getToolkit()
+ device = toolkit.createScreenCompatibleDevice(0, 0)
+ workarea = toolkit.getWorkArea()
+ window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15)
+
+ @classmethod
+ def print_result(cls):
+ print(cls.ret)
+
+ @classmethod
+ def write_text(cls, text, bold=False, italic=False, size=None):
+ """写入文本"""
+ cls.cursor.CharWeight = 150 if bold else 100
+ cls.cursor.CharPosture = ITALIC if italic else NONE
+ if size:
+ cls.cursor.CharHeight = size
+ cls.text.insertString(cls.cursor, text, False)
+ cls.ret = "Success"
+
+ @classmethod
+ def get_paragraphs(cls, start_index=0, count=None):
+ """Retrieves paragraphs from the document as a list."""
+ text = cls.doc.getText()
+ paragraphs = text.createEnumeration()
+ paragraph_list = []
+ while paragraphs.hasMoreElements():
+ paragraph = paragraphs.nextElement()
+ if paragraph.supportsService("com.sun.star.text.Paragraph"):
+ paragraph_list.append(paragraph.getString())
+ if start_index < 0:
+ start_index = 0
+ elif start_index >= len(paragraph_list):
+ cls.ret = []
+ if count is not None:
+ end_index = min(start_index + count, len(paragraph_list))
+ cls.ret = paragraph_list[start_index:end_index]
+ else:
+ cls.ret = paragraph_list[start_index:]
+ return cls.ret
+
+ @classmethod
+ def env_info(cls):
+ paras = cls.get_paragraphs()
+ para_str = ""
+ for i, para in enumerate(paras):
+ para = para[:500] + "..." if len(para) > 500 else para
+ para_str += "Paragraph " + str(i) + ": " + para.strip() + "\n"
+ cls.ret = para_str
+ return cls.ret
+
+ @classmethod
+ def set_color(cls, pattern, color, paragraph_indices=None):
+ """
+ Changes the color of matched text in the document for specified paragraphs.
+
+ Args:
+ pattern (str): Regular expression pattern to match text
+ color (int): Hex color code (e.g., 0x000000 for black)
+ paragraph_indices (list, optional): List of paragraph indices to modify (0-based).
+ If None, applies to all paragraphs.
+ """
+ try:
+ enum = cls.doc.Text.createEnumeration()
+ paragraphs = []
+ while enum.hasMoreElements():
+ paragraphs.append(enum.nextElement())
+ if not paragraph_indices:
+ paragraphs_to_process = range(len(paragraphs))
+ else:
+ paragraphs_to_process = paragraph_indices
+ regex = re.compile(pattern)
+ for idx in paragraphs_to_process:
+ if idx < 0 or idx >= len(paragraphs):
+ continue
+ paragraph = paragraphs[idx]
+ if not paragraph.supportsService("com.sun.star.text.Paragraph"):
+ continue
+ para_text = paragraph.getString()
+ matches = regex.finditer(para_text)
+ for match in matches:
+ para_cursor = cls.text.createTextCursorByRange(paragraph.getStart())
+ para_cursor.goRight(match.start(), False)
+ para_cursor.goRight(match.end() - match.start(), True)
+ para_cursor.CharColor = color
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def find_and_replace(cls, pattern, replacement, paragraph_indices=None):
+ """
+ Finds all occurrences of a specified text pattern and replaces them with another text in the document.
+
+ Args:
+ pattern (str): The pattern to match in the document, should be a regular expression
+ replacement (str): The text to replace the found text with
+ paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing)
+
+ Returns:
+ str: Success message with number of replacements made
+ """
+ try:
+ enum = cls.doc.Text.createEnumeration()
+ paragraphs = []
+ while enum.hasMoreElements():
+ paragraphs.append(enum.nextElement())
+ total_replacements = 0
+ if not paragraph_indices:
+ paragraphs_to_process = list(range(len(paragraphs)))
+ else:
+ paragraphs_to_process = [i for i in paragraph_indices if 0 <= i < len(paragraphs)]
+ regex = re.compile(pattern)
+ for idx in paragraphs_to_process:
+ if idx >= len(paragraphs):
+ continue
+ paragraph = paragraphs[idx]
+ if paragraph.supportsService("com.sun.star.text.Paragraph"):
+ text_content = paragraph.getString()
+ new_text, count = regex.subn(replacement, text_content)
+ if count > 0:
+ paragraph.setString(new_text)
+ total_replacements += count
+ cls.ret = f"Successfully made {total_replacements} replacements"
+ return cls.ret
+ except Exception as e:
+ cls.ret = f"Error during find and replace: {str(e)}"
+ return cls.ret
+
+ @classmethod
+ def set_font(cls, font_name, paragraph_indices=None):
+ """
+ Changes the font of text in the document or specified paragraphs.
+
+ Args:
+ font_name (str): The name of the font to apply (e.g., 'Times New Roman', 'Arial', 'Calibri')
+ paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+ If not provided, applies to all paragraphs.
+ """
+ try:
+ text = cls.doc.getText()
+ enum = text.createEnumeration()
+ paragraphs = []
+ while enum.hasMoreElements():
+ paragraphs.append(enum.nextElement())
+ if not paragraph_indices:
+ paragraph_indices = range(len(paragraphs))
+ for idx in paragraph_indices:
+ if 0 <= idx < len(paragraphs):
+ paragraph = paragraphs[idx]
+ cursor = text.createTextCursorByRange(paragraph)
+ cursor.CharFontName = font_name
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def set_line_spacing(cls, spacing_value, paragraph_indices=None):
+ """
+ Sets the line spacing for specified paragraphs in the document.
+
+ Args:
+ spacing_value (float): The line spacing value to apply (1.0 for single spacing, 2.0 for double spacing, etc.)
+ paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+ If not provided, applies to all paragraphs.
+ """
+ try:
+ text = cls.doc.getText()
+ paragraph_enum = text.createEnumeration()
+ line_spacing_value = int(spacing_value * 100)
+ current_index = 0
+
+ while paragraph_enum.hasMoreElements():
+ paragraph = paragraph_enum.nextElement()
+
+ if not paragraph_indices or current_index in paragraph_indices:
+ line_spacing = uno.createUnoStruct("com.sun.star.style.LineSpacing")
+ line_spacing.Mode = 0
+ line_spacing.Height = line_spacing_value
+ paragraph.ParaLineSpacing = line_spacing
+
+ if paragraph.String.strip():
+ current_index += 1
+
+ cls.ret = "Success"
+ return True
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return False
+
+ @classmethod
+ def remove_highlighting(cls, paragraph_indices=None):
+ """
+ Removes ALL highlighting from text in the document for specified paragraphs.
+
+ Args:
+ paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+ If not provided, applies to all paragraphs.
+
+ Returns:
+ str: Success message or error message
+ """
+ try:
+ text = cls.doc.getText()
+ paragraphs = text.createEnumeration()
+ target_indices = set(paragraph_indices) if paragraph_indices else None
+ current_index = 0
+
+ while paragraphs.hasMoreElements():
+ paragraph = paragraphs.nextElement()
+ if target_indices is None or current_index in target_indices:
+ if paragraph.supportsService("com.sun.star.text.Paragraph"):
+ para_cursor = text.createTextCursorByRange(paragraph)
+ # Remove all highlighting by setting back color to -1
+ para_cursor.CharBackColor = -1
+
+ # Additional cleanup for individual text portions (optional)
+ text_portions = paragraph.createEnumeration()
+ while text_portions.hasMoreElements():
+ text_portion = text_portions.nextElement()
+ if hasattr(text_portion, "CharBackColor"):
+ portion_cursor = text.createTextCursorByRange(text_portion)
+ portion_cursor.CharBackColor = -1
+ current_index += 1
+
+ cls.ret = "Successfully removed all highlighting"
+ return cls.ret
+ except Exception as e:
+ cls.ret = f"Error removing highlighting: {str(e)}"
+ return cls.ret
+
+ @classmethod
+ def find_highlighted_text(cls, highlight_color):
+ """
+ Finds all text in the document that has a specific highlight color applied to it.
+
+ Args:
+ highlight_color (str): The highlight color to search for. Can be a color name (e.g., 'yellow', 'green') or hex code.
+
+ Returns:
+ list: A list of strings containing all text segments with the specified highlight color.
+ """
+ color_map = {
+ "yellow": 16776960,
+ "green": 65280,
+ "blue": 255,
+ "red": 16711680,
+ "cyan": 65535,
+ "magenta": 16711935,
+ "black": 0,
+ "white": 16777215,
+ "gray": 8421504,
+ "lightgray": 12632256,
+ }
+ target_color = None
+ if highlight_color.lower() in color_map:
+ target_color = color_map[highlight_color.lower()]
+ elif highlight_color.startswith("#") and len(highlight_color) == 7:
+ try:
+ hex_color = highlight_color[1:]
+ r = int(hex_color[0:2], 16)
+ g = int(hex_color[2:4], 16)
+ b = int(hex_color[4:6], 16)
+ target_color = (r << 16) + (g << 8) + b
+ except ValueError:
+ cls.ret = f"Invalid hex color format: {highlight_color}"
+ return []
+ else:
+ cls.ret = f"Unsupported color format: {highlight_color}"
+ return []
+ highlighted_text = []
+ text = cls.doc.getText()
+ enum_paragraphs = text.createEnumeration()
+ while enum_paragraphs.hasMoreElements():
+ paragraph = enum_paragraphs.nextElement()
+ if paragraph.supportsService("com.sun.star.text.Paragraph"):
+ enum_portions = paragraph.createEnumeration()
+ while enum_portions.hasMoreElements():
+ text_portion = enum_portions.nextElement()
+ if hasattr(text_portion, "CharBackColor") and text_portion.CharBackColor == target_color:
+ if text_portion.getString().strip():
+ highlighted_text.append(text_portion.getString())
+ cls.ret = f"Found {len(highlighted_text)} text segments with highlight color {highlight_color}"
+ return highlighted_text
+
+ @classmethod
+ def insert_formula_at_cursor(cls, formula):
+ """
+ Inserts a formula at the current cursor position in the document.
+
+ Args:
+ formula (str): The formula to insert at the current cursor position.
+
+ Returns:
+ bool: True if successful, False otherwise
+ """
+ try:
+ embedded_obj = cls.doc.createInstance("com.sun.star.text.TextEmbeddedObject")
+ embedded_obj.setPropertyValue("CLSID", "078B7ABA-54FC-457F-8551-6147e776a997")
+ embedded_obj.setPropertyValue("AnchorType", AS_CHARACTER)
+ cls.text.insertTextContent(cls.cursor, embedded_obj, False)
+ math_obj = embedded_obj.getEmbeddedObject()
+ math_obj.Formula = formula
+ cls.ret = "Formula inserted successfully"
+ return True
+ except Exception as e:
+ cls.ret = f"Error inserting formula: {str(e)}"
+ return False
+
+ @classmethod
+ def insert_image_at_cursor(cls, image_path, width=None, height=None):
+ """
+ Inserts an image at the current cursor position in the document.
+
+ Args:
+ image_path (str): Full path to the image file to insert
+ width (int, optional): Width to display the image in pixels
+ height (int, optional): Height to display the image in pixels
+
+ Returns:
+ str: Success message or error message
+ """
+ try:
+ if image_path.startswith("~"):
+ image_path = os.path.expanduser(image_path)
+ if not os.path.exists(image_path):
+ cls.ret = f"Error: Image file not found at {image_path}"
+ return cls.ret
+ image_path = os.path.abspath(image_path)
+ if os.name == "nt":
+ file_url = "file:///" + image_path.replace("\\", "/")
+ else:
+ file_url = "file://" + image_path
+ graphic = cls.doc.createInstance("com.sun.star.text.GraphicObject")
+ graphic.GraphicURL = file_url
+ graphic.AnchorType = AS_CHARACTER
+ if width is not None:
+ graphic.Width = width * 100
+ if height is not None:
+ graphic.Height = height * 100
+ cls.text.insertTextContent(cls.cursor, graphic, False)
+ cls.ret = "Success: Image inserted"
+ return cls.ret
+ except Exception as e:
+ cls.ret = f"Error: {str(e)}"
+ return cls.ret
+
+ @classmethod
+ def set_strikethrough(cls, pattern, paragraph_indices=None):
+ """
+ Sets the strikethrough formatting for text matching the specified pattern in the document.
+
+ Args:
+ pattern (str): The regular expression pattern to match in the document
+ paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+ If not provided, applies to all paragraphs.
+
+ Returns:
+ str: Success message or error information
+ """
+ try:
+ paragraphs = cls.doc.getText().createEnumeration()
+ para_index = 0
+ found_matches = 0
+ while paragraphs.hasMoreElements():
+ paragraph = paragraphs.nextElement()
+ if paragraph.supportsService("com.sun.star.text.Paragraph"):
+ if paragraph_indices and para_index not in paragraph_indices:
+ para_index += 1
+ continue
+ para_text = paragraph.getString()
+ matches = list(re.finditer(pattern, para_text))
+ for match in matches:
+ text_range = paragraph.getStart()
+ cursor = cls.doc.getText().createTextCursorByRange(text_range)
+ cursor.goRight(match.start(), False)
+ cursor.goRight(match.end() - match.start(), True)
+ cursor.CharStrikeout = 1
+ found_matches += 1
+ para_index += 1
+ cls.ret = f"Successfully applied strikethrough to {found_matches} matches of pattern: {pattern}"
+ return cls.ret
+ except Exception as e:
+ cls.ret = f"Error applying strikethrough: {str(e)}"
+ return cls.ret
+
@classmethod
def set_font_size(cls, font_size, pattern, paragraph_indices=None):
    """
    Changes the font size of specified text in the document.

    Args:
        font_size (float): The font size to apply (in points).
        pattern (str): The pattern to match in the document, should be a regular expression.
        paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
            Only elements supporting the Paragraph service are counted.
            If not provided, applies to all paragraphs.

    Returns:
        str: Result message indicating success or failure (also stored in ``cls.ret``).
    """
    try:
        regex = re.compile(pattern)
        paragraphs = cls.doc.getText().createEnumeration()
        current_index = 0
        while paragraphs.hasMoreElements():
            paragraph = paragraphs.nextElement()
            # Count paragraphs only, so indices mean the same thing here as in
            # the other paragraph-indexed methods of this class.
            if not paragraph.supportsService("com.sun.star.text.Paragraph"):
                continue
            selected = not paragraph_indices or current_index in paragraph_indices
            current_index += 1
            if not selected:
                continue
            para_text = paragraph.getString()
            for match in regex.finditer(para_text):
                if match.start() == match.end():
                    # An empty match selects nothing, so there is nothing to resize.
                    continue
                # Anchor a fresh cursor at the start of THIS paragraph. The text
                # cursor's gotoStart() moves to the start of the whole text, which
                # would apply the offsets relative to the document instead.
                match_cursor = cls.text.createTextCursorByRange(paragraph.getStart())
                match_cursor.goRight(match.start(), False)
                match_cursor.goRight(match.end() - match.start(), True)
                match_cursor.CharHeight = font_size
        cls.ret = f"Successfully changed font size to {font_size} for text matching '{pattern}'"
        return cls.ret
    except Exception as e:
        cls.ret = f"Error changing font size: {str(e)}"
        return cls.ret
+
@classmethod
def export_to_pdf(cls, output_path=None, output_filename=None, include_comments=False, quality="standard"):
    """
    Exports the current document to PDF format.

    Args:
        output_path (str, optional): Directory in which the PDF is saved.
            If not provided, uses the same location as the original document.
        output_filename (str, optional): The filename to use for the PDF; a ``.pdf``
            extension is appended when missing. If not provided, uses the original
            document's filename with .pdf extension.
        include_comments (bool, optional): Whether to include comments in the exported PDF.
            Defaults to False.
        quality (str, optional): The quality of the PDF export ('standard', 'high', 'print').
            Defaults to 'standard'.

    Returns:
        str: Path to the exported PDF file on success, otherwise an error message.
            ``cls.ret`` always holds a human-readable status message.
    """
    try:
        doc_url = cls.doc.getURL()
        if not doc_url and not output_path:
            cls.ret = "Error: Document has not been saved and no output path provided"
            return cls.ret
        if doc_url:
            # Convert the whole URL first so percent-escapes (e.g. %20) are decoded
            # before the directory and file name are derived from it.
            doc_dir, doc_filename = os.path.split(uno.fileUrlToSystemPath(doc_url))
            doc_name = os.path.splitext(doc_filename)[0]
        else:
            doc_dir = ""
            doc_name = "export"
        final_path = output_path if output_path else doc_dir
        final_filename = output_filename if output_filename else f"{doc_name}.pdf"
        if not final_filename.lower().endswith(".pdf"):
            final_filename += ".pdf"
        full_output_path = os.path.join(final_path, final_filename)
        output_url = uno.systemPathToFileUrl(full_output_path)

        # NOTE(review): SelectPdfVersion / ExportNotes look like PDF filter options
        # that are usually nested inside a "FilterData" property - confirm they take
        # effect when passed at the top level of the store arguments.
        pdf_version_by_quality = {"high": 1, "print": 2}
        export_props = [
            PropertyValue(Name="SelectPdfVersion", Value=pdf_version_by_quality.get(quality, 0)),
            PropertyValue(Name="ExportNotes", Value=include_comments),
            PropertyValue(Name="FilterName", Value="writer_pdf_Export"),
            PropertyValue(Name="Overwrite", Value=True),
        ]
        cls.doc.storeToURL(output_url, tuple(export_props))
        cls.ret = f"PDF exported to: {full_output_path}"
        return full_output_path
    except Exception as e:
        cls.ret = f"Error exporting to PDF: {str(e)}"
        return cls.ret
+
@classmethod
def set_paragraph_alignment(cls, alignment, paragraph_indices=None):
    """
    Sets the text alignment for specified paragraphs in the document.

    Args:
        alignment (str): The alignment to apply ('left', 'center', 'right', 'justify').
            Matching is case-insensitive.
        paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
            Out-of-range indices are skipped and reported in the result message.
            If not provided, applies to all paragraphs.

    Returns:
        str: Success message (with a warning appended when some indices were
            out of range) or error message. Also stored in ``cls.ret``.
    """
    try:
        alignment_map = {
            "left": LEFT,
            "center": CENTER,
            "right": RIGHT,
            # Justified text is ParagraphAdjust.BLOCK; the numeric value 3 used
            # previously is CENTER in that enum.
            "justify": uno.Enum("com.sun.star.style.ParagraphAdjust", "BLOCK"),
        }
        alignment_key = alignment.lower()
        if alignment_key not in alignment_map:
            cls.ret = f"Error: Invalid alignment '{alignment}'. Use 'left', 'center', 'right', or 'justify'."
            return cls.ret
        alignment_value = alignment_map[alignment_key]

        # Collect real paragraphs only, so indices skip tables and similar content.
        paragraph_enum = cls.doc.getText().createEnumeration()
        paragraphs = []
        while paragraph_enum.hasMoreElements():
            paragraph = paragraph_enum.nextElement()
            if paragraph.supportsService("com.sun.star.text.Paragraph"):
                paragraphs.append(paragraph)

        warning = ""
        if paragraph_indices:
            valid_indices = [i for i in paragraph_indices if 0 <= i < len(paragraphs)]
            if len(valid_indices) != len(paragraph_indices):
                warning = f" Warning: Some paragraph indices were out of range (0-{len(paragraphs) - 1})"
            targets = [paragraphs[i] for i in valid_indices]
        else:
            targets = paragraphs
        for paragraph in targets:
            paragraph.ParaAdjust = alignment_value

        # Keep the warning in the final message instead of overwriting it.
        cls.ret = f"Successfully applied '{alignment}' alignment to paragraphs" + warning
        return cls.ret
    except Exception as e:
        cls.ret = f"Error setting paragraph alignment: {str(e)}"
        return cls.ret
+
@classmethod
def capitalize_words(cls, paragraph_indices=None):
    """
    Capitalizes the first letter of each word for specified paragraphs in the document.

    Words are the pieces between single space characters; only the first
    character of each word is upper-cased, the remaining characters are kept
    as they are.

    Args:
        paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
            Out-of-range indices are ignored.
            If not provided, applies to all paragraphs.

    Returns:
        str: Success message or error message (also stored in ``cls.ret``).
    """
    try:
        text = cls.doc.getText()
        enum = text.createEnumeration()
        paragraphs = []
        while enum.hasMoreElements():
            paragraph = enum.nextElement()
            if paragraph.supportsService("com.sun.star.text.Paragraph"):
                paragraphs.append(paragraph)

        if not paragraph_indices:
            target_paragraphs = range(len(paragraphs))
        else:
            target_paragraphs = paragraph_indices
        valid_indices = [idx for idx in target_paragraphs if 0 <= idx < len(paragraphs)]

        for idx in valid_indices:
            paragraph = paragraphs[idx]
            text_content = paragraph.getString()
            if not text_content.strip():
                continue
            # str.capitalize() would also lower-case the rest of the word
            # ("USA" -> "Usa"), so upper-case just the first character.
            capitalized_text = " ".join(word[:1].upper() + word[1:] for word in text_content.split(" "))
            if capitalized_text == text_content:
                # Nothing to change; skip the rewrite of the paragraph text.
                continue
            para_cursor = text.createTextCursorByRange(paragraph.getStart())
            para_cursor.gotoRange(paragraph.getEnd(), True)
            para_cursor.setString(capitalized_text)
        cls.ret = f"Successfully capitalized words in {len(valid_indices)} paragraphs"
        return cls.ret
    except Exception as e:
        cls.ret = f"Error capitalizing words: {str(e)}"
        return cls.ret
+
@classmethod
def set_default_font(cls, font_name, font_size=None):
    """
    Apply a font (and optionally a size) to the default paragraph style and to the class cursor.

    The paragraph style is looked up under the names 'Default', 'Standard' and
    'Normal', in that order; if none exists, the first available paragraph
    style is used instead.

    Args:
        font_name (str): Font family name, e.g. 'Times New Roman', 'Arial', 'Calibri'.
        font_size (float, optional): Font size in points. Left untouched when None.

    Returns:
        str: Success message or error message (also stored in ``cls.ret``).
    """
    script_suffixes = ("", "Asian", "Complex")

    def apply_font(target):
        # Set the Western, Asian and Complex variants of each property.
        for suffix in script_suffixes:
            target.setPropertyValue("CharFontName" + suffix, font_name)
        if font_size is not None:
            for suffix in script_suffixes:
                target.setPropertyValue("CharHeight" + suffix, float(font_size))

    try:
        para_styles = cls.doc.getStyleFamilies().getByName("ParagraphStyles")
        base_style = None
        for candidate in ["Default", "Standard", "Normal"]:
            if para_styles.hasByName(candidate):
                base_style = para_styles.getByName(candidate)
                break
        if base_style is None:
            available = para_styles.getElementNames()
            if not available:
                raise Exception("Could not find default paragraph style")
            base_style = para_styles.getByName(available[0])

        apply_font(base_style)
        apply_font(cls.cursor)

        size_note = f" with size {font_size}pt" if font_size else ""
        cls.ret = f"Default font set to '{font_name}'" + size_note
        return cls.ret
    except Exception as e:
        cls.ret = f"Error setting default font: {str(e)}"
        return cls.ret
+
@classmethod
def add_page_numbers(cls, position, start_number=1, format=None):
    """
    Adds page numbers to the document at the specified position.

    Any existing content of the targeted header or footer of the "Standard"
    page style is replaced.

    Args:
        position (str): Position of the page numbers ('bottom_left', 'bottom_center', 'bottom_right',
            'top_left', 'top_center', 'top_right'). Positions starting with 'top' use the
            header, everything else uses the footer.
        start_number (int, optional): The starting page number. Defaults to 1.
            Applied on a best-effort basis; a failure to set it is ignored.
        format (str, optional): Format of the page numbers (e.g., '1', 'Page 1', '1 of N').
            A format containing 'Page' gets a "Page " prefix and a format containing
            'of' gets an " of <page count>" suffix. Defaults to simple number format.

    Returns:
        str: Success message or error message (also stored in ``cls.ret``).
    """
    try:
        page_styles = cls.doc.StyleFamilies.getByName("PageStyles")
        default_style = page_styles.getByName("Standard")
        try:
            # NOTE(review): the page style may not expose PageNumberOffset, in
            # which case the start number is silently not applied - confirm.
            default_style.setPropertyValue("PageNumberOffset", start_number)
        except Exception:
            pass

        if position.startswith("top"):
            default_style.HeaderIsOn = True
            target = default_style.HeaderText
        else:
            default_style.FooterIsOn = True
            target = default_style.FooterText

        # Clear whatever the header/footer currently contains.
        cursor = target.createTextCursor()
        cursor.gotoStart(False)
        cursor.gotoEnd(True)
        cursor.setString("")
        cursor.gotoStart(False)

        for suffix, adjust in (("_left", LEFT), ("_center", CENTER), ("_right", RIGHT)):
            if position.endswith(suffix):
                cursor.ParaAdjust = adjust
                break

        def insert_field(service_name):
            # 4 is presumably NumberingType.ARABIC (plain 1, 2, 3) - confirm.
            field = cls.doc.createInstance(service_name)
            field.NumberingType = 4
            target.insertTextContent(cursor, field, False)

        requested = format or ""
        if "Page" in requested:
            target.insertString(cursor, "Page ", False)
        insert_field("com.sun.star.text.TextField.PageNumber")
        if "of" in requested:
            target.insertString(cursor, " of ", False)
            insert_field("com.sun.star.text.TextField.PageCount")

        cls.ret = "Successfully added page numbers"
        return cls.ret
    except Exception as e:
        cls.ret = f"Error adding page numbers: {str(e)}"
        return cls.ret
+
@classmethod
def insert_page_break(cls, position="at_cursor"):
    """
    Insert a page break using the class-level text cursor.

    A paragraph break is inserted first, then the paragraph the cursor ends up
    in is marked to start on a new page.

    Args:
        position (str): 'end_of_document' moves the cursor to the end of the
            document before inserting; any other value (default 'at_cursor')
            inserts at the current cursor position.

    Returns:
        bool: True on success, False on failure. ``cls.ret`` holds the
            corresponding status message.
    """
    try:
        insert_at_end = position == "end_of_document"
        if insert_at_end:
            cls.cursor.gotoEnd(False)
        cls.text.insertControlCharacter(cls.cursor, PARAGRAPH_BREAK, False)
        cls.cursor.gotoStartOfParagraph(True)
        page_before = uno.Enum("com.sun.star.style.BreakType", "PAGE_BEFORE")
        cls.cursor.BreakType = page_before
    except Exception as e:
        cls.ret = f"Error inserting page break: {str(e)}"
        return False
    cls.ret = "Page break inserted successfully"
    return True
diff --git a/mm_agents/autoglm_v/tools/package/vlc.py b/mm_agents/autoglm_v/tools/package/vlc.py
new file mode 100644
index 00000000..c3a14fc4
--- /dev/null
+++ b/mm_agents/autoglm_v/tools/package/vlc.py
@@ -0,0 +1,233 @@
+import json
+import os
+import re
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from urllib.parse import quote
+
+import requests
+from requests.auth import HTTPBasicAuth
+
+
class VLCTools:
    """
    Helpers for controlling a local VLC instance.

    Playback is driven through VLC's HTTP interface (``/requests/*.xml``) and
    settings are read from / written to ``~/.config/vlc/vlcrc``. Every public
    method stores a human-readable result in ``cls.ret`` so that
    ``print_result`` can report it.
    """

    host = "localhost"
    port = 8080
    base_url = f"http://{host}:{port}/requests"
    password = "password"
    auth = HTTPBasicAuth("", password)
    ret = ""
    # Seconds to wait for the HTTP interface before treating a request as failed.
    request_timeout = 10

    @classmethod
    def print_result(cls):
        """Print the result stored by the most recent tool call."""
        print(cls.ret)

    @classmethod
    def _config_path(cls):
        """Return the path of the VLC configuration file of the current user."""
        return Path.home() / ".config/vlc/vlcrc"

    @classmethod
    def _make_request(cls, endpoint, params=None):
        """
        Send a GET request to the VLC HTTP interface.

        Args:
            endpoint (str): File below ``base_url``, e.g. 'status.xml'.
            params (dict, optional): Query parameters.

        Returns:
            requests.Response | None: The response, or None on any request error
                (connection problem, timeout, HTTP error status).
        """
        url = f"{cls.base_url}/{endpoint}"
        try:
            # A timeout keeps the tool from hanging forever when VLC is not responding.
            response = requests.get(url, params=params, auth=cls.auth, timeout=cls.request_timeout)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException:
            return None

    @classmethod
    def _parse_xml(cls, response):
        """Parse a response body as XML; return None if there is no response or it is malformed."""
        if not response:
            return None
        try:
            return ET.fromstring(response.content)
        except ET.ParseError:
            return None

    @classmethod
    def _get_status(cls):
        """Fetch and parse ``status.xml``; return the root element or None on failure."""
        return cls._parse_xml(cls._make_request("status.xml"))

    @classmethod
    def _send_command(cls, params, success_message, error_message):
        """
        Send a command through ``status.xml`` and record the outcome.

        Returns:
            str | None: ``success_message`` when the request succeeded, else None
                (``cls.ret`` then holds ``error_message``).
        """
        if cls._make_request("status.xml", params):
            cls.ret = success_message
            return cls.ret
        cls.ret = error_message
        return None

    @classmethod
    def env_info(cls):
        """Record environment information; nothing is reported for VLC."""
        cls.ret = "None"

    @classmethod
    def get_playlist(cls):
        """
        Read the current playlist.

        Returns:
            str | None: "Playlist: [...]" with name, uri and duration of each
                item, or None when the playlist could not be retrieved.
        """
        info = cls._parse_xml(cls._make_request("playlist.xml"))
        if info is not None:
            playlist_node = info.find('.//node[@name="Playlist"]')
            if playlist_node is not None:
                playlist_items = []
                for leaf in playlist_node.findall("leaf"):
                    duration = leaf.get("duration")
                    playlist_items.append(
                        {
                            "name": leaf.get("name"),
                            "uri": leaf.get("uri"),
                            # A leaf without a duration attribute must not abort the listing.
                            "duration": duration + "s" if duration is not None else None,
                        }
                    )
                cls.ret = f"Playlist: {playlist_items}"
                return cls.ret
        cls.ret = "Error getting playlist"
        return None

    @classmethod
    def play(cls):
        """Start playback. Returns the success message or None on failure."""
        return cls._send_command({"command": "pl_play"}, "Start playing the media", "Error playing the media")

    @classmethod
    def pause(cls):
        """Pause playback. Returns the success message or None on failure."""
        return cls._send_command({"command": "pl_pause"}, "Pause the media", "Error pausing the media")

    @classmethod
    def next(cls):
        """Switch to the next playlist item. Returns the success message or None on failure."""
        return cls._send_command({"command": "pl_next"}, "Switch to next media", "Error switching to next media")

    @classmethod
    def previous(cls):
        """Switch to the previous playlist item. Returns the success message or None on failure."""
        return cls._send_command(
            {"command": "pl_previous"}, "Switch to previous media", "Error switching to previous media"
        )

    @classmethod
    def add_to_playlist(cls, uri):
        """
        Add a media item via the ``in_play`` command.

        Args:
            uri (str): An http(s) URL, a local path, or a ``file://`` URI.

        Returns:
            str | None: Success message, or None on failure.
        """
        if uri.startswith("http"):
            encoded_uri = uri
        else:
            # Local files are passed as a percent-encoded file:// URI.
            encoded_uri = "file://" + quote(uri.replace("file://", ""))
        return cls._send_command(
            {"command": "in_play", "input": encoded_uri},
            f"Add {uri} to playlist",
            f"Error adding {uri} to playlist",
        )

    @classmethod
    def get_current_time(cls):
        """
        Read the current playback position.

        Returns:
            int | None: Value of the ``time`` element of the status, or None
                when it is missing or the status could not be fetched.
        """
        status = cls._get_status()
        if status is not None:
            time_node = status.find("time")
            cls.ret = int(time_node.text) if time_node is not None else None
            return cls.ret
        # Do not leave the result of an earlier call in cls.ret.
        cls.ret = "Error getting current time"
        return None

    @classmethod
    def get_media_duration(cls):
        """
        Read the length of the current media.

        Returns:
            str | None: "Media duration: N seconds", or None on failure.
        """
        status = cls._get_status()
        if status is not None:
            length = status.find("length")
            if length is not None:
                cls.ret = f"Media duration: {length.text} seconds"
                return cls.ret
        cls.ret = "Error getting media duration"
        return None

    @classmethod
    def get_settings(cls):
        """
        Read all active ``key=value`` entries from the VLC configuration file.

        Commented-out lines and lines without '=' are ignored.

        Returns:
            str | None: The settings as an indented JSON object, or None when
                the file could not be read.
        """
        settings = {}
        try:
            with open(cls._config_path(), "r") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    if not line or line.startswith("#") or "=" not in line:
                        continue
                    # Split on the first '=' only so values containing '=' are kept.
                    key, _, value = line.partition("=")
                    settings[key.strip()] = value.strip()
        except OSError as e:
            cls.ret = f"Error reading VLC settings: {str(e)}"
            return None
        cls.ret = json.dumps(settings, indent=4, ensure_ascii=False)
        return cls.ret

    @classmethod
    def set_settings(cls, field, value):
        """
        Set one entry of the VLC configuration file.

        Every line defining ``field`` (commented out or not) is replaced by
        ``field=value``; if there is none, the entry is appended.

        Args:
            field (str): Setting name.
            value: New value; written with its ``str()`` form.

        Returns:
            str | None: Confirmation message, or None when the file could not
                be read or written.
        """
        config_path = cls._config_path()
        new_entry = f"{field}={value}"
        try:
            with open(config_path, "r") as rf:
                settings = rf.read()

            # Anchor at line start so "volume" does not match inside "sout-volume".
            pattern = re.compile(r"^#? *" + re.escape(field) + r"=.*", re.MULTILINE)
            if pattern.search(settings):
                # A callable replacement keeps backslashes in the value literal.
                settings = pattern.sub(lambda _match: new_entry, settings)
            else:
                if settings and not settings.endswith("\n"):
                    settings += "\n"
                settings += new_entry + "\n"

            with open(config_path, "w") as wf:
                wf.write(settings)
        except OSError as e:
            cls.ret = f"Error setting {field}: {str(e)}"
            return None

        cls.ret = f"Set {field} to {value}"
        return cls.ret

    @classmethod
    def _is_fullscreen(cls):
        """Return the fullscreen state from the status, or None if it cannot be determined."""
        status = cls._get_status()
        if status is None:
            return None
        node = status.find("fullscreen")
        if node is None or node.text is None:
            return None
        return node.text.strip().lower() in ("true", "1")

    @classmethod
    def toggle_fullscreen(cls, enable=None):
        """
        Toggle fullscreen mode or set it explicitly based on the enable parameter.

        The HTTP interface only offers a toggle command, so an explicit request
        first reads the current state and toggles only when it differs.

        Args:
            enable (bool, optional): If provided, explicitly set fullscreen mode (True for fullscreen, False for windowed)

        Returns:
            str: Success message, or None on failure (``cls.ret`` then holds the error message)
        """
        error_message = "Error changing fullscreen mode"
        if enable is None:
            return cls._send_command({"command": "fullscreen"}, "Fullscreen mode toggled", error_message)

        wanted = bool(enable)
        success_message = f"Fullscreen mode {'enabled' if wanted else 'disabled'}"
        current = cls._is_fullscreen()
        if current is None:
            cls.ret = error_message
            return None
        if current == wanted:
            # Already in the requested state; toggling would undo it.
            cls.ret = success_message
            return cls.ret
        return cls._send_command({"command": "fullscreen"}, success_message, error_message)

    @classmethod
    def get_media_files(cls, path, suffix=None):
        """
        Gets the media files for the specified path.

        Args:
            path (str): The path to the media files (searched recursively)
            suffix (List[str], optional): The suffix of the media files, with or without a leading dot.
                Defaults to ['mp4', 'avi', 'mkv', 'mov', 'mp3', 'm4a', 'wav']

        Returns:
            list | None: Full paths of the matching files, or None when the path
                is empty, does not exist, or scanning failed.
        """
        if suffix is None:
            suffix = ["mp4", "avi", "mkv", "mov", "mp3", "m4a", "wav"]
        elif isinstance(suffix, str):
            suffix = [suffix]

        if not path:
            cls.ret = "Path cannot be empty"
            return None

        if not os.path.exists(path):
            cls.ret = f"Path not found: {path}"
            return None

        # Normalise to lower-case ".ext" so matching is case-insensitive and
        # both "mp4" and ".mp4" are accepted.
        extensions = tuple("." + s.lower().lstrip(".") for s in suffix)

        media_files = []
        try:
            for root, _, files in os.walk(path):
                for file in files:
                    if file.lower().endswith(extensions):
                        media_files.append(os.path.join(root, file))
        except Exception as e:
            cls.ret = f"Error while scanning directory: {str(e)}"
            return None

        cls.ret = media_files
        return cls.ret
diff --git a/run_autoglm_v.py b/run_autoglm_v.py
new file mode 100644
index 00000000..31794ea9
--- /dev/null
+++ b/run_autoglm_v.py
@@ -0,0 +1,608 @@
+"""Script to run end-to-end evaluation on the benchmark.
+Utils and basic architecture credit to https://github.com/web-arena-x/webarena/blob/main/run.py.
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+import math
+import ast
+import time
+import backoff
+import httpx
+import requests
+from openai import APIConnectionError, APIError, RateLimitError
+from requests.exceptions import SSLError
+from tqdm import tqdm
+
+import lib_run_single
+from desktop_env.desktop_env import MAX_RETRIES, DesktopEnv as DesktopEnvBase
+from mm_agents.autoglm_v import AutoGLMAgent
+from typing import Optional, Dict, Any
+from openai import OpenAI
+
+# Almost deprecated since it's not multi-env, use run_multienv_*.py instead
+
# Logger Configs {{{ #
# The root logger gets four handlers: an INFO log file, a DEBUG log file,
# stdout (INFO) and a second DEBUG log file. The stdout and "sdebug" handlers
# only pass records from the "desktopenv" logger hierarchy.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

# FileHandler opens its file right away and fails if the directory is missing.
os.makedirs("logs", exist_ok=True)

file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")

file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)

# ANSI colour codes highlight timestamp, level and origin of each record.
formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
)
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)

stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))

logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #

# From here on, `logger` is the experiment logger rather than the root logger.
logger = logging.getLogger("desktopenv.experiment")
+
+
def config() -> argparse.Namespace:
    """
    Parse the command-line options of the evaluation script.

    Options cover the environment (provider, screen size, step limits), the
    agent, the language model, the example selection, AWS settings and the
    result directory.

    Returns:
        argparse.Namespace: The parsed options, read from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Run end-to-end evaluation on the benchmark")

    # environment config
    parser.add_argument("--path_to_vm", type=str)
    parser.add_argument(
        "--provider_name",
        type=str,
        default="docker",
        help="Virtualization provider (vmware, docker, aws, azure, gcp, virtualbox)",
    )
    parser.add_argument("--headless", action="store_true", default=True, help="Run in headless machine")
    # --headless defaults to True and can only set True, so a separate switch
    # is needed to actually turn headless mode off.
    parser.add_argument(
        "--no-headless",
        dest="headless",
        action="store_false",
        help="Run with a visible display instead of headless",
    )
    parser.add_argument("--action_space", type=str, default="autoglm_computer_use", help="Action type")
    parser.add_argument(
        "--observation_type",
        choices=["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"],
        default="a11y_tree",
        help="Observation type",
    )
    parser.add_argument("--screen_width", type=int, default=1920)
    parser.add_argument("--screen_height", type=int, default=1080)
    parser.add_argument("--sleep_after_execution", type=float, default=1.0)
    parser.add_argument("--max_steps", type=int, default=50)

    # agent config
    parser.add_argument("--max_trajectory_length", type=int, default=3)
    parser.add_argument("--test_config_base_dir", type=str, default="evaluation_examples/examples")

    # lm config
    parser.add_argument("--model", type=str, default="autoglm-os")
    parser.add_argument("--temperature", type=float, default=0.4)
    parser.add_argument("--top_p", type=float, default=0.5)
    parser.add_argument("--max_tokens", type=int, default=4096)
    parser.add_argument("--stop_token", type=str, default=None)
    parser.add_argument("--image_width", type=int, default=1280)
    parser.add_argument("--image_height", type=int, default=720)

    # example config
    parser.add_argument("--domain", type=str, default="all")
    parser.add_argument("--test_all_meta_path", type=str, default="evaluation_examples/test_nogdrive.json")

    # aws config
    parser.add_argument(
        "--region", type=str, default="us-east-1", help="AWS region for the VM"
    )
    parser.add_argument(
        "--client_password", type=str, default="", help="Client password"
    )

    # logging related
    parser.add_argument("--result_dir", type=str, default="./results")

    return parser.parse_args()
+
+
class DesktopEnv(DesktopEnvBase):
    """
    Desktop environment variant used by the AutoGLM agent.

    Compared with the base class it understands the dict actions OPEN_APP and
    OPEN_CHROME_TAB, uploads the AutoGLM tool package on reset, and returns
    observations that also describe the open windows and the active application.
    """

    def step(self, action, pause=2):
        """
        Execute one action and return the resulting observation.

        Args:
            action: 'WAIT', 'FAIL' or 'DONE', a dict with 'action_type'
                ('OPEN_APP' or 'OPEN_CHROME_TAB') and 'parameters', or a string
                of Python code to execute in the environment.
            pause (int | float): Seconds to sleep after the action (and the
                duration of a 'WAIT' action).

        Returns:
            tuple: (observation, reward, done, info). The observation carries
                the textual outcome of the action under 'exe_result'.
        """
        self._step_no += 1
        self.action_history.append(action)

        # Mark environment as used when step is called
        self.is_environment_used = True

        reward = 0  # todo: Define reward calculation for each example
        done = False  # todo: Define episode termination condition for each example
        info = {}
        logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")

        # handle the special actions
        if action in ['WAIT', 'FAIL', 'DONE']:
            if action == 'WAIT':
                time.sleep(pause)
                exe_result = 'Wait ' + str(pause) + ' seconds'
            elif action == 'FAIL':
                done = True
                info = {"fail": True}
                exe_result = 'Finish: fail'
            elif action == 'DONE':
                done = True
                info = {"done": True}
                exe_result = 'Finish: success'
        elif isinstance(action, dict):
            if action['action_type'] == 'OPEN_APP':
                self.setup_controller._launch_setup(action['parameters']['launch_app_command'], shell=True)
                exe_result = 'Open ' + action['parameters']['app_name']
            elif action['action_type'] == 'OPEN_CHROME_TAB':
                self.setup_controller._chrome_open_tabs_setup(action['parameters']['urls_to_open'])
                exe_result = 'Open ' + str(action['parameters']['urls_to_open']) + ' in Chrome successfully'
            else:
                # Without this branch exe_result would be unbound below.
                exe_result = 'Error Action: ' + str(action)
                logger.error(f"Unknown action type: {action['action_type']}")
        else:
            # the set of all possible python commands insides `pyautogui`
            result = self.controller.execute_python_command(action)
            try:
                if result['error']:
                    exe_result = result['error'].strip()
                else:
                    exe_result = result['output'].strip()
            except Exception as e:
                # Reached e.g. when the command returned no usable result.
                exe_result = 'Error Action: ' + str(action)
                logger.error(f"Error executing action: {e}")

        time.sleep(pause)
        observation = self._get_obs()
        observation['exe_result'] = exe_result

        return observation, reward, done, info

    def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]:
        """
        Reset the environment, optionally set up a task, and return the first observation.

        Args:
            task_config: Task description; when given, the task setup is run
                (retried up to MAX_RETRIES times on failure).
            seed: Unused here.
            options: Unused here.

        Returns:
            Dict[str, Any]: The observation after the reset.
        """
        # Reset to certain task in OSWorld
        logger.info("Resetting environment...")
        logger.info("Switching task...")
        logger.info("Setting counters...")
        self._traj_no += 1
        self._step_no = 0
        self.action_history.clear()

        for attempt in range(MAX_RETRIES):
            # Only revert to snapshot if environment has been used (step/setup)
            # This optimization is especially important for cloud providers like AWS
            # where unnecessary snapshot operations are costly and time-consuming

            if task_config is not None:
                # Only consider task proxy requirement if proxy is enabled at system level
                task_use_proxy = task_config.get("proxy", False) and self.enable_proxy
                if not self.enable_proxy and task_config.get("proxy", False):
                    logger.info("Task requires proxy but proxy is disabled at system level, ignoring proxy requirement.")

                if task_use_proxy != self.current_use_proxy:
                    # keep because get_info_from_website depend on this
                    self.current_use_proxy = task_use_proxy

            if self.is_environment_used:
                logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
                self._revert_to_snapshot()
                logger.info("Starting emulator...")
                self._start_emulator()
                logger.info("Emulator started.")
                # Reset the usage flag after reverting
                self.is_environment_used = False
            else:
                logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))

            if task_config is not None:
                if task_config.get("proxy", False) and self.enable_proxy:
                    # If using proxy and proxy is enabled, set up the proxy configuration
                    self.setup_controller._proxy_setup(self.client_password)
                self._set_task_info(task_config)
                self.setup_controller.reset_cache_dir(self.cache_dir)
                logger.info("Setting up environment...")
                success = self.setup_controller.setup(self.config, task_config.get("proxy", False) and self.enable_proxy)
                if success:
                    # Mark environment as used when setup is successfully executed
                    if self.config:  # Only mark as used if there were actual setup operations
                        self.is_environment_used = True
                    break
                else:
                    logger.error(
                        "Environment setup failed, retrying (%d/%d)...",
                        attempt + 1,
                        MAX_RETRIES,
                    )
                    time.sleep(5)
            else:
                break

        logger.info("Environment setup complete.")

        # Upload tools from autoglm package
        import mm_agents.autoglm_v
        tool_dir = os.path.join(os.path.dirname(mm_agents.autoglm_v.__file__), 'tools', 'package')
        for file in os.listdir(tool_dir):
            if os.path.isdir(os.path.join(tool_dir, file)):
                continue
            self.setup_controller._upload_file_setup([{
                "local_path": os.path.join(tool_dir, file),
                "path": os.path.join('~', file)
            }])

        # start soffice service for office tools
        self.setup_controller._launch_setup('soffice --accept="socket,host=localhost,port=2002;urp;" --norestore --nologo --nodefault', shell=True)
        time.sleep(5)

        observation = self._get_obs()
        return observation

    def get_current_apps(self):
        """
        List the windows on desktop 0 and identify the active window.

        Returns:
            tuple: (app_list, cur_id) where app_list maps a window id to a dict
                with 'app_name' and 'title', and cur_id is the id printed for
                the active window.
        """
        apps_code = r"""import subprocess;
command = "wmctrl -xl";
apps = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip().split('\n');
print(apps);"""
        window_code = r"""import subprocess;
command = "wmctrl -a :ACTIVE: -v 2>&1 | grep 'Using window' | awk '{print $3}'";
window_id = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
print(window_id);"""

        apps = self.controller.execute_python_command(apps_code)['output'].strip()
        # The remote script prints a Python list literal of output lines.
        apps = ast.literal_eval(apps)
        app_list = {}

        for app in apps:
            parts = app.split(maxsplit=4)
            if len(parts) < 4:
                continue
            # Only windows whose second column (the desktop) is '0' are kept.
            if parts[1] != '0':
                continue
            window_id = parts[0]
            # Keep the trailing half (rounded up) of the dot-separated class name.
            app_name = '.'.join(parts[2].split('.')[-(math.ceil(parts[2].count('.') / 2)):])
            # NOTE(review): with `wmctrl -xl` the fourth column is presumably the
            # client machine name and the window title follows in parts[4] -
            # confirm whether parts[3] is really the intended "title".
            title = parts[3]
            app_list[window_id] = {
                'app_name': app_name,
                'title': title
            }

        cur_id = self.controller.execute_python_command(window_code)['output'].strip()

        return app_list, cur_id

    def maximize_window(self):
        """
        Try to maximize the active window, checking its state after each attempt.

        Gives up silently after five attempts.
        """
        window_state = r"""import subprocess;
command = "xprop -id $(xprop -root _NET_ACTIVE_WINDOW | awk -F' ' '{print $5}') _NET_WM_STATE"
output = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
print(output);"""
        for _ in range(5):
            try:
                self.setup_controller._launch_setup('wmctrl -r :ACTIVE: -b add,maximized_vert,maximized_horz', shell=True)
                time.sleep(2)
                output = self.controller.execute_python_command(window_state)['output'].strip()
                # Stop when there is no window, it is a popup, it is a modal
                # window, or the window is already maximized.
                if '_NET_WM_STATE_FOCUSED' not in output or '_NET_WM_STATE_SKIP_TASKBAR' in output or '_NET_WM_STATE_MODAL' in output or '_NET_WM_STATE_MAXIMIZED' in output:
                    return
            except Exception as e:
                logger.error(f"Failed to maximize window: {e}")
                time.sleep(1)

    def _get_obs(self):
        """
        Build the observation for the current state of the desktop.

        Returns:
            dict: screenshot, accessibility tree, instruction, the window list,
                the active window id, the active application's name, and the
                output of that application's tool `env_info` (None when the
                application has no tool class).
        """
        tool_list = {
            "libreoffice_calc": "CalcTools",
            "libreoffice_impress": "ImpressTools",
            "libreoffice_writer": "WriterTools",
            "code": "CodeTools",
            "vlc": "VLCTools",
            "google_chrome": "BrowserTools"
        }

        self.maximize_window()

        for i in range(3):
            try:
                app_list, cur_id = self.get_current_apps()
                # Stop retrying once the window list has been obtained.
                break
            except Exception as e:
                if i == 2:
                    raise
                logger.error(f"Failed to get current apps: {e}")
                time.sleep(1)

        if cur_id in app_list:
            cur_app = app_list[cur_id]['app_name']

            tool_name = cur_app.strip().lower().replace('-', '_')
            if tool_name in tool_list:
                # Run the matching tool module inside the environment and
                # capture what its env_info() reports.
                class_name = tool_list[tool_name]
                command = f"from {tool_name} import *; "
                command += f"{class_name}.env_info(); "
                command += f"{class_name}.print_result();"
                app_info = self.controller.execute_python_command(command)['output'].strip()
            else:
                app_info = None
        else:
            cur_app = None
            app_info = None

        tree = self.controller.get_accessibility_tree()
        screenshot = self.controller.get_screenshot()
        if screenshot is None:
            logger.error("Failed to get screenshot.")
            screenshot = b''

        return {
            "screenshot": screenshot,
            "accessibility_tree": tree,
            "instruction": self.instruction,
            "apps": app_list,
            "cur_window_id": cur_id,
            "cur_app": cur_app,
            "app_info": app_info,
        }
+
+
+def test(args: argparse.Namespace, test_all_meta: dict) -> None:
+    """Run every pending example sequentially against one shared environment.
+
+    A single ``DesktopEnv`` and ``AutoGLMAgent`` are created, then each
+    ``domain``/``example_id`` listed in *test_all_meta* is loaded from
+    ``args.test_config_base_dir`` and passed to
+    ``lib_run_single.run_single_example_autoglm``. Scores are collected in a
+    local list and their average is logged once all examples were attempted.
+
+    Args:
+        args: Parsed command-line options.
+        test_all_meta: Mapping of domain name to the example ids to run.
+    """
+    scores = []
+    max_steps = args.max_steps
+
+    # log args
+    logger.info("Args: %s", args)
+    # Snapshot of the run settings; nothing in this function reads it back.
+    cfg_args = {
+        "path_to_vm": args.path_to_vm,
+        "provider_name": args.provider_name,
+        "headless": args.headless,
+        "action_space": args.action_space,
+        "observation_type": args.observation_type,
+        "screen_width": args.screen_width,
+        "screen_height": args.screen_height,
+        "sleep_after_execution": args.sleep_after_execution,
+        "max_steps": args.max_steps,
+        "max_trajectory_length": args.max_trajectory_length,
+        "model": args.model,
+        "temperature": args.temperature,
+        "top_p": args.top_p,
+        "max_tokens": args.max_tokens,
+        "stop_token": args.stop_token,
+        "result_dir": args.result_dir,
+    }
+
+    # NOTE(review): the request below goes through `requests`, so the openai
+    # exception types listed here are presumably never raised by it -- confirm
+    # whether retrying on requests' own exceptions was intended.
+    @backoff.on_exception(
+        backoff.constant,
+        (RateLimitError, APIConnectionError),
+        interval=0.1,
+    )
+    def call_llm(messages):
+        """POST *messages* to the chat-completions endpoint and return the reply text."""
+        logger.info("Calling LLM...")
+
+        # Prepare the request data
+        data = {
+            "model": args.model,
+            "messages": messages,
+            "max_tokens": args.max_tokens,
+            "temperature": args.temperature,
+            "top_p": args.top_p,
+            "skip_special_tokens": False,
+            "stream": False,
+            "include_stop_str_in_output": True,
+            "stop": ["<|user|>", "<|observation|>", ""]
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY', '')}"
+        }
+
+        # Get API base URL from environment or use default
+        base_url = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')
+        url = f"{base_url}/chat/completions"
+
+        response = requests.post(
+            url,
+            json=data,
+            headers=headers,
+            timeout=60.0
+        )
+        response.raise_for_status()
+
+        result = response.json()
+        logger.info("LLM called successfully.")
+        return result['choices'][0]['message']['content']
+
+    env = DesktopEnv(
+        provider_name=args.provider_name,
+        region=args.region,
+        client_password=args.client_password,
+        path_to_vm=args.path_to_vm,
+        action_space=args.action_space,
+        screen_size=(args.screen_width, args.screen_height),
+        headless=args.headless,
+        os_type="Ubuntu",
+        require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
+    )
+    try:
+        agent = AutoGLMAgent(
+            action_space=args.action_space,
+            observation_type=args.observation_type,
+            screen_size=(args.screen_width, args.screen_height),
+            image_size=(args.image_width, args.image_height),
+            max_trajectory_length=args.max_trajectory_length,
+            client_password=args.client_password,
+            gen_func=call_llm,
+        )
+
+        for domain in tqdm(test_all_meta, desc="Domain"):
+            for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False):
+                config_file = os.path.join(args.test_config_base_dir, f"{domain}/{example_id}.json")
+                with open(config_file, "r", encoding="utf-8") as f:
+                    example = json.load(f)
+
+                logger.info(f"[Domain]: {domain}")
+                logger.info(f"[Example ID]: {example_id}")
+
+                instruction = example["instruction"]
+
+                logger.info(f"[Instruction]: {instruction}")
+                cfg_args["instruction"] = instruction
+                cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S")
+
+                example_result_dir = os.path.join(
+                    args.result_dir,
+                    args.action_space,
+                    args.observation_type,
+                    args.model,
+                    domain,
+                    example_id,
+                )
+                os.makedirs(example_result_dir, exist_ok=True)
+                # example start running
+                try:
+                    lib_run_single.run_single_example_autoglm(
+                        agent,
+                        env,
+                        example,
+                        max_steps,
+                        instruction,
+                        args,
+                        example_result_dir,
+                        scores,
+                    )
+                except Exception as e:
+                    logger.error(f"Exception in {domain}/{example_id}: {e}")
+                    # Only attempt to end recording if a controller exists.
+                    if hasattr(env, "controller") and env.controller is not None:
+                        try:
+                            env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+                        except Exception as rec_err:
+                            # Best effort: a failed recording must not abort the remaining examples.
+                            logger.warning(f"Failed to end recording for {domain}/{example_id}: {rec_err}")
+                    # Record the real failure instead of a fixed "time limit" message.
+                    with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+                        f.write(json.dumps({"Error": f"Exception in {domain}/{example_id}: {e}"}))
+                        f.write("\n")
+    finally:
+        # Release the environment even when setup or an example aborts the run.
+        env.close()
+
+    if scores:
+        logger.info(f"Average score: {sum(scores) / len(scores)}")
+    else:
+        # Nothing was scored (no pending tasks, or every example failed).
+        logger.info("No scores obtained.")
+
+
+def get_unfinished(action_space, use_model, observation_type, result_dir, total_file_json):
+    """Remove already-finished examples from *total_file_json* and return it.
+
+    Results are looked up under
+    ``result_dir/action_space/observation_type/use_model/<domain>/<example_id>``.
+    An example counts as finished when its directory contains ``result.txt``.
+    Directories of unfinished examples are emptied so the example can be
+    rerun from scratch. Entries named ``onboard`` are ignored.
+
+    Args:
+        action_space: Path component naming the action space.
+        use_model: Path component naming the model.
+        observation_type: Path component naming the observation type.
+        result_dir: Root directory of all results.
+        total_file_json: Mapping of domain name to list of example ids;
+            updated in place.
+
+    Returns:
+        The same *total_file_json* mapping, with finished ids filtered out.
+    """
+    import shutil  # local import: only needed to clear leftover sub-directories
+
+    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
+
+    if not os.path.exists(target_dir):
+        return total_file_json
+
+    finished = {}
+    for domain in os.listdir(target_dir):
+        domain_path = os.path.join(target_dir, domain)
+        if not os.path.isdir(domain_path):
+            # Plain files next to the domain folders are not domains.
+            continue
+        done = set()
+        for example_id in os.listdir(domain_path):
+            if example_id == "onboard":
+                continue
+            example_path = os.path.join(domain_path, example_id)
+            if not os.path.isdir(example_path):
+                continue
+            entries = os.listdir(example_path)
+            if "result.txt" in entries:
+                done.add(example_id)
+                continue
+            # Unfinished run: wipe its leftovers, including nested directories.
+            for entry in entries:
+                entry_path = os.path.join(example_path, entry)
+                if os.path.isdir(entry_path) and not os.path.islink(entry_path):
+                    shutil.rmtree(entry_path)
+                else:
+                    os.remove(entry_path)
+        finished[domain] = done
+
+    for domain, done in finished.items():
+        if domain in total_file_json:
+            total_file_json[domain] = [x for x in total_file_json[domain] if x not in done]
+
+    return total_file_json
+
+
+def get_result(action_space, use_model, observation_type, result_dir, total_file_json):
+    """Print the current success rate and return every recorded score.
+
+    Scans ``result_dir/action_space/observation_type/use_model`` for
+    ``<domain>/<example_id>/result.txt`` files. The text ``true`` (any case)
+    counts as 1.0, any other content is parsed as a float, and a file that
+    cannot be read or parsed counts as 0.0. *total_file_json* is accepted
+    but not used.
+
+    Returns:
+        The list of scores, or ``None`` when the directory is missing or
+        holds no result files.
+    """
+    root = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(root):
+        print("New experiment, no result yet.")
+        return None
+
+    collected = []
+    for domain_name in os.listdir(root):
+        domain_dir = os.path.join(root, domain_name)
+        if not os.path.isdir(domain_dir):
+            continue
+        for example_name in os.listdir(domain_dir):
+            example_dir = os.path.join(domain_dir, example_name)
+            if not os.path.isdir(example_dir) or "result.txt" not in os.listdir(example_dir):
+                continue
+            try:
+                with open(os.path.join(example_dir, "result.txt"), "r") as handle:
+                    text = handle.read().strip()
+                value = 1.0 if text.lower() == "true" else float(text)
+            except Exception:
+                # Unreadable or malformed result files count as a failure.
+                value = 0.0
+            collected.append(value)
+
+    if collected:
+        print("Current Success Rate:", sum(collected) / len(collected) * 100, "%")
+        return collected
+
+    print("New experiment, no result yet.")
+    return None
+
+
+if __name__ == "__main__":
+    # Script entry point: resolve defaults, persist the arguments, report
+    # progress so far, then run whatever examples are still pending.
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    args = config()
+    if args.client_password == "":
+        # No password supplied: fall back to the per-provider default.
+        args.client_password = "osworld-public-evaluation" if args.provider_name == "aws" else "password"
+
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    path_to_args = os.path.join(
+        args.result_dir, args.action_space, args.observation_type, args.model, "args.json"
+    )
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
+
+    with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
+        test_all_meta = json.load(f)
+
+    # Restrict the run to a single domain when one was requested.
+    if args.domain != "all":
+        test_all_meta = {args.domain: test_all_meta[args.domain]}
+
+    run_key = (args.action_space, args.model, args.observation_type, args.result_dir)
+    test_file_list = get_unfinished(*run_key, test_all_meta)
+
+    # One "<domain>: <count>" line per domain that still has work left.
+    left_info = "".join(f"{domain}: {len(test_file_list[domain])}\n" for domain in test_file_list)
+    logger.info(f"Left tasks:\n{left_info}")
+
+    get_result(*run_key, test_all_meta)
+    test(args, test_file_list)
diff --git a/run_multienv_autoglm_v.py b/run_multienv_autoglm_v.py
new file mode 100644
index 00000000..3716050c
--- /dev/null
+++ b/run_multienv_autoglm_v.py
@@ -0,0 +1,294 @@
+"""Script to run end-to-end evaluation on the benchmark.
+Utils and basic architecture credit to https://github.com/web-arena-x/webarena/blob/main/run.py.
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+import math
+import ast
+import time
+import backoff
+import httpx
+import requests
+from tqdm import tqdm
+from typing import Optional, Dict, Any
+from multiprocessing import Pool
+from openai import APIConnectionError, APIError, RateLimitError
+from types import SimpleNamespace
+
+import lib_run_single
+from run_autoglm_v import DesktopEnv, get_unfinished, get_result
+from desktop_env.desktop_env import MAX_RETRIES, DesktopEnv as DesktopEnvBase
+from mm_agents.autoglm_v import AutoGLMAgent
+from openai import OpenAI
+
+logger = logging.getLogger("desktopenv.experiment")
+
+def config() -> argparse.Namespace:
+    """Parse the command-line options of the parallel evaluation script.
+
+    Returns:
+        The namespace produced by ``ArgumentParser.parse_args()``.
+    """
+    parser = argparse.ArgumentParser(description="Run end-to-end evaluation on the benchmark")
+
+    # (flag, add_argument keyword arguments). Registered top to bottom so the
+    # order shown by --help stays the same.
+    option_table = [
+        # environment config
+        ("--path_to_vm", {"type": str}),
+        (
+            "--provider_name",
+            {
+                "type": str,
+                "default": "docker",
+                "help": "Virtualization provider (vmware, docker, aws, azure, gcp, virtualbox)",
+            },
+        ),
+        # NOTE(review): store_true combined with default=True means this value is always True.
+        ("--headless", {"action": "store_true", "default": True, "help": "Run in headless machine"}),
+        ("--action_space", {"type": str, "default": "autoglm_computer_use", "help": "Action type"}),
+        (
+            "--observation_type",
+            {
+                "choices": ["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"],
+                "default": "a11y_tree",
+                "help": "Observation type",
+            },
+        ),
+        ("--screen_width", {"type": int, "default": 1920}),
+        ("--screen_height", {"type": int, "default": 1080}),
+        ("--sleep_after_execution", {"type": float, "default": 1.0}),
+        ("--max_steps", {"type": int, "default": 30}),
+        # agent config
+        ("--max_trajectory_length", {"type": int, "default": 3}),
+        ("--test_config_base_dir", {"type": str, "default": "evaluation_examples/examples"}),
+        # lm config
+        ("--model", {"type": str, "default": "autoglm-os"}),
+        ("--temperature", {"type": float, "default": 0.4}),
+        ("--top_p", {"type": float, "default": 0.5}),
+        ("--max_tokens", {"type": int, "default": 2048}),
+        ("--stop_token", {"type": str, "default": None}),
+        ("--image_width", {"type": int, "default": 1280}),
+        ("--image_height", {"type": int, "default": 720}),
+        # example config
+        ("--domain", {"type": str, "default": "all"}),
+        ("--test_all_meta_path", {"type": str, "default": "evaluation_examples/test_nogdrive.json"}),
+        # aws config
+        ("--region", {"type": str, "default": "us-east-1", "help": "AWS region for the VM"}),
+        ("--client_password", {"type": str, "default": "", "help": "Client password"}),
+        # logging related
+        ("--result_dir", {"type": str, "default": "./results"}),
+        # parallel number
+        ("--num_workers", {"type": int, "default": 20, "help": "Number of parallel workers"}),
+    ]
+    for flag, options in option_table:
+        parser.add_argument(flag, **options)
+
+    return parser.parse_args()
+
+def _worker_run(task):
+    """Run a single example and report its score.
+
+    Each call loads the example JSON, builds its own ``DesktopEnv`` and
+    ``AutoGLMAgent``, runs the example through
+    ``lib_run_single.run_single_example_autoglm`` and finally reads the score
+    back from ``result.txt`` in the example's result directory.
+
+    Args:
+        task: ``(domain, example_id, args)`` tuple, where ``args`` is the
+            parsed ``argparse.Namespace``.
+
+    Returns:
+        ``(domain, example_id, score)``. The score is 0.0 when setup fails,
+        when no ``result.txt`` was written, or when it cannot be parsed.
+    """
+    domain, example_id, args = task  # args is an argparse.Namespace
+    logger = logging.getLogger("desktopenv.experiment")
+    try:
+        config_file = os.path.join(args.test_config_base_dir, f"{domain}/{example_id}.json")
+        with open(config_file, "r", encoding="utf-8") as f:
+            example = json.load(f)
+        instruction = example["instruction"]
+
+        # NOTE(review): the request below goes through `requests`, so the openai
+        # exception types listed here are presumably never raised by it -- confirm
+        # whether retrying on requests' own exceptions was intended.
+        @backoff.on_exception(backoff.constant, (RateLimitError, APIConnectionError), interval=0.1)
+        def call_llm(messages):
+            """POST *messages* to the chat-completions endpoint and return the reply text."""
+            logger.info("Calling LLM...")
+
+            # Prepare the request data
+            data = {
+                "model": args.model,
+                "messages": messages,
+                "max_tokens": args.max_tokens,
+                "temperature": args.temperature,
+                "top_p": args.top_p,
+                "skip_special_tokens": False,
+                "stream": False,
+                "include_stop_str_in_output": True,
+                "stop": ["<|user|>", "<|observation|>", ""]
+            }
+
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY', '')}"
+            }
+
+            # Get API base URL from environment or use default
+            base_url = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')
+            url = f"{base_url}/chat/completions"
+
+            response = requests.post(
+                url,
+                json=data,
+                headers=headers,
+                timeout=60.0
+            )
+            response.raise_for_status()
+
+            result = response.json()
+            logger.info("LLM called successfully.")
+            return result['choices'][0]['message']['content']
+
+        env = DesktopEnv(
+            provider_name=args.provider_name,
+            region=args.region,
+            client_password=args.client_password,
+            path_to_vm=args.path_to_vm,
+            action_space=args.action_space,
+            screen_size=(args.screen_width, args.screen_height),
+            headless=args.headless,
+            os_type="Ubuntu",
+            require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
+        )
+        agent = AutoGLMAgent(
+            action_space=args.action_space,
+            observation_type=args.observation_type,
+            screen_size=(args.screen_width, args.screen_height),
+            image_size=(args.image_width, args.image_height),
+            max_trajectory_length=args.max_trajectory_length,
+            client_password=args.client_password,
+            gen_func=call_llm,
+        )
+
+        example_result_dir = os.path.join(
+            args.result_dir,
+            args.action_space,
+            args.observation_type,
+            args.model,
+            domain,
+            example_id,
+        )
+        os.makedirs(example_result_dir, exist_ok=True)
+
+        # Required by the runner's signature; the score is re-read from result.txt below.
+        local_scores = []
+        try:
+            lib_run_single.run_single_example_autoglm(
+                agent,
+                env,
+                example,
+                args.max_steps,
+                instruction,
+                args,
+                example_result_dir,
+                local_scores,
+            )
+        except Exception as e:
+            logger.error(f"[并发任务异常] {domain}/{example_id}: {e}")
+            if hasattr(env, "controller") and env.controller is not None:
+                try:
+                    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+                except Exception as rec_err:
+                    # Best effort, but leave a trace instead of failing silently.
+                    logger.warning(f"Failed to end recording for {domain}/{example_id}: {rec_err}")
+            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+                f.write(json.dumps({"Error": f"Exception in {domain}/{example_id}: {str(e)}"}) + "\n")
+        finally:
+            try:
+                env.close()
+            except Exception as close_err:
+                # Closing is best effort; the score is still reported below.
+                logger.warning(f"Failed to close environment for {domain}/{example_id}: {close_err}")
+
+        result_path = os.path.join(example_result_dir, "result.txt")
+        if os.path.exists(result_path):
+            try:
+                with open(result_path, "r") as rf:
+                    res = rf.read().strip()
+                # "true" in any case is a full score; anything else must be numeric.
+                score = 1.0 if res.lower() == "true" else float(res)
+            except Exception:
+                score = 0.0
+        else:
+            # No result file means the example never reached evaluation.
+            score = 0.0
+        logger.info(f"[Finish] {domain}/{example_id} score={score}")
+        return (domain, example_id, score)
+    except Exception as e:
+        # Setup failed (config load, environment or agent creation, result dir).
+        logger.error(f"[Initializing Fail] {domain}/{example_id}: {e}")
+        return (domain, example_id, 0.0)
+
+def test_parallel(args: argparse.Namespace, test_all_meta: dict):
+    """Run all pending examples through a process pool and log the average score.
+
+    Args:
+        args: Parsed command-line options; ``args.num_workers`` sets the pool size.
+        test_all_meta: Mapping of domain name to the example ids to run.
+    """
+    # One (domain, example_id, args) tuple per example, in mapping order.
+    tasks = [
+        (domain, example_id, args)
+        for domain in test_all_meta
+        for example_id in test_all_meta[domain]
+    ]
+    if not tasks:
+        logger.info("No pending tasks")
+        return
+    logger.info(f"Starting parallel execution: {args.num_workers} processes, {len(tasks)} tasks total")
+
+    with Pool(processes=args.num_workers) as pool:
+        # imap_unordered yields each result as soon as its worker finishes.
+        progress = tqdm(pool.imap_unordered(_worker_run, tasks), total=len(tasks), desc="Parallel execution")
+        results = list(progress)
+
+    scores = [outcome[2] for outcome in results if outcome[2] is not None]
+    if not scores:
+        logger.info("No scores obtained.")
+        return
+    avg = sum(scores) / len(scores)
+    logger.info(f"Parallel execution completed. Average score: {avg}")
+
+if __name__ == "__main__":
+    # Script entry point: resolve defaults, persist the arguments, report
+    # progress so far, then run the pending examples in parallel.
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    args = config()
+    if args.client_password == "":
+        # No password supplied: fall back to the per-provider default.
+        args.client_password = "osworld-public-evaluation" if args.provider_name == "aws" else "password"
+
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    path_to_args = os.path.join(
+        args.result_dir, args.action_space, args.observation_type, args.model, "args.json"
+    )
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
+
+    with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
+        test_all_meta = json.load(f)
+
+    # Restrict the run to a single domain when one was requested.
+    if args.domain != "all":
+        test_all_meta = {args.domain: test_all_meta[args.domain]}
+
+    run_key = (args.action_space, args.model, args.observation_type, args.result_dir)
+    test_file_list = get_unfinished(*run_key, test_all_meta)
+
+    # One "<domain>: <count>" line per domain that still has work left.
+    left_info = "".join(f"{domain}: {len(test_file_list[domain])}\n" for domain in test_file_list)
+    logger.info(f"Left tasks:\n{left_info}")
+
+    get_result(*run_key, test_all_meta)
+    test_parallel(args, test_file_list)