From a4f8fe2f00a2ca9bb38943f070beeb5ae39e953a Mon Sep 17 00:00:00 2001 From: Yanxiao Zhao Date: Wed, 24 Sep 2025 19:43:28 +0800 Subject: [PATCH] Add autoglm-os-9b-v (#344) * update for autoglm-v * Update run_autoglm.py --------- Co-authored-by: hanyullai --- lib_run_single.py | 10 +- mm_agents/autoglm_v/__init__.py | 7 + mm_agents/autoglm_v/main.py | 265 +++ .../prompt/accessibility_tree_handle.py | 329 ++++ .../autoglm_v/prompt/deduplicate_node.py | 100 ++ mm_agents/autoglm_v/prompt/grounding_agent.py | 260 +++ .../autoglm_v/prompt/procedural_memory.py | 194 +++ mm_agents/autoglm_v/tools/apis/__init__.py | 3 + mm_agents/autoglm_v/tools/apis/code.json | 236 +++ mm_agents/autoglm_v/tools/apis/func.py | 117 ++ .../autoglm_v/tools/apis/google_chrome.json | 134 ++ .../tools/apis/libreoffice_calc.json | 634 ++++++++ .../tools/apis/libreoffice_impress.json | 559 +++++++ .../tools/apis/libreoffice_writer.json | 412 +++++ mm_agents/autoglm_v/tools/apis/vlc.json | 166 ++ mm_agents/autoglm_v/tools/package/code.py | 260 +++ .../autoglm_v/tools/package/google_chrome.py | 107 ++ .../tools/package/libreoffice_calc.py | 1322 +++++++++++++++ .../tools/package/libreoffice_impress.py | 1424 +++++++++++++++++ .../tools/package/libreoffice_writer.py | 753 +++++++++ mm_agents/autoglm_v/tools/package/vlc.py | 233 +++ run_autoglm_v.py | 608 +++++++ run_multienv_autoglm_v.py | 294 ++++ 23 files changed, 8425 insertions(+), 2 deletions(-) create mode 100644 mm_agents/autoglm_v/__init__.py create mode 100644 mm_agents/autoglm_v/main.py create mode 100644 mm_agents/autoglm_v/prompt/accessibility_tree_handle.py create mode 100644 mm_agents/autoglm_v/prompt/deduplicate_node.py create mode 100644 mm_agents/autoglm_v/prompt/grounding_agent.py create mode 100644 mm_agents/autoglm_v/prompt/procedural_memory.py create mode 100644 mm_agents/autoglm_v/tools/apis/__init__.py create mode 100644 mm_agents/autoglm_v/tools/apis/code.json create mode 100644 mm_agents/autoglm_v/tools/apis/func.py create 
mode 100644 mm_agents/autoglm_v/tools/apis/google_chrome.json create mode 100644 mm_agents/autoglm_v/tools/apis/libreoffice_calc.json create mode 100644 mm_agents/autoglm_v/tools/apis/libreoffice_impress.json create mode 100644 mm_agents/autoglm_v/tools/apis/libreoffice_writer.json create mode 100644 mm_agents/autoglm_v/tools/apis/vlc.json create mode 100644 mm_agents/autoglm_v/tools/package/code.py create mode 100644 mm_agents/autoglm_v/tools/package/google_chrome.py create mode 100644 mm_agents/autoglm_v/tools/package/libreoffice_calc.py create mode 100644 mm_agents/autoglm_v/tools/package/libreoffice_impress.py create mode 100644 mm_agents/autoglm_v/tools/package/libreoffice_writer.py create mode 100644 mm_agents/autoglm_v/tools/package/vlc.py create mode 100644 run_autoglm_v.py create mode 100644 run_multienv_autoglm_v.py diff --git a/lib_run_single.py b/lib_run_single.py index be4bc545..d19599ca 100644 --- a/lib_run_single.py +++ b/lib_run_single.py @@ -253,14 +253,20 @@ def run_single_example_autoglm(agent, env, example, max_steps, instruction, args "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png" })) f.write("\n") + if done: logger.info("The episode is done.") break - if not done: # not completed the task yet - env.action_history.append('FAIL') + # Invalid Action + if not actions: + obs = env._get_obs() # update observation step_idx += 1 + + if not done: # not completed the task yet + env.action_history.append('FAIL') + result = env.evaluate() logger.info("Result: %.2f", result) scores.append(result) diff --git a/mm_agents/autoglm_v/__init__.py b/mm_agents/autoglm_v/__init__.py new file mode 100644 index 00000000..68226a16 --- /dev/null +++ b/mm_agents/autoglm_v/__init__.py @@ -0,0 +1,7 @@ +""" +AutoGLM agent implementation +""" + +from .main import AutoGLMAgent + +__all__ = ["AutoGLMAgent"] diff --git a/mm_agents/autoglm_v/main.py b/mm_agents/autoglm_v/main.py new file mode 100644 index 00000000..0095949d --- /dev/null +++ 
logger = logging.getLogger("desktopenv.agent")

pure_text_settings = ["a11y_tree"]

# Bare control tokens the model may emit instead of (or after) executable code.
_CONTROL_COMMANDS = ("WAIT", "DONE", "FAIL")

# Matches both ```code``` and ```python code``` and captures the code part.
# DOTALL lets the (non-greedy) body span several lines.
_CODE_FENCE_RE = re.compile(r"```(?:\w+\s+)?(.*?)```", re.DOTALL)


def resize_image(image, w, h):
    """Resize an encoded image to ``w`` x ``h`` pixels and return it as PNG bytes.

    Args:
        image: Encoded image bytes in any format PIL can open.
        w: Target width in pixels.
        h: Target height in pixels.

    Returns:
        The resized image encoded as PNG bytes.
    """
    img = Image.open(BytesIO(image))
    img = img.resize((w, h))
    buf = BytesIO()
    img.save(buf, format="PNG")  # always re-encoded as PNG, whatever the input was
    return buf.getvalue()


def parse_code_from_string(input_string):
    """Extract the code snippets / control commands from a model response.

    A response that is exactly ``WAIT``, ``DONE`` or ``FAIL`` is returned as a
    single command. Otherwise every triple-backtick fenced block is collected;
    when a block ends with a control command on its own line, the code before
    it and the command are returned as two separate entries.

    Args:
        input_string: Raw text produced by the model.

    Returns:
        A list of code strings and/or control commands, possibly empty when the
        response contains no fenced block.
    """
    stripped = input_string.strip()
    if stripped in _CONTROL_COMMANDS:
        return [stripped]

    codes = []
    for snippet in _CODE_FENCE_RE.findall(input_string):
        snippet = snippet.strip()
        lines = snippet.split("\n")

        if snippet in _CONTROL_COMMANDS:
            codes.append(snippet)
        elif lines[-1] in _CONTROL_COMMANDS:
            # Code followed by a trailing command: emit both, in order.
            if len(lines) > 1:
                codes.append("\n".join(lines[:-1]))
            codes.append(lines[-1])
        else:
            codes.append(snippet)

    return codes


class AutoGLMAgent:
    """Agent that turns observations into prompts and model replies into actions.

    The conversation state is kept in ``self.contents`` (one dict per turn).
    ``predict`` is the entry point: it builds the messages, calls ``gen_func``
    and grounds the reply into executable actions.
    """

    def __init__(
        self,
        action_space="autoglm_computer_use",
        observation_type="a11y_tree",
        max_trajectory_length=3,
        a11y_tree_max_items=300,
        with_image: bool = True,
        screen_size=(1920, 1080),
        image_size=(1920, 1080),
        with_atree: bool = False,
        glm41v_format: bool = True,
        relative_coordinate: bool = True,
        client_password="password",
        gen_func=None,
        tool_in_sys_msg: bool = True,
    ):
        self.action_space = action_space
        self.observation_type = observation_type
        assert action_space in ["autoglm_computer_use"], "Invalid action space"
        assert observation_type in ["a11y_tree"], "Invalid observation type"
        self.max_trajectory_length = max_trajectory_length
        self.a11y_tree_max_items = a11y_tree_max_items
        self.with_image = with_image
        self.screen_size = screen_size
        self.image_size = image_size
        self.with_atree = with_atree
        self.glm41v_format = glm41v_format
        self.relative_coordinate = relative_coordinate
        self.client_password = client_password
        self.gen_func = gen_func
        self.tool_in_sys_msg = tool_in_sys_msg

        # Maps the normalized current-app name to the tool class exposed for it.
        self.tool_list = {
            "libreoffice_calc": "CalcTools",
            "libreoffice_impress": "ImpressTools",
            "libreoffice_writer": "WriterTools",
            "code": "CodeTools",
            "vlc": "VLCTools",
            "google_chrome": "BrowserTools",
        }

        # The grounding agent is used through its class, so this is global state.
        Agent.relative_coordinate = relative_coordinate

        self.contents = []

    @property
    def turn_number(self):
        """Number of turns recorded so far."""
        return len(self.contents)

    def prepare(self, instruction: str, obs: Dict, history: List, last_result: str = "") -> List:
        """Build the chat messages for the next model call.

        Args:
            instruction: The task description given to the agent.
            obs: Current observation. Reads ``cur_app``, ``apps``,
                ``cur_window_id``, ``app_info`` and, when enabled,
                ``accessibility_tree`` / ``screenshot``; ``exe_result`` is
                used as the previous action result when present.
            history: Already formatted previous turns (see ``format_history``).
            last_result: Result of the previous action; falls back to
                ``obs["exe_result"]`` when empty.

        Returns:
            The list of messages: system message, history, then the new user
            message (screenshot first, if any, followed by the text prompt).
        """
        # NOTE(review): the source this was recovered from lost its indentation;
        # the nesting of the inner `if` is assumed -- confirm against the repository.
        if "exe_result" in obs and not last_result:
            last_result = obs["exe_result"]
            if self.contents:
                self.contents[-1]["exe_result"] = last_result

        cur_app = obs["cur_app"]
        logger.info(f"current app is {cur_app}")

        tool_name = None
        if cur_app:
            candidate = cur_app.strip().lower().replace("-", "_")
            if candidate in self.tool_list:
                tool_name = candidate

        setup_prompt, func_def_prompt, note_prompt = Prompt.construct_procedural_memory(
            Agent,
            app_name=tool_name,
            client_password=self.client_password,
            with_image=self.with_image,
            with_atree=self.with_atree,
            relative_coordinate=self.relative_coordinate,
            glm41v_format=self.glm41v_format,
        )
        if self.tool_in_sys_msg:
            system_message = setup_prompt + "\n\n" + func_def_prompt + "\n\n" + note_prompt
        else:
            system_message = setup_prompt + "\n\n" + note_prompt
        system_message += "\n\n**IMPORTANT** You are asked to complete the following task: {}".format(instruction)

        messages = [
            {
                "role": "system",
                "content": system_message,
            }
        ]
        messages.extend(history)

        if obs["apps"]:
            app_str = "Window ID App Name Title\n"
            for window_id, app in obs["apps"].items():
                app_str += f"{window_id} {app['app_name']} {app['title']}\n"
        else:
            app_str = "None"

        last_result = last_result.strip() if last_result else "None"
        last_result = last_result[:2000] + "..." if len(last_result) > 2000 else last_result

        # Only linearize the tree when it is actually part of the prompt, and
        # honour the configured item limit instead of a hard-coded 300.
        tree_section = ""
        if self.with_atree:
            tree = linearize_accessibility_tree(obs["accessibility_tree"], "Ubuntu")
            tree = trim_accessibility_tree(tree, self.a11y_tree_max_items)
            tree_section = "\n\n* A11y Tree: {}".format(tree.strip())

        app_info = obs["app_info"].strip() if obs["app_info"] else "None"
        app_info = app_info[:5000] + "..." if len(app_info) > 5000 else app_info

        # A missing (None) window id must not crash the substring check.
        cur_window_id = obs["cur_window_id"]
        if isinstance(cur_window_id, str) and cur_window_id in app_str:
            cur_window = cur_window_id.strip()
        else:
            cur_window = "None"

        prompt = "* Apps: {}\n\n* Current App: {}{}\n\n* App Info: {}\n\n* Previous Action Result: {}".format(
            app_str.strip(),
            cur_window,
            tree_section,
            app_info,
            last_result if last_result else "None",
        ) + ("\n\n" + func_def_prompt if not self.tool_in_sys_msg else "")

        content = [{"type": "text", "text": prompt}]
        if self.with_image and obs.get("screenshot"):
            screenshot = resize_image(obs["screenshot"], self.image_size[0], self.image_size[1])
            content = [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{b64encode(screenshot).decode('utf-8')}",
                        "detail": "high",
                    },
                }
            ] + content

        messages.append({"role": "user", "content": content})

        return messages

    def execute(self, response, obs):
        """Ground the model response into a list of executable actions.

        Only the first parsed snippet is used. ``Agent.*`` and
        ``BrowserTools.*`` calls are evaluated locally to obtain the concrete
        action; anything else is wrapped as a tool command for the current app.

        Args:
            response: Raw model output.
            obs: Current observation; ``cur_app`` selects the tool module.

        Returns:
            A one-element list with the grounded action, or ``[]`` when the
            response could not be parsed or grounded.
        """
        try:
            actions = parse_code_from_string(response)
            action = actions[0]
            logger.info(f"The pesudo action is {action}")

            # SECURITY NOTE(review): `eval` runs model-generated text in this
            # process. It is kept because grounding relies on it, but the input
            # is untrusted -- consider restricting it to a parsed call whitelist.
            if "Agent." in action:
                actions = [
                    eval(action),
                ]
            elif "BrowserTools." in action:  # TODO: special check for BrowserTools
                actions = [
                    eval(action),
                ]
            else:
                actions = Agent.tool_commands(action, obs["cur_app"].strip().replace("-", "_").lower())
            logger.info(f"The grounded action is {actions[0]}")
        except Exception as e:
            # Best effort: an unparsable reply is reported as "no action".
            logger.error("Failed to parse action from response: %s", e)
            actions = []

        return actions

    def format_history(self, max_turns=30):
        """Render recorded turns as alternating user / assistant messages.

        Environment states are omitted; from the second turn on, the user
        message carries the result of the previous action. Long texts are
        truncated (2000 chars for the user side, 1500 for the response).

        Args:
            max_turns: Maximum number of most recent turns to return.

        Returns:
            A list of at most ``2 * max_turns`` message dicts.
        """
        history = []
        for ix in range(self.turn_number):
            if ix == 0:
                env_input = "**Environment State (Omitted)**"
            else:
                env_input = (
                    f"**Environment State (Omitted)**\nPrevious Action Result: {self.contents[ix - 1]['exe_result']}"
                )

            env_input = env_input[:2000] + "..." if len(env_input) > 2000 else env_input
            response = self.contents[ix]["response"]
            if len(response) > 1500:
                response = response[:1500] + "..."
            history.append({"role": "user", "content": [{"type": "text", "text": env_input}]})
            history.append({"role": "assistant", "content": [{"type": "text", "text": response}]})

        return history[-max_turns * 2:]

    def predict(self, instruction: str, obs: Dict) -> tuple:
        """Run one agent step: prompt the model and ground its reply.

        Args:
            instruction: The task description.
            obs: Current observation.

        Returns:
            A ``(response, actions)`` tuple: the raw model output (``""`` when
            the call failed) and the grounded actions (``[]`` when invalid).
        """
        history = self.format_history()
        messages = self.prepare(instruction, obs, history)

        assert self.gen_func is not None, "gen_func is not set"
        try:
            response = self.gen_func(messages)
        except Exception as e:
            logger.error("Failed to call gen_func, Error: " + str(e))
            response = ""

        logger.info("RESPONSE: %s", response)

        actions = self.execute(response, obs)

        # Record the turn. The observation is spread first so that the explicit
        # bookkeeping keys below win: otherwise a stale obs["exe_result"] would
        # overwrite the "Invalid action" marker of this turn.
        self.contents.append(
            {
                **obs,
                "instruction": instruction,
                "index": len(self.contents),
                "response": response,
                "action": "Parse error" if not actions else actions[0],
                "exe_result": "Invalid action" if not actions else "",
            }
        )
        return response, actions

    def reset(self, _logger=None):
        """Clear the recorded turns and optionally swap the module logger."""
        global logger
        logger = _logger if _logger is not None else logging.getLogger("desktopenv.aguvis_agent")

        self.contents = []
# XML namespaces used by the accessibility-tree dumps, per platform.
# NOTE(review): attributes_ns_ubuntu points at the *windows* namespace; it is not
# read by any function here, so the value is left untouched -- confirm intent.
attributes_ns_ubuntu = "https://accessibility.windows.example.org/ns/attributes"
attributes_ns_windows = "https://accessibility.windows.example.org/ns/attributes"
state_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/state"
state_ns_windows = "https://accessibility.windows.example.org/ns/state"
component_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/component"
component_ns_windows = "https://accessibility.windows.example.org/ns/component"
value_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/value"
value_ns_windows = "https://accessibility.windows.example.org/ns/value"
class_ns_windows = "https://accessibility.windows.example.org/ns/class"

# Tag suffixes / exact tags of nodes worth showing to the model.
_KEPT_TAG_SUFFIXES = (
    "item",
    "button",
    "heading",
    "label",
    "scrollbar",
    "searchbox",
    "textbox",
    "link",
    "tabelement",
    "textfield",
    "textarea",
    "menu",
)
_KEPT_TAGS = frozenset(
    {
        "alert",
        "canvas",
        "check-box",
        "combo-box",
        "entry",
        "icon",
        "image",
        "paragraph",
        "scroll-bar",
        "section",
        "slider",
        "static",
        "table-cell",
        "terminal",
        "text",
        "netuiribbontab",
        "start",
        "trayclockwclass",
        "traydummysearchcontrol",
        "uiimage",
        "uiproperty",
        "uiribboncommandbar",
    }
)

# "(x, y)" with non-negative integers, as written in screencoord / size attributes.
_INT_PAIR_RE = re.compile(r"\((\d+), (\d+)\)")


def _parse_pair(value):
    """Parse a ``"(a, b)"`` attribute into a Python tuple without using ``eval``."""
    import ast  # local import: keeps this block independent of the file header

    return ast.literal_eval(value)


def find_leaf_nodes(xlm_file_str):
    """Return all leaf elements of an XML document, in document order.

    Args:
        xlm_file_str: XML text; a falsy value yields an empty list.

    Returns:
        The list of elements that have no children.
    """
    if not xlm_file_str:
        return []

    root = ET.fromstring(xlm_file_str)
    return [node for node in root.iter() if len(node) == 0]


def judge_node(node: ET.Element, platform="Ubuntu", check_image=False) -> bool:
    """Decide whether an accessibility node should be kept.

    A node is kept when its tag is of an interesting kind, it is visible (and
    showing, on Ubuntu), it carries a name, text or (optionally) an image, and
    it has non-negative screen coordinates with a strictly positive size.

    Args:
        node: The element to inspect.
        platform: ``"Ubuntu"`` or ``"Windows"``.
        check_image: Also accept nodes whose ``image`` attribute is ``"true"``.

    Returns:
        True when the node passes every filter.

    Raises:
        ValueError: For an unknown platform, or (like ``SyntaxError``) when a
            coordinate attribute of an otherwise accepted node is malformed.
    """
    if platform == "Ubuntu":
        _state_ns = state_ns_ubuntu
        _component_ns = component_ns_ubuntu
    elif platform == "Windows":
        _state_ns = state_ns_windows
        _component_ns = component_ns_windows
    else:
        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")

    tag = node.tag
    if not (tag.startswith("document") or tag.endswith(_KEPT_TAG_SUFFIXES) or tag in _KEPT_TAGS):
        return False

    visible = node.get(f"{{{_state_ns}}}visible", "false") == "true"
    if platform == "Ubuntu":
        visible = visible and node.get(f"{{{_state_ns}}}showing", "false") == "true"
    if not visible:
        return False

    has_content = (
        node.get("name", "") != ""
        or (node.text is not None and len(node.text) > 0)
        or (check_image and node.get("image", "false") == "true")
    )
    if not has_content:
        return False

    # Attribute text is data, not code: parse it with literal_eval, never eval.
    coordinates: Tuple[int, int] = _parse_pair(node.get(f"{{{_component_ns}}}screencoord", "(-1, -1)"))
    sizes: Tuple[int, int] = _parse_pair(node.get(f"{{{_component_ns}}}size", "(-1, -1)"))
    return coordinates[0] >= 0 and coordinates[1] >= 0 and sizes[0] > 0 and sizes[1] > 0


def filter_nodes(root: ET.Element, platform="Ubuntu", check_image=False):
    """Return every node under ``root`` (inclusive) accepted by ``judge_node``."""
    return [node for node in root.iter() if judge_node(node, platform, check_image)]


def draw_bounding_boxes(nodes, image_file_content, down_sampling_ratio=1.0, platform="Ubuntu"):
    """Draw numbered red boxes around nodes on a screenshot.

    Nodes without coordinates/size, with a non-positive size, or whose area is
    a single flat color are skipped.

    Args:
        nodes: Elements carrying ``screencoord`` and ``size`` attributes.
        image_file_content: Encoded screenshot bytes.
        down_sampling_ratio: Scale applied to the image and to the boxes.
        platform: ``"Ubuntu"`` or ``"Windows"``.

    Returns:
        ``(marks, drew_nodes, text, image_bytes)``: the ``[x, y, w, h]`` boxes in
        original coordinates, the nodes that were drawn, a tab-separated
        ``index/tag/name/text`` table, and the annotated image as PNG bytes.

    Raises:
        ValueError: For an unknown platform.
    """
    if platform == "Ubuntu":
        _component_ns = component_ns_ubuntu
        _value_ns = value_ns_ubuntu
    elif platform == "Windows":
        _component_ns = component_ns_windows
        _value_ns = value_ns_windows
    else:
        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")

    # Load the screenshot image
    image = Image.open(io.BytesIO(image_file_content))
    scaled = float(down_sampling_ratio) != 1.0
    if scaled:
        image = image.resize((int(image.size[0] * down_sampling_ratio), int(image.size[1] * down_sampling_ratio)))
    draw = ImageDraw.Draw(image)
    marks = []
    drew_nodes = []
    text_informations: List[str] = ["index\ttag\tname\ttext"]

    try:
        # Adjust the path to the font file you have or use a default one
        font = ImageFont.truetype("arial.ttf", 15)
    except IOError:
        # Fallback to a basic font if the specified font can't be loaded
        font = ImageFont.load_default()

    index = 1

    # Loop over all the visible nodes and draw their bounding boxes
    for _node in nodes:
        coords_str = _node.attrib.get(f"{{{_component_ns}}}screencoord")
        size_str = _node.attrib.get(f"{{{_component_ns}}}size")
        if not (coords_str and size_str):
            continue

        try:
            # Parse the coordinates and size from the strings
            coords = tuple(map(int, coords_str.strip("()").split(", ")))
            size = tuple(map(int, size_str.strip("()").split(", ")))

            # Tuples are immutable, so the unscaled values can simply be kept.
            original_coords = coords
            original_size = size

            if scaled:
                # Downsample the coordinates and size
                coords = tuple(int(coord * down_sampling_ratio) for coord in coords)
                size = tuple(int(s * down_sampling_ratio) for s in size)

            # Check for negative sizes
            if size[0] <= 0 or size[1] <= 0:
                raise ValueError(f"Size must be positive, got: {size}")

            # Calculate the bottom-right corner of the bounding box
            bottom_right = (coords[0] + size[0], coords[1] + size[1])

            # Check that bottom_right > coords (x1 >= x0, y1 >= y0)
            if bottom_right[0] < coords[0] or bottom_right[1] < coords[1]:
                raise ValueError(f"Invalid coordinates or size, coords: {coords}, size: {size}")

            # Check if the area only contains one color
            cropped_image = image.crop((*coords, *bottom_right))
            if len(set(cropped_image.getdata())) == 1:
                continue

            # Draw rectangle on image
            draw.rectangle([coords, bottom_right], outline="red", width=1)

            # Draw the index at the bottom-left corner, white on a black background
            text_position = (coords[0], bottom_right[1])
            text_bbox: Tuple[int, int, int, int] = draw.textbbox(text_position, str(index), font=font, anchor="lb")
            draw.rectangle(text_bbox, fill="black")
            draw.text(text_position, str(index), font=font, anchor="lb", fill="white")

            # each mark is an x, y, w, h tuple
            marks.append([original_coords[0], original_coords[1], original_size[0], original_size[1]])
            drew_nodes.append(_node)

            # Text column: quotes are doubled and the value wrapped in quotes.
            if _node.text:
                node_text = _node.text if '"' not in _node.text else '"{:}"'.format(_node.text.replace('"', '""'))
            elif _node.get("{{{:}}}class".format(class_ns_windows), "").endswith("EditWrapper") and _node.get(
                "{{{:}}}value".format(_value_ns)
            ):
                node_text = _node.get("{{{:}}}value".format(_value_ns), "")
                node_text = node_text if '"' not in node_text else '"{:}"'.format(node_text.replace('"', '""'))
            else:
                node_text = '""'
            text_information: str = "{:d}\t{:}\t{:}\t{:}".format(index, _node.tag, _node.get("name", ""), node_text)
            text_informations.append(text_information)

            index += 1

        except ValueError:
            # Unparsable or degenerate geometry: skip this node on purpose.
            pass

    output_image_stream = io.BytesIO()
    image.save(output_image_stream, format="PNG")
    image_content = output_image_stream.getvalue()

    return marks, drew_nodes, "\n".join(text_informations), image_content


def print_nodes_with_indent(nodes, indent=0):
    """Print tag and attributes of each node and, recursively, of its children."""
    for node in nodes:
        print(" " * indent, node.tag, node.attrib)
        print_nodes_with_indent(node, indent + 2)


def find_active_applications(tree, state_ns):
    """Return the names of the applications that should be kept in the tree.

    Args:
        tree: Parsed ``ElementTree`` whose root children are applications.
        state_ns: State namespace used for the ``active`` attribute.

    Returns:
        Names of applications owning an active frame plus ``"gnome-shell"``;
        ``["gjs", "gnome-shell"]`` when no frame is active.
    """
    apps_with_active_tag = []
    active_key = "{{{:}}}active".format(state_ns)
    for application in list(tree.getroot()):
        app_name = application.attrib.get("name")
        for frame in application:
            if frame.attrib.get(active_key, "false") == "true":
                apps_with_active_tag.append(app_name)
    if apps_with_active_tag:
        return apps_with_active_tag + ["gnome-shell"]
    return ["gjs", "gnome-shell"]


def linearize_accessibility_tree(accessibility_tree, platform="Ubuntu"):
    """Flatten an accessibility-tree XML dump into a tab-separated table.

    Inactive applications are dropped, nodes are filtered with ``judge_node``
    and near-duplicate rows are removed. Each row holds the tag, the text
    (``name``, ``text`` or ``name (text)``), the center position and the size.

    Args:
        accessibility_tree: XML text of the tree.
        platform: ``"Ubuntu"`` or ``"Windows"``.

    Returns:
        The table as one string (header line first), or ``""`` when the tree
        could not be processed.

    Raises:
        ValueError: For an unknown platform.
    """
    if platform == "Ubuntu":
        _state_ns = state_ns_ubuntu
        _component_ns = component_ns_ubuntu
    elif platform == "Windows":
        _state_ns = state_ns_windows
        _component_ns = component_ns_windows
    else:
        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")

    coord_key = "{{{:}}}screencoord".format(_component_ns)
    size_key = "{{{:}}}size".format(_component_ns)

    try:
        tree = ET.ElementTree(ET.fromstring(accessibility_tree))
        keep_apps = find_active_applications(tree, _state_ns)
        root = tree.getroot()

        # Remove inactive applications
        for application in list(root):
            if application.get("name") not in keep_apps:
                root.remove(application)

        filtered_nodes = filter_nodes(root, platform, check_image=True)
        rows = ["tag\ttext\tposition (center x & y)\tsize (w & h)"]

        # Linearize the accessibility tree nodes into a table format
        for node in filtered_nodes:
            text = (node.text if node.text is not None else "").strip()
            name = node.get("name", "").strip()
            if text == "":
                text = name
            elif name != "" and text != name:
                text = f"{name} ({text})"
            text = text.replace("\n", "\\n")

            size = node.get(size_key, "")
            pos_match = _INT_PAIR_RE.match(node.get(coord_key, ""))
            size_match = _INT_PAIR_RE.match(size)
            if pos_match is None or size_match is None:
                # No usable geometry: the node cannot be located, skip it.
                continue

            x, y = (int(v) for v in pos_match.groups())
            w, h = (int(v) for v in size_match.groups())
            x_mid, y_mid = x + w // 2, y + h // 2

            rows.append("{:}\t{:}\t{:}\t{:}".format(node.tag, text, f"({x_mid}, {y_mid})", size))

        # Filter out similar nodes
        linearized_accessibility_tree = filter_similar_nodes("\n".join(rows))
    except Exception as e:
        # Best effort: a broken tree yields an empty description, not a crash.
        print(f"Error in linearize_accessibility_tree: {e}")
        linearized_accessibility_tree = ""

    return linearized_accessibility_tree


def trim_accessibility_tree(linearized_accessibility_tree, max_items):
    """Keep at most ``max_items`` lines, appending ``"..."`` when truncated."""
    lines = linearized_accessibility_tree.strip().split("\n")
    if len(lines) > max_items:
        linearized_accessibility_tree = "\n".join(lines[:max_items]) + "\n..."
    return linearized_accessibility_tree


def parse_line(line):
    """Parse one linearized row into a node dict, or return None.

    Expected shape, e.g. ``label Google Chrome (191, 13) (104, 17)``: tag,
    text, center position and size.

    Returns:
        A dict with ``type``, ``text``, ``bbox`` (x1, y1, x2, y2), ``center``,
        ``size`` and ``raw``; None when the line does not match.
    """
    pattern = r"^(\S+)\s+(.+?)\s+\((\d+), (\d+)\)\s+\((\d+), (\d+)\)"
    m = re.match(pattern, line)
    if not m:
        return None
    node_type, text, cx, cy, w, h = m.groups()
    cx, cy, w, h = map(int, (cx, cy, w, h))
    # bounding box as (x1, y1, x2, y2)
    x1 = cx - w // 2
    y1 = cy - h // 2
    x2 = x1 + w
    y2 = y1 + h
    return {
        "type": node_type,
        "text": text.strip(),
        "bbox": (x1, y1, x2, y2),
        "center": (cx, cy),
        "size": (w, h),
        "raw": line,
    }


def iou(box1, box2):
    """Intersection-over-union of two ``(x1, y1, x2, y2)`` boxes (0 if both empty)."""
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])
    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - inter_area
    if union == 0:
        return 0
    return inter_area / union


def norm_text(s):
    """Normalize text for comparison: lowercase and drop all whitespace."""
    return re.sub(r"\s+", "", s.lower())


def text_similarity(a, b):
    """Return 1.0 when the normalized texts are identical, otherwise 0."""
    if norm_text(a) == norm_text(b):
        return 1.0
    return 0


def filter_similar_nodes(linearized_accessibility_tree):
    """Drop rows that duplicate an earlier row (same text, overlapping box).

    Rows that cannot be parsed (such as the header) are always kept. For two
    parsable rows with identical normalized text and IoU above the threshold,
    the later one is removed.

    Args:
        linearized_accessibility_tree: Newline-separated rows.

    Returns:
        The remaining rows joined with newlines, in their original order.
    """
    lines = [ln for ln in linearized_accessibility_tree.split("\n") if ln.strip()]
    # Parse all rows; unparsable ones are kept verbatim.
    nodes = []
    for ln in lines:
        node = parse_line(ln)
        nodes.append(node if node else {"raw": ln, "invalid": True})

    filtered = []
    removed = [False] * len(nodes)
    # Thresholds can be tuned.
    IOU_THRESH = 0.2
    TEXT_THRESH = 0.9
    for i, ni in enumerate(nodes):
        if ni.get("invalid"):
            filtered.append(ni["raw"])
            continue
        if removed[i]:
            continue
        for j in range(i + 1, len(nodes)):
            nj = nodes[j]
            if nj.get("invalid"):
                continue
            iou_val = iou(ni["bbox"], nj["bbox"])
            text_sim = text_similarity(ni["text"], nj["text"])
            if iou_val > IOU_THRESH and text_sim > TEXT_THRESH:
                # The two rows are near-identical: drop the later one.
                removed[j] = True
        filtered.append(ni["raw"])
    return "\n".join(filtered)


# Example usage
if __name__ == "__main__":
    linearized_accessibility_tree = "tag\ttext\tposition (center x & y)\tsize (w & h)\nicon\t\t(1853, 1001)\t(64, 64)\nlabel\tHome\t(1853, 1045)\t(40, 17)\nlabel\tActivities\t(49, 13)\t(63, 17)\ntext\tActivities\t(49, 13)\t(63, 17)\nlabel\tApr 17 17‎∶04\t(995, 13)\t(117, 27)\ntext\tApr 17 17‎∶04\t(995, 13)\t(87, 18)\nmenu\tSystem\t(1867, 13)\t(106, 27)\npush-button\tGoogle Chrome\t(35, 65)\t(70, 64)\npush-button\tThunderbird Mail\t(35, 133)\t(70, 64)\npush-button\tVisual Studio Code\t(35, 201)\t(70, 64)\npush-button\tVLC media player\t(35, 269)\t(70, 64)\npush-button\tLibreOffice Writer\t(35, 337)\t(70, 64)\npush-button\tLibreOffice Calc\t(35, 405)\t(70, 64)\npush-button\tLibreOffice Impress\t(35, 473)\t(70, 64)\npush-button\tGNU Image Manipulation Program\t(35, 541)\t(70, 64)\npush-button\tFiles\t(35, 609)\t(70, 64)\npush-button\tUbuntu Software\t(35, 677)\t(70, 64)\npush-button\tHelp\t(35, 745)\t(70, 64)\npush-button\tTrash\t(35, 816)\t(70, 64)\ntoggle-button\tShow Applications\t(35, 1045)\t(70, 70)"
    result = filter_similar_nodes(linearized_accessibility_tree)
    print(result)
64)\npush-button\tHelp\t(35, 745)\t(70, 64)\npush-button\tTrash\t(35, 816)\t(70, 64)\ntoggle-button\tShow Applications\t(35, 1045)\t(70, 70)" + result = filter_similar_nodes(linearized_accessibility_tree) + print(result) diff --git a/mm_agents/autoglm_v/prompt/grounding_agent.py b/mm_agents/autoglm_v/prompt/grounding_agent.py new file mode 100644 index 00000000..e29c7513 --- /dev/null +++ b/mm_agents/autoglm_v/prompt/grounding_agent.py @@ -0,0 +1,260 @@ +import base64 +import json +import logging +import os +import xml.etree.ElementTree as ET +from typing import Dict, List, Optional, Tuple + +logger = logging.getLogger("desktopenv.agent") + + +def agent_action(func): + func.is_agent_action = True + return func + + +switch_window_code = """import subprocess; +import pyautogui; +pyautogui.press('escape'); +time.sleep(0.5); +subprocess.run(['wmctrl', '-ia', 'WINDOW_ID']) +subprocess.run(['wmctrl', '-ir', 'WINDOW_ID', '-b', 'add,maximized_vert,maximized_horz']) +print('Switch to WINDOW_ID')""" + +launch_app_commands = { + # Web Browser + "chrome": "google-chrome --remote-debugging-port=1337", + # File Manager + "files": "nautilus", + # Terminal + "terminal": 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" && gnome-terminal', + # Utilities + "gedit": "gedit", + # Office + "libreoffice writer": "libreoffice --writer", + "libreoffice calc": "libreoffice --calc", + "libreoffice impress": "libreoffice --impress", + # System + "settings": 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" && gnome-control-center', + # Multimedia + "vlc": "vlc", + "gimp": "gimp", + # IDE + "vs code": "code", + # Email + "thunderbird": "thunderbird", +} + + +class GroundingAgent: + + tool_list = { + "libreoffice_calc": "CalcTools", + "libreoffice_impress": "ImpressTools", + "libreoffice_writer": "WriterTools", + "code": "CodeTools", + "vlc": "VLCTools", + "google_chrome": "BrowserTools", + } + + relative_coordinate = True # whether the coordinates are relative 
(0-1000) or absolute (e.g. 1920x1080) + + @classmethod + def tool_commands(cls, code: str, tool_name: str): + command = f"from {tool_name} import *; " + command += code + + tool_class = cls.tool_list[tool_name] + command += f"; {tool_class}.print_result()" + + return [ + command, + ] + + @classmethod + @agent_action + def click( + cls, + coordinate: List, + num_clicks: int = 1, + button_type: str = "left", + ): + """ + Click on the element + + Args: + coordinate (List): [x, y], coordinate of the element to click on + num_clicks (int): number of times to click the element + button_type (str): which mouse button to press ("left", "middle", or "right") + """ + command = "" + x, y = coordinate + if cls.relative_coordinate: + x, y = round(x * 1920 / 1000), round(y * 1080 / 1000) + command += f"""pyautogui.click({x}, {y}, clicks={num_clicks}, button={repr(button_type)}); print("Click Success")""" # TODO: 最大化窗口需要一次调用 + return command + + @classmethod + @agent_action + def type( + cls, + coordinate: Optional[List] = None, + text: str = "", + overwrite: bool = False, + enter: bool = False, + ): + """ + Type text into the element + + Args: + coordinate (List): [x, y], coordinate of the element to type into. 
If None, typing starts at current cursor location + text (str): the text to type + overwrite (bool): True to overwrite existing text, False otherwise + enter (bool): True to press enter after typing, False otherwise + """ + + command = "" + + if coordinate is not None: + # Start typing at the center of the element + x, y = coordinate + if cls.relative_coordinate: + x, y = round(x * 1920 / 1000), round(y * 1080 / 1000) + command += f"pyautogui.click({x}, {y}); " + + if overwrite: + command += f"pyautogui.hotkey('ctrl', 'a'); pyautogui.press('backspace'); " + + command += f"pyautogui.write({repr(text)}); " + + if enter: + command += "pyautogui.press('enter'); " + + command += "print('Type Success')" + + return command + + @classmethod + @agent_action + def drag_and_drop(cls, drag_from_coordinate: List, drop_on_coordinate: List): + """ + Drag element1 and drop it on element2 + + Args: + drag_from_coordinate (List): [x, y], coordinate of element to drag + drop_on_coordinate (List): [x, y], coordinate of element to drop on + """ + x1, y1 = drag_from_coordinate + if cls.relative_coordinate: + x1, y1 = round(x1 * 1920 / 1000), round(y1 * 1080 / 1000) + x2, y2 = drop_on_coordinate + if cls.relative_coordinate: + x2, y2 = round(x2 * 1920 / 1000), round(y2 * 1080 / 1000) + + command = f"pyautogui.moveTo({x1}, {y1}); " + # TODO: specified duration? 
+ command += f"pyautogui.dragTo({x2}, {y2}, duration=1.); pyautogui.mouseUp(); " + + command += "print('Drag and Drop Success')" + + return command + + @classmethod + @agent_action + def scroll(cls, coordinate: List, direction: str): + """ + Scroll the element in the specified direction + + Args: + coordinate (List): [x, y], coordinate of the element to scroll in + direction (str): the direction to scroll ("up" or "down") + """ + x, y = coordinate + if cls.relative_coordinate: + x, y = round(x * 1920 / 1000), round(y * 1080 / 1000) + amount = 100 if direction == "up" else -100 + return f"import pyautogui; pyautogui.moveTo({x}, {y}); pyautogui.scroll({amount}); print('Scroll Success')" + + @classmethod + @agent_action + def open_app(cls, app_name: str): + """ + Open a specified application + + Supported apps: chrome, files, terminal, gedit, libreoffice writer, + libreoffice calc, libreoffice impress, vs code, vlc, gimp, settings, thunderbird + + Args: + app_name (str): name of the application to open + """ + + app_name = app_name.lower().strip() + + if app_name not in launch_app_commands: + command = f"print(f'{app_name} is not supported or recognized')" + else: + command = { + "action_type": "OPEN_APP", + "parameters": {"launch_app_command": launch_app_commands[app_name], "app_name": app_name}, + } + + return command + + @classmethod + @agent_action + def switch_window(cls, window_id: str): + """ + Switch to the window with the given window id + + Args: + window_id (str): the window id to switch to from the provided list of open windows + """ + return switch_window_code.replace("WINDOW_ID", window_id) + + @classmethod + @agent_action + def hotkey(cls, keys: List): + """ + Press a hotkey combination + + Args: + keys (List): the keys to press in combination (e.g. 
# GroundingAgent actions (class body), continued. The signatures and docstrings
# are read through inspect.signature()/__doc__ when the prompt is built, so
# they are kept exactly as written; maintainer notes are `#` comments only.

@classmethod
@agent_action
def hotkey(cls, keys: List):
    """
    Press a hotkey combination

    Args:
        keys (List): the keys to press in combination (e.g. ['ctrl', 'c'] for copy, ['prtsc'] for screenshot)
    """
    # repr() yields a valid Python literal for every key name, including the
    # quote, backslash and brace keys that broke the hand-built quoting.
    key_args = ", ".join(repr(str(key)) for key in keys)
    message = f"Press Hotkey: {key_args}"
    return f"import pyautogui; pyautogui.hotkey({key_args}); print({message!r})"


@classmethod
@agent_action
def quote(cls, content: str):
    """
    Quote information from the current page for memory

    Args:
        content (str): text summarized or copied from the page for later operation
    """
    # The text used to be wrapped in a hand-written triple-quoted literal.
    # Content ending in `"` or containing `"""` produced a syntax error, and
    # backslash sequences were re-interpreted. repr() round-trips any text.
    return f"print({str(content)!r})"


@classmethod
@agent_action
def wait(cls):
    """
    Wait for a while

    """
    return "WAIT"


@classmethod
@agent_action
def exit(cls, success: bool):
    """
    End the current task

    Args:
        success (bool): True if successfully finish a task, False otherwise
    """
    return "DONE" if success else "FAIL"
def _render_stub(func, in_class):
    """Render one tool description as a Python stub (signature and docstring, no body).

    Args:
        func: the "function" mapping of a tool entry ("name", "description",
            and an optional JSON-schema style "parameters" mapping).
        in_class: True to render an indented method taking ``cls`` and named by
            the last dotted part of the tool name; False to render a top-level
            function under the full name.

    Returns:
        The stub text, terminated by one blank line.
    """
    schema = func.get("parameters") or {}
    params = schema.get("properties") or {}
    required = list(schema.get("required") or [])
    # Required parameters come first, then the rest in declaration order.
    # No default value is emitted for the optional ones.
    optional = [name for name in params if name not in required]

    if in_class:
        indent = "    "
        func_name = func["name"].split(".")[-1]
        signature = ["cls", *required, *optional]
    else:
        indent = ""
        func_name = func["name"]
        signature = [*required, *optional]

    doc_indent = indent + "    "
    arg_indent = doc_indent + "    "
    lines = [
        f"{indent}def {func_name}({', '.join(signature)}):",
        f'{doc_indent}"""',
        f"{doc_indent}{func.get('description', '')}",
        "",
        f"{doc_indent}Args:",
    ]

    entries = [(name, "") for name in required] + [(name, ", optional") for name in optional]
    if not entries:
        lines.append(f"{arg_indent}None")
    for name, suffix in entries:
        # A required name may be missing from "properties", and a property may
        # omit "type"; fall back to placeholders instead of raising KeyError.
        spec = params.get(name) or {}
        lines.append(f"{arg_indent}{name} ({spec.get('type', 'any')}{suffix}): {spec.get('description', '')}")

    lines.append(f'{doc_indent}"""')
    return "\n".join(lines) + "\n\n"


def generate_func(json_data):
    """Turn a list of tool descriptions into Python stub source for the prompt.

    Entries whose "type" is not "function" are ignored. A tool named
    ``Class.method`` is rendered as a method of ``class Class``; any other
    name is rendered as a top-level function after all classes.

    Args:
        json_data: iterable of tool entries, each a mapping with "type" and
            "function" keys.

    Returns:
        tuple: (stub source with surrounding whitespace stripped, name of the
        last class rendered, or "" when no class was rendered).
    """
    class_funcs = {}
    no_class_funcs = []
    for item in json_data:
        if item["type"] != "function":
            continue
        func = item["function"]
        parts = func["name"].split(".")
        if len(parts) == 2:
            class_funcs.setdefault(parts[0], []).append(func)
        else:
            no_class_funcs.append(func)

    pieces = []
    cls_name = ""
    # Class-bound tools first, one class per distinct prefix.
    for class_name, funcs in class_funcs.items():
        cls_name = class_name
        pieces.append(f"class {class_name}:\n")
        pieces.extend(_render_stub(func, True) for func in funcs)
        pieces.append("\n")

    # Then the tools that do not belong to a class.
    pieces.extend(_render_stub(func, False) for func in no_class_funcs)

    return "".join(pieces).strip(), cls_name
class Prompt:
    """Assembles the prompt sections: setup, function definitions and notes."""

    @staticmethod
    def construct_procedural_memory(agent_class, app_name=None, client_password="password", with_image=True, with_atree=False, relative_coordinate=True, glm41v_format=True):
        """
        Build the three prompt sections for ``agent_class``.

        Args:
            agent_class: class whose callables carrying ``is_agent_action`` are listed as available functions
            app_name: when not None, tools/apis/<lower-cased app_name>.json is loaded and its stubs are appended
            client_password: value substituted for {client_password} in the note section
            with_image: include "screenshot" in the observation list
            with_atree: include the a11y tree in the observation list
            relative_coordinate: add the 0-1000 coordinate hint to the note section
            glm41v_format: selects which of the two output-format hints is used

        Returns:
            tuple: (setup prompt, function-definition prompt, note prompt)
        """
        # NOTE(review): the whitespace inside this stub template and the exact
        # text of the two format hints below were recovered from a
        # whitespace-collapsed view of the file. As seen there, the hints
        # differ only by one newline. Confirm both against the checked-in
        # source before merging.
        stubs = []
        for attr_name in dir(agent_class):
            attr = getattr(agent_class, attr_name)
            if callable(attr) and hasattr(attr, "is_agent_action"):
                # Use inspect to get the full function signature
                signature = inspect.signature(attr)
                stubs.append(f"\n    def {attr_name}{signature}:\n    '''{attr.__doc__}'''\n        ")
        agent_class_content = "Class Agent:" + "".join(stubs)

        if app_name is not None:
            tool_path = os.path.join(current_dir, "tools", "apis", f"{app_name.lower()}.json")
            # JSON is UTF-8 by specification; do not depend on the locale default.
            with open(tool_path, "r", encoding="utf-8") as f:
                json_data = json.load(f)

            # Only the stub text is needed here; the class name is not used.
            tool_class_content, _ = generate_func(json_data)

            agent_class_content += "\n\n{}".format(tool_class_content)

        func_def_prompt = func_def_template.format(class_content=agent_class_content.strip())

        # --- dynamic observation list ---
        obs_items = []
        if with_image:
            obs_items.append("screenshot")
        obs_items.append("current app name")
        if with_atree:
            obs_items.append("a11y tree (based on AT-SPI library)")
        obs_items.append("app info")
        obs_items.append("last action result")
        observation_list = ", ".join(obs_items)

        if glm41v_format:
            format_hint = "\n{**YOUR-PLAN-AND-THINKING**}\n```python\n{**ONE-LINE-OF-CODE**}\n```"
        else:
            format_hint = "\n{**YOUR-PLAN-AND-THINKING**}\n\n```python\n{**ONE-LINE-OF-CODE**}\n```"
        setup_prompt_formatted = setup_prompt.format(
            observation_list=observation_list,
            format_hint=format_hint,
        )

        if relative_coordinate:
            relative_coordinate_hint = "- The coordinate [x, y] should be normalized to 0-1000, which usually should be the center of a specific target element.\n"
        else:
            relative_coordinate_hint = ""
        note_prompt_formatted = note_prompt.format(
            relative_coordinate_hint=relative_coordinate_hint,
            client_password=client_password,
        )

        return setup_prompt_formatted, func_def_prompt, note_prompt_formatted
"description": "File path to open" + }, + "line": { + "type": "integer", + "description": "Line number", + "default": 1 + }, + "character": { + "type": "integer", + "description": "Character position", + "default": 1 + } + }, + "required": ["file_path"] + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.perform_merge", + "description": "Perform three-way merge", + "parameters": { + "type": "object", + "properties": { + "path1": { + "type": "string", + "description": "First version file path" + }, + "path2": { + "type": "string", + "description": "Second version file path" + }, + "base": { + "type": "string", + "description": "Base version file path" + }, + "result": { + "type": "string", + "description": "Output file path" + } + }, + "required": ["path1", "path2", "base", "result"] + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.remove_folder", + "description": "Remove folder from active VS Code window", + "parameters": { + "type": "object", + "properties": { + "folder": { + "type": "string", + "description": "Folder path to remove" + } + }, + "required": ["folder"] + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.install_extension", + "description": "Install or update VS Code extension", + "parameters": { + "type": "object", + "properties": { + "extension_id": { + "type": "string", + "description": "Extension identifier" + }, + "pre_release": { + "type": "boolean", + "description": "Install pre-release version", + "default": false + } + }, + "required": ["extension_id"] + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.uninstall_extension", + "description": "Uninstall VS Code extension", + "parameters": { + "type": "object", + "properties": { + "extension_id": { + "type": "string", + "description": "Extension identifier" + } + }, + "required": ["extension_id"] + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.list_extensions", + 
"description": "List installed extensions", + "parameters": { + "type": "object", + "properties": { + "show_versions": { + "type": "boolean", + "description": "Show extension versions", + "default": false + }, + "category": { + "type": "string", + "description": "Filter by category" + } + } + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.update_extensions", + "description": "Update all extensions to latest version", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.disable_extension", + "description": "Disable extension for next VS Code instance", + "parameters": { + "type": "object", + "properties": { + "extension_id": { + "type": "string", + "description": "Extension identifier" + } + }, + "required": ["extension_id"] + } + } + }, + { + "type": "function", + "function": { + "name": "CodeTools.toggle_sync", + "description": "Toggle VS Code synchronization", + "parameters": { + "type": "object", + "properties": { + "state": { + "type": "string", + "description": "Sync state", + "enum": ["on", "off"] + } + }, + "required": ["state"] + } + } + } +] \ No newline at end of file diff --git a/mm_agents/autoglm_v/tools/apis/func.py b/mm_agents/autoglm_v/tools/apis/func.py new file mode 100644 index 00000000..84ee5480 --- /dev/null +++ b/mm_agents/autoglm_v/tools/apis/func.py @@ -0,0 +1,117 @@ +def generate_func(json_data): + # 收集所有类名和它们的函数 + class_funcs = {} + no_class_funcs = [] + + for item in json_data: + if item["type"] == "function": + func = item["function"] + func_parts = func["name"].split(".") + + if len(func_parts) == 2: + class_name, func_name = func_parts + if class_name not in class_funcs: + class_funcs[class_name] = [] + class_funcs[class_name].append(item) + else: + no_class_funcs.append(item) + + code = "" + + # 生成有类的函数 + for class_name, funcs in class_funcs.items(): + code += f"class {class_name}:\n" + for item in funcs: + func = item["function"] 
def _render_stub(func, in_class):
    """Render one tool description as a Python stub (signature and docstring, no body).

    Args:
        func: the "function" mapping of a tool entry ("name", "description",
            and an optional JSON-schema style "parameters" mapping).
        in_class: True to render an indented method taking ``cls`` and named by
            the last dotted part of the tool name; False to render a top-level
            function under the full name.

    Returns:
        The stub text, terminated by one blank line.
    """
    schema = func.get("parameters") or {}
    params = schema.get("properties") or {}
    required = list(schema.get("required") or [])
    # Required parameters come first, then the rest in declaration order.
    # No default value is emitted for the optional ones.
    optional = [name for name in params if name not in required]

    if in_class:
        indent = "    "
        func_name = func["name"].split(".")[-1]
        signature = ["cls", *required, *optional]
    else:
        indent = ""
        func_name = func["name"]
        signature = [*required, *optional]

    doc_indent = indent + "    "
    arg_indent = doc_indent + "    "
    lines = [
        f"{indent}def {func_name}({', '.join(signature)}):",
        f'{doc_indent}"""',
        f"{doc_indent}{func.get('description', '')}",
        "",
        f"{doc_indent}Args:",
    ]

    entries = [(name, "") for name in required] + [(name, ", optional") for name in optional]
    if not entries:
        lines.append(f"{arg_indent}None")
    for name, suffix in entries:
        # A required name may be missing from "properties", and a property may
        # omit "type"; fall back to placeholders instead of raising KeyError.
        spec = params.get(name) or {}
        lines.append(f"{arg_indent}{name} ({spec.get('type', 'any')}{suffix}): {spec.get('description', '')}")

    lines.append(f'{doc_indent}"""')
    return "\n".join(lines) + "\n\n"


def generate_func(json_data):
    """Turn a list of tool descriptions into Python stub source.

    Entries whose "type" is not "function" are ignored. A tool named
    ``Class.method`` is rendered as a method of ``class Class``; any other
    name is rendered as a top-level function after all classes.

    Args:
        json_data: iterable of tool entries, each a mapping with "type" and
            "function" keys.

    Returns:
        str: the stub source with surrounding whitespace stripped.
    """
    class_funcs = {}
    no_class_funcs = []
    for item in json_data:
        if item["type"] != "function":
            continue
        func = item["function"]
        parts = func["name"].split(".")
        if len(parts) == 2:
            class_funcs.setdefault(parts[0], []).append(func)
        else:
            no_class_funcs.append(func)

    pieces = []
    # Class-bound tools first, one class per distinct prefix.
    for class_name, funcs in class_funcs.items():
        pieces.append(f"class {class_name}:\n")
        pieces.extend(_render_stub(func, True) for func in funcs)
        pieces.append("\n")

    # Then the tools that do not belong to a class.
    pieces.extend(_render_stub(func, False) for func in no_class_funcs)

    return "".join(pieces).strip()
"BrowserTools.bring_back_last_tab", + "description": "Restores last-closed tab (Ctrl+Shift+T).", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "BrowserTools.print", + "description": "Opens print dialog (Ctrl+P).", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "BrowserTools.delete_browsing_data", + "description": "Opens clear browsing data dialog (Ctrl+Shift+Del).", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "BrowserTools.open_extensions", + "description": "Opens extensions management page.", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "BrowserTools.bookmark_page", + "description": "Bookmarks current page (Ctrl+D).", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "BrowserTools.open_bookmarks", + "description": "Opens bookmarks page.", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + } +] \ No newline at end of file diff --git a/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json b/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json new file mode 100644 index 00000000..90fe4e79 --- /dev/null +++ b/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json @@ -0,0 +1,634 @@ +[ + { + "type": "function", + "function": { + "name": "CalcTools.get_workbook_info", + "description": "Get workbook info: file path, name, sheets, and active sheet", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.save", + "description": "Save workbook to current location", + "parameters": { + 
"type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.get_column_data", + "description": "Get all data from specified column", + "parameters": { + "type": "object", + "properties": { + "column_name": { + "type": "string", + "description": "Column name (e.g. 'A', 'B')" + } + }, + "required": [ + "column_name" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.switch_active_sheet", + "description": "Switch to sheet (creates if not exists)", + "parameters": { + "type": "object", + "properties": { + "sheet_name": { + "type": "string", + "description": "Sheet name" + } + }, + "required": [ + "sheet_name" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.set_column_values", + "description": "Set values to column (values only, not formulas)", + "parameters": { + "type": "object", + "properties": { + "column_name": { + "type": "string", + "description": "Column name (e.g. 'A', 'B')" + }, + "data": { + "type": "array", + "description": "Values to write" + }, + "start_index": { + "type": "integer", + "description": "First row index (default: 2)" + } + }, + "required": [ + "column_name", + "data" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.highlight_range", + "description": "Highlight range with color", + "parameters": { + "type": "object", + "properties": { + "range_str": { + "type": "string", + "description": "Range (e.g. 'A1:B10')" + }, + "color": { + "type": "integer", + "description": "Color value (default: 0xFF0000)" + } + }, + "required": [ + "range_str" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.transpose_range", + "description": "Transpose range and paste to target cell", + "parameters": { + "type": "object", + "properties": { + "source_range": { + "type": "string", + "description": "Source range (e.g. 
'A1:B10')" + }, + "target_cell": { + "type": "string", + "description": "Target cell (e.g. 'A1')" + } + }, + "required": [ + "source_range", + "target_cell" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.export_to_csv", + "description": "Export to CSV with same path/name", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.sort_column", + "description": "Sort column data", + "parameters": { + "type": "object", + "properties": { + "column_name": { + "type": "string", + "description": "Column name (e.g. 'A', 'B')" + }, + "ascending": { + "type": "boolean", + "description": "Sort ascending (default: true)" + }, + "start_index": { + "type": "integer", + "description": "First row index (default: 2)" + } + }, + "required": [ + "column_name" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.set_validation_list", + "description": "Set validation list for column", + "parameters": { + "type": "object", + "properties": { + "column_name": { + "type": "string", + "description": "Column name (e.g. 'A', 'B')" + }, + "values": { + "type": "array", + "description": "Validation values" + } + }, + "required": [ + "column_name", + "values" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.hide_row_data", + "description": "Hide rows containing value", + "parameters": { + "type": "object", + "properties": { + "value": { + "type": "string", + "description": "Value to hide (default: 'N/A')" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.reorder_columns", + "description": "Reorder columns by specified order", + "parameters": { + "type": "object", + "properties": { + "column_order": { + "type": "array", + "description": "Column names in desired order (e.g. 
['A', 'B', 'C'])" + } + }, + "required": [ + "column_order" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.create_pivot_table", + "description": "Create pivot table from source sheet", + "parameters": { + "type": "object", + "properties": { + "source_sheet": { + "type": "string", + "description": "Source sheet name" + }, + "table_name": { + "type": "string", + "description": "Pivot table name" + }, + "row_fields": { + "type": "array", + "description": "Row labels (e.g. ['A', 'B'])" + }, + "col_fields": { + "type": "array", + "description": "Column labels (e.g. ['A', 'B'])" + }, + "value_fields": { + "type": "array", + "description": "Value fields (e.g. ['A', 'B'])" + }, + "aggregation_function": { + "type": "string", + "description": "Aggregation function (sum, count, average, min, max)" + }, + "target_cell": { + "type": "string", + "description": "Target cell (default: 'A1')" + } + }, + "required": [ + "source_sheet", + "table_name", + "value_fields" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.merge_cells", + "description": "Merge cells in range", + "parameters": { + "type": "object", + "properties": { + "range_str": { + "type": "string", + "description": "Cell range (e.g. 'A1:B10')" + } + }, + "required": [ + "range_str" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.set_cell_value", + "description": "Set cell value", + "parameters": { + "type": "object", + "properties": { + "cell": { + "type": "string", + "description": "Cell reference (e.g. 'A1')" + }, + "value": { + "type": "string", + "description": "Cell value" + } + }, + "required": [ + "cell", + "value" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.format_range", + "description": "Apply formatting to range", + "parameters": { + "type": "object", + "properties": { + "range_str": { + "type": "string", + "description": "Range (e.g. 
'A1:B10')" + }, + "background_color": { + "type": "string", + "description": "Background color (e.g. '#0000ff')" + }, + "font_color": { + "type": "string", + "description": "Font color (e.g. '#ffffff')" + }, + "bold": { + "type": "boolean", + "description": "Bold text" + }, + "alignment": { + "type": "string", + "description": "Text alignment (left, center, right)" + } + }, + "required": [ + "range_str" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.create_chart", + "description": "Create chart from data range", + "parameters": { + "type": "object", + "properties": { + "chart_type": { + "type": "string", + "description": "Chart type (bar, column, line, pie, scatter, area)" + }, + "data_range": { + "type": "string", + "description": "Data range (e.g. 'A1:B10')" + }, + "title": { + "type": "string", + "description": "Chart title" + }, + "x_axis_title": { + "type": "string", + "description": "X axis title" + }, + "y_axis_title": { + "type": "string", + "description": "Y axis title" + } + }, + "required": [ + "chart_type", + "data_range" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.freeze_panes", + "description": "Freeze rows/columns", + "parameters": { + "type": "object", + "properties": { + "rows": { + "type": "integer", + "description": "Rows to freeze from top" + }, + "columns": { + "type": "integer", + "description": "Columns to freeze from left" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.rename_sheet", + "description": "Rename worksheet", + "parameters": { + "type": "object", + "properties": { + "old_name": { + "type": "string", + "description": "Current sheet name" + }, + "new_name": { + "type": "string", + "description": "New sheet name" + } + }, + "required": [ + "old_name", + "new_name" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.copy_sheet", + "description": "Copy worksheet", + "parameters": { + 
"type": "object", + "properties": { + "source_sheet": { + "type": "string", + "description": "Source sheet name" + }, + "new_sheet_name": { + "type": "string", + "description": "New sheet name (optional)" + } + }, + "required": [ + "source_sheet" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.reorder_sheets", + "description": "Change sheet order", + "parameters": { + "type": "object", + "properties": { + "sheet_name": { + "type": "string", + "description": "Sheet to move" + }, + "position": { + "type": "integer", + "description": "New position (0-based)" + } + }, + "required": [ + "sheet_name", + "position" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.set_chart_legend_position", + "description": "Set chart legend position", + "parameters": { + "type": "object", + "properties": { + "position": { + "type": "string", + "description": "Legend position (top, bottom, left, right, none)" + } + }, + "required": [ + "position" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.set_number_format", + "description": "Apply number format to range", + "parameters": { + "type": "object", + "properties": { + "range_str": { + "type": "string", + "description": "Range (e.g. 'A1:B10')" + }, + "format_type": { + "type": "string", + "description": "Format type (general, number, currency, accounting, date, time, percentage, fraction, scientific, text)" + }, + "decimal_places": { + "type": "integer", + "description": "Decimal places (optional)" + } + }, + "required": [ + "range_str", + "format_type" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.adjust_column_width", + "description": "Adjust column width", + "parameters": { + "type": "object", + "properties": { + "columns": { + "type": "string", + "description": "Column range (e.g. 
'A:C')" + }, + "width": { + "type": "number", + "description": "Width in characters" + }, + "autofit": { + "type": "boolean", + "description": "Autofit to content" + } + }, + "required": [ + "columns" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.adjust_row_height", + "description": "Adjust row height", + "parameters": { + "type": "object", + "properties": { + "rows": { + "type": "string", + "description": "Row range (e.g. '1:10')" + }, + "height": { + "type": "number", + "description": "Height in points" + }, + "autofit": { + "type": "boolean", + "description": "Autofit to content" + } + }, + "required": [ + "rows" + ] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.export_to_pdf", + "description": "Export to PDF", + "parameters": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "PDF save path (default: same as original)" + }, + "sheets": { + "type": "array", + "description": "Sheets to include (default: all)" + }, + "open_after_export": { + "type": "boolean", + "description": "Open PDF after export (default: false)" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "CalcTools.set_zoom_level", + "description": "Set worksheet zoom level", + "parameters": { + "type": "object", + "properties": { + "zoom_percentage": { + "type": "integer", + "description": "Zoom percentage (10-400)" + } + }, + "required": [ + "zoom_percentage" + ] + } + } + } +] \ No newline at end of file diff --git a/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json b/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json new file mode 100644 index 00000000..524e87b8 --- /dev/null +++ b/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json @@ -0,0 +1,559 @@ +[ + { + "type": "function", + "function": { + "name": "ImpressTools.save", + "description": "Save current presentation", + "parameters": { + "type": "object", + "properties": {}, + 
"required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.go_to_slide", + "description": "Navigate to specific slide", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + } + }, + "required": ["slide_index"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.get_slide_count", + "description": "Get total slide count", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.duplicate_slide", + "description": "Duplicate slide and place at end", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index to duplicate (1-based)" + } + }, + "required": ["slide_index"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_slide_font", + "description": "Set font for all text in slide", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "font_name": { + "type": "string", + "description": "Font name (e.g., 'Arial', 'Times New Roman')" + } + }, + "required": ["slide_index", "font_name"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.write_text", + "description": "Write text to textbox", + "parameters": { + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "Text content" + }, + "page_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Textbox index (0-based)" + }, + "bold": { + "type": "boolean", + "description": "Bold text (default: false)" + }, + "italic": { + "type": "boolean", + "description": "Italic text (default: false)" + }, + "size": { + "type": "integer", + "description": "Font size" + }, + 
"append": { + "type": "boolean", + "description": "Append to existing text (default: false)" + } + }, + "required": ["content", "page_index", "box_index"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_style", + "description": "Set text style for textbox", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Textbox index (0-based)" + }, + "bold": { + "type": "boolean", + "description": "Bold text" + }, + "italic": { + "type": "boolean", + "description": "Italic text" + }, + "underline": { + "type": "boolean", + "description": "Underline text" + } + }, + "required": ["slide_index", "box_index"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.configure_auto_save", + "description": "Configure auto-save settings", + "parameters": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean", + "description": "Enable auto-save" + }, + "interval_minutes": { + "type": "number", + "description": "Auto-save interval in minutes (min: 1)" + } + }, + "required": ["enabled", "interval_minutes"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_background_color", + "description": "Set textbox background color", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Textbox index (0-based)" + }, + "color": { + "type": "string", + "description": "Color name or hex code" + } + }, + "required": ["slide_index", "box_index", "color"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_text_color", + "description": "Set text color for textbox", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": 
"Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Textbox index (0-based)" + }, + "color": { + "type": "string", + "description": "Color name or hex code" + } + }, + "required": ["slide_index", "box_index", "color"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.delete_content", + "description": "Delete textbox from slide", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Textbox index (0-based)" + } + }, + "required": ["slide_index", "box_index"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_slide_orientation", + "description": "Set slide orientation", + "parameters": { + "type": "object", + "properties": { + "orientation": { + "type": "string", + "description": "Slide orientation", + "enum": ["portrait", "landscape"] + } + }, + "required": ["orientation"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.position_box", + "description": "Position textbox or image on slide", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Box index (0-based)" + }, + "position": { + "type": "string", + "description": "Position on slide", + "enum": ["left", "right", "center", "top", "bottom", "top-left", "top-right", "bottom-left", "bottom-right"] + } + }, + "required": ["slide_index", "box_index", "position"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.insert_file", + "description": "Insert video or audio file", + "parameters": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "File path" + }, + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + 
"position": { + "type": "object", + "description": "Position coordinates", + "properties": { + "x": { + "type": "number", + "description": "X position (% of slide width)" + }, + "y": { + "type": "number", + "description": "Y position (% of slide height)" + } + } + }, + "size": { + "type": "object", + "description": "Size dimensions", + "properties": { + "width": { + "type": "number", + "description": "Width (% of slide width)" + }, + "height": { + "type": "number", + "description": "Height (% of slide height)" + } + } + }, + "autoplay": { + "type": "boolean", + "description": "Auto-play media" + } + }, + "required": ["file_path"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_slide_background", + "description": "Set slide background color or image", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based). If not provided, applies to all slides" + }, + "color": { + "type": "string", + "description": "Background color" + }, + "image_path": { + "type": "string", + "description": "Background image path (overrides color)" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.save_as", + "description": "Save document to specified location", + "parameters": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "File save path with filename and extension" + }, + "overwrite": { + "type": "boolean", + "description": "Overwrite existing file (default: false)" + } + }, + "required": ["file_path"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.insert_image", + "description": "Insert image to slide", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "image_path": { + "type": "string", + "description": "Image file path" + }, + "width": { + "type": 
"number", + "description": "Image width in cm" + }, + "height": { + "type": "number", + "description": "Image height in cm" + }, + "position": { + "type": "object", + "description": "Position coordinates", + "properties": { + "x": { + "type": "number", + "description": "X position (% of slide width)" + }, + "y": { + "type": "number", + "description": "Y position (% of slide height)" + } + } + } + }, + "required": ["slide_index", "image_path"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.configure_display_settings", + "description": "Configure presentation display settings", + "parameters": { + "type": "object", + "properties": { + "use_presenter_view": { + "type": "boolean", + "description": "Use presenter view" + }, + "primary_monitor_only": { + "type": "boolean", + "description": "Use primary monitor only" + }, + "monitor_for_presentation": { + "type": "integer", + "description": "Monitor number for presentation" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_slide_number_color", + "description": "Set slide number color", + "parameters": { + "type": "object", + "properties": { + "color": { + "type": "string", + "description": "Color name or hex code" + } + }, + "required": ["color"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_text_strikethrough", + "description": "Apply strikethrough formatting to text", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Textbox index (0-based)" + }, + "line_numbers": { + "type": "array", + "items": { + "type": "integer" + }, + "description": "Line numbers for strikethrough (1-based)" + }, + "apply": { + "type": "boolean", + "description": "Apply or remove strikethrough" + } + }, + "required": ["slide_index", "box_index", "line_numbers", "apply"] + } + 
} + }, + { + "type": "function", + "function": { + "name": "ImpressTools.set_textbox_alignment", + "description": "Set text alignment for textbox", + "parameters": { + "type": "object", + "properties": { + "slide_index": { + "type": "integer", + "description": "Slide index (1-based)" + }, + "box_index": { + "type": "integer", + "description": "Textbox index (0-based)" + }, + "alignment": { + "type": "string", + "description": "Text alignment", + "enum": ["left", "center", "right", "justify"] + } + }, + "required": ["slide_index", "box_index", "alignment"] + } + } + }, + { + "type": "function", + "function": { + "name": "ImpressTools.export_to_image", + "description": "Export presentation or slide to image", + "parameters": { + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "Image save path with filename and extension" + }, + "format": { + "type": "string", + "description": "Image format", + "enum": ["png", "jpeg", "jpg", "gif", "bmp", "tiff"] + }, + "slide_index": { + "type": "integer", + "description": "Specific slide index (1-based). 
If not provided, exports all slides" + } + }, + "required": ["file_path", "format"] + } + } + } +] diff --git a/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json b/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json new file mode 100644 index 00000000..d23b78e7 --- /dev/null +++ b/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json @@ -0,0 +1,412 @@ +[ + { + "type": "function", + "function": { + "name": "WriterTools.save", + "description": "Save document to current location", + "parameters": { + "type": "object", + "properties": {}, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.write_text", + "description": "Write text at cursor position", + "parameters": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "Text to write" + }, + "bold": { + "type": "boolean", + "description": "Apply bold formatting" + }, + "italic": { + "type": "boolean", + "description": "Apply italic formatting" + }, + "size": { + "type": "number", + "description": "Font size" + } + }, + "required": ["text"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.set_color", + "description": "Change text color using regex pattern", + "parameters": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "Regex pattern to match" + }, + "color": { + "type": "number", + "description": "Hex color code (e.g., 0x000000)" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). 
Applies to all if omitted" + } + }, + "required": ["pattern", "color"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.find_and_replace", + "description": "Find and replace text using regex", + "parameters": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "Regex pattern to find" + }, + "replacement": { + "type": "string", + "description": "Replacement text" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). Applies to all if omitted" + } + }, + "required": ["pattern", "replacement"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.set_font", + "description": "Change font family", + "parameters": { + "type": "object", + "properties": { + "font_name": { + "type": "string", + "description": "Font name (e.g., 'Arial', 'Times New Roman')" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). Applies to all if omitted" + } + }, + "required": ["font_name"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.set_line_spacing", + "description": "Set line spacing", + "parameters": { + "type": "object", + "properties": { + "spacing_value": { + "type": "number", + "description": "Spacing value (1.0=single, 2.0=double)" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). Applies to all if omitted" + } + }, + "required": ["spacing_value"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.remove_highlighting", + "description": "Remove text highlighting", + "parameters": { + "type": "object", + "properties": { + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). 
Applies to all if omitted" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.find_highlighted_text", + "description": "Find text with specific highlight color", + "parameters": { + "type": "object", + "properties": { + "highlight_color": { + "type": "string", + "description": "Color name (e.g., 'yellow') or hex code" + } + }, + "required": ["highlight_color"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.insert_formula_at_cursor", + "description": "Insert formula at cursor", + "parameters": { + "type": "object", + "properties": { + "formula": { + "type": "string", + "description": "Formula to insert" + } + }, + "required": ["formula"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.insert_image_at_cursor", + "description": "Insert image at cursor", + "parameters": { + "type": "object", + "properties": { + "image_path": { + "type": "string", + "description": "Full path to image file" + }, + "width": { + "type": "integer", + "description": "Display width in pixels" + }, + "height": { + "type": "integer", + "description": "Display height in pixels" + } + }, + "required": ["image_path"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.set_strikethrough", + "description": "Apply strikethrough formatting", + "parameters": { + "type": "object", + "properties": { + "pattern": { + "type": "string", + "description": "Regex pattern to match" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). 
Applies to all if omitted" + } + }, + "required": ["pattern"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.set_font_size", + "description": "Change font size", + "parameters": { + "type": "object", + "properties": { + "font_size": { + "type": "number", + "description": "Font size in points" + }, + "pattern": { + "type": "string", + "description": "Regex pattern to match" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). Applies to all if omitted" + } + }, + "required": ["font_size", "pattern"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.export_to_pdf", + "description": "Export document to PDF", + "parameters": { + "type": "object", + "properties": { + "output_path": { + "type": "string", + "description": "PDF save path" + }, + "output_filename": { + "type": "string", + "description": "PDF filename" + }, + "include_comments": { + "type": "boolean", + "description": "Include comments in PDF" + }, + "quality": { + "type": "string", + "description": "Export quality ('standard', 'high', 'print')" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.set_paragraph_alignment", + "description": "Set paragraph alignment", + "parameters": { + "type": "object", + "properties": { + "alignment": { + "type": "string", + "description": "Alignment type ('left', 'center', 'right', 'justify')" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). Applies to all if omitted" + } + }, + "required": ["alignment"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.capitalize_words", + "description": "Capitalize first letter of each word", + "parameters": { + "type": "object", + "properties": { + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). 
Applies to all if omitted" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.set_default_font", + "description": "Set default font for new text", + "parameters": { + "type": "object", + "properties": { + "font_name": { + "type": "string", + "description": "Default font name" + }, + "font_size": { + "type": "number", + "description": "Default font size in points" + } + }, + "required": ["font_name"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.add_page_numbers", + "description": "Add page numbers", + "parameters": { + "type": "object", + "properties": { + "position": { + "type": "string", + "description": "Position ('bottom_left', 'bottom_center', 'bottom_right', 'top_left', 'top_center', 'top_right')" + }, + "start_number": { + "type": "integer", + "description": "Starting page number" + }, + "format": { + "type": "string", + "description": "Number format (e.g., '1', 'Page 1', '1 of N')" + } + }, + "required": ["position"] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.insert_page_break", + "description": "Insert page break", + "parameters": { + "type": "object", + "properties": { + "position": { + "type": "string", + "description": "Insert location ('at_cursor', 'end_of_document')" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "WriterTools.change_text_case", + "description": "Change text case", + "parameters": { + "type": "object", + "properties": { + "case_type": { + "type": "string", + "description": "Case type ('lowercase', 'uppercase')" + }, + "pattern": { + "type": "string", + "description": "Regex pattern to match" + }, + "paragraph_indices": { + "type": "array", + "description": "Target paragraph indices (0-based). 
Applies to all if omitted" + } + }, + "required": ["case_type", "pattern"] + } + } + } +] diff --git a/mm_agents/autoglm_v/tools/apis/vlc.json b/mm_agents/autoglm_v/tools/apis/vlc.json new file mode 100644 index 00000000..f90e41db --- /dev/null +++ b/mm_agents/autoglm_v/tools/apis/vlc.json @@ -0,0 +1,166 @@ +[ + { + "type": "function", + "function": { + "name": "VLCTools.get_playlist", + "description": "Get current playlist with track info", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.play", + "description": "Start playing current media", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.pause", + "description": "Pause current media", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.next", + "description": "Switch to next track", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.previous", + "description": "Switch to previous track", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.add_to_playlist", + "description": "Add media file to playlist", + "parameters": { + "type": "object", + "properties": { + "uri": { + "type": "string", + "description": "Media file URI (file:// or https://)" + } + }, + "required": ["uri"] + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.get_current_time", + "description": "Get current playback position in seconds", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.get_media_duration", + "description": "Get media duration in seconds", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": 
"function", + "function": { + "name": "VLCTools.toggle_fullscreen", + "description": "Toggle or set fullscreen mode", + "parameters": { + "type": "object", + "properties": { + "enable": { + "type": "boolean", + "description": "Force fullscreen on/off, omit to toggle" + } + }, + "required": [] + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.get_settings", + "description": "Get VLC settings", + "parameters": { + "type": "object", + "properties": {} + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.set_settings", + "description": "Set VLC settings", + "parameters": { + "type": "object", + "properties": { + "field": { + "type": "string", + "description": "Setting name (e.g. qt-max-volume, qt-minimal-view)" + }, + "value": { + "type": "string", + "description": "Setting value (use 0/1 for booleans)" + } + }, + "required": ["field", "value"] + } + } + }, + { + "type": "function", + "function": { + "name": "VLCTools.get_media_files", + "description": "Get media files from path", + "parameters": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Directory path" + }, + "suffix": { + "type": "array", + "description": "File extensions, default: ['mp4','avi','mkv','mov','mp3','m4a','wav']" + } + }, + "required": ["path"] + } + } + } +] \ No newline at end of file diff --git a/mm_agents/autoglm_v/tools/package/code.py b/mm_agents/autoglm_v/tools/package/code.py new file mode 100644 index 00000000..e580d55f --- /dev/null +++ b/mm_agents/autoglm_v/tools/package/code.py @@ -0,0 +1,260 @@ +import json +import os +import subprocess +from pathlib import Path + + +class CodeTools: + ret = "" + + @classmethod + def print_result(cls): + """打印执行结果""" + print(cls.ret) + + @classmethod + def launch_vscode(cls, path): + """ + Launches Visual Studio Code with the specified file path or directory. 
+ 在存在的窗口中打开一个文件或目录。 + + Args: + path (str): 文件路径或目录。 + """ + try: + subprocess.run(["code", "-r", path], check=True) + cls.ret = "Successfully launched VS Code" + except subprocess.CalledProcessError as e: + cls.ret = f"Error launching VS Code: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def env_info(cls): + cls.ret = "None" + + @classmethod + def compare_files(cls, file1, file2): + """ + Compares two files in VSCode. + 在VSCode中比较两个文件。 + + Args: + file1 (str): 第一个文件的路径。 + file2 (str): 第二个文件的路径。 + """ + try: + # 获取compare结果 + subprocess.run(["code", "-d", file1, file2], check=True) + cls.ret = "The compared files are opened in VSCode" + except subprocess.CalledProcessError as e: + cls.ret = f"Error comparing files: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def add_folder(cls, folder): + """ + Adds a folder to the last active window in VSCode. + 向VSCode的最后一个活动窗口添加文件夹。 + + Args: + folder (str): 文件夹路径。 + """ + try: + subprocess.run(["code", "-a", folder], check=True) + cls.ret = "Successfully added folder" + except subprocess.CalledProcessError as e: + cls.ret = f"Error adding folder: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def goto_file(cls, file_path, line=1, character=1): + """ + Opens a file at a specific line and character position. 
+ 在特定行和字符的位置打开文件。 + + Args: + file_path (str): 文件路径。 + line (int): 行号。 + character (int): 字符位置。 + """ + try: + command = f"{file_path}:{line}:{character}" + subprocess.run(["code", "-g", command], check=True) + cls.ret = "Successfully opened file, line: {}, character: {}".format(line, character) + except subprocess.CalledProcessError as e: + cls.ret = f"Error going to file: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def perform_merge(cls, path1, path2, base, result): + """ + Perform a three-way merge. + 执行三方合并。 + + Args: + path1 (str): 第一版本文件路径。 + path2 (str): 第二版本文件路径。 + base (str): 基础版本文件路径。 + result (str): 结果文件的保存路径。 + """ + try: + subprocess.run(["code", "-m", path1, path2, base, result], check=True) + cls.ret = "Successfully performed merge" + except subprocess.CalledProcessError as e: + cls.ret = f"Error performing merge: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def remove_folder(cls, folder): + """ + Removes a folder from the last active window in VSCode. + 在VSCode的最后一个活动窗口中移除文件夹。 + + Args: + folder (str): 文件夹路径。 + """ + try: + subprocess.run(["code", "--remove", folder], check=True) + cls.ret = "Successfully removed folder" + except subprocess.CalledProcessError as e: + cls.ret = f"Error removing folder: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def install_extension(cls, extension_id, pre_release=False): + """ + Installs an extension or updates it in VSCode. 
+ 安装或更新VSCode中的扩展。 + + Args: + extension_id (str): 扩展的标识符。 + pre_release (bool): 是否安装预发布版本。 + """ + try: + command = ["code", "--install-extension", extension_id] + if pre_release: + command.append("--pre-release") + subprocess.run(command, check=True) + cls.ret = "Successfully installed extension" + except subprocess.CalledProcessError as e: + cls.ret = f"Error installing extension: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def uninstall_extension(cls, extension_id): + """ + Uninstalls an extension from VSCode. + 从VSCode中卸载扩展。 + + Args: + extension_id (str): 扩展的标识符。 + """ + try: + subprocess.run(["code", "--uninstall-extension", extension_id], check=True) + cls.ret = "Successfully uninstalled extension" + except subprocess.CalledProcessError as e: + cls.ret = f"Error uninstalling extension: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def list_extensions(cls, show_versions=False, category=None): + """ + Lists installed extensions in VSCode. + 列出VSCode中安装的扩展。 + + Args: + show_versions (bool): 是否显示扩展的版本。 + category (str): 按类别筛选扩展。 + """ + try: + command = ["code", "--list-extensions"] + if show_versions: + command.append("--show-versions") + if category: + command.extend(["--category", category]) + cls.ret = subprocess.run(command, check=True, capture_output=True, text=True).stdout + except subprocess.CalledProcessError as e: + cls.ret = f"Error listing extensions: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def update_extensions(cls): + """ + Updates all installed extensions in VSCode to the latest version. 
+ 更新VSCode中所有安装的扩展到最新版本。 + """ + try: + subprocess.run(["code", "--update-extensions"], check=True) + cls.ret = "Successfully updated extensions" + except subprocess.CalledProcessError as e: + cls.ret = f"Error updating extensions: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def disable_extension(cls, extension_id): + """ + Disables a specific extension for the next instance of VSCode. + 禁用在下一个VSCode窗口中的指定扩展。 + + Args: + extension_id (str): 扩展的标识符。 + """ + try: + subprocess.run(["code", "--disable-extension", extension_id], check=True) + cls.ret = "Successfully disabled extension" + except subprocess.CalledProcessError as e: + cls.ret = f"Error disabling extension: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret + + @classmethod + def toggle_sync(cls, state): + """ + Toggles synchronization on or off in VSCode. + 在VSCode中开启或关闭同步。 + + Args: + state (str): 'on' 或 'off' 表示开启或关闭。 + """ + try: + command = ["code", "--sync", state] + subprocess.run(command, check=True) + cls.ret = "Successfully toggled sync" + except subprocess.CalledProcessError as e: + cls.ret = f"Error toggling sync: {e}" + except Exception as e: + cls.ret = f"Unexpected error: {e}" + + return cls.ret diff --git a/mm_agents/autoglm_v/tools/package/google_chrome.py b/mm_agents/autoglm_v/tools/package/google_chrome.py new file mode 100644 index 00000000..68afa4c0 --- /dev/null +++ b/mm_agents/autoglm_v/tools/package/google_chrome.py @@ -0,0 +1,107 @@ +class BrowserTools: + ret = "" + + @classmethod + def print_result(cls): + print(cls.ret) + + @classmethod + def env_info(cls): + cls.ret = "None" + + # @classmethod + # def show_all_tabs(cls): + # cls.ret = "Browser not found" + # for attempt in range(3): + # with sync_playwright() as p: + # try: + # browser = p.chromium.connect_over_cdp(cls.remote_debugging_url) + # if not browser: + # continue + # context = browser.contexts[0] + # # 获取所有窗口名称 + # cls.ret = 
'Browser Tabs: ' + # for idx, page in enumerate(context.pages): + # cls.ret += f"{idx}. {page.title()} ({page.url})" + '\n' + # return cls.ret + # except TimeoutError: + # cls.ret = 'Failed to get browser tabs' + # return None + # return None + + @classmethod + def open_profile_settings(cls): + """ + Open the profile settings page in the browser. + """ + return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/people"]}} + + @classmethod + def open_password_settings(cls): + """ + Open the password settings page in the browser. + """ + return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/autofill"]}} + + @classmethod + def open_privacy_settings(cls): + """ + Open the privacy settings page in the browser. + """ + return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/privacy"]}} + + @classmethod + def open_appearance_settings(cls): + """ + Open the appearance settings page in the browser. + """ + return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/appearance"]}} + + @classmethod + def open_search_engine_settings(cls): + """ + Open the search engine settings page in the browser. + """ + return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://settings/search"]}} + + @classmethod + def bring_back_last_tab(cls): + """ + Bring back the last tab in the browser. + """ + return f"import pyautogui; pyautogui.hotkey('ctrl', 'shift', 't'); print('Brought back last tab')" + + @classmethod + def print(cls): + """ + Open the print option in current page. + """ + return f"import pyautogui; pyautogui.hotkey('ctrl', 'p'); print('Opened print option')" + + @classmethod + def delete_browsing_data(cls): + """ + Delete browsing data in the browser. 
+ """ + return f"import pyautogui; pyautogui.hotkey('ctrl', 'shift', 'del'); print('Deleted browsing data')" + + @classmethod + def open_extensions(cls): + """ + open the extensions page in the browser. + """ + return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://extensions"]}} + + @classmethod + def bookmark_page(cls): + """ + Bookmark the current page in the browser. + """ + return f"import pyautogui; pyautogui.hotkey('ctrl', 'd'); print('Bookmarked page')" + + @classmethod + def open_bookmarks(cls): + """ + Open the bookmarks page in the browser. + """ + return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": ["chrome://bookmarks"]}} diff --git a/mm_agents/autoglm_v/tools/package/libreoffice_calc.py b/mm_agents/autoglm_v/tools/package/libreoffice_calc.py new file mode 100644 index 00000000..540da7f9 --- /dev/null +++ b/mm_agents/autoglm_v/tools/package/libreoffice_calc.py @@ -0,0 +1,1322 @@ +import json +import os +import subprocess +import sys + +import uno +from com.sun.star.beans import PropertyValue + + +class CalcTools: + localContext = uno.getComponentContext() + resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext) + ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext") + desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx) + doc = desktop.getCurrentComponent() + sheet = doc.CurrentController.ActiveSheet + ret = "" + + @classmethod + def close_other_window(cls): + """关闭除当前文档外的所有文档""" + # 获取所有打开的文档 + components = cls.desktop.getComponents().createEnumeration() + current_url = cls.doc.getURL() + + while components.hasMoreElements(): + doc = components.nextElement() + if doc.getURL() != current_url: # 如果不是当前文档 + doc.close(True) # True 表示保存更改 + + @classmethod + def maximize_window(cls): + """ + 将窗口设置为工作区最大尺寸 + 使用工作区域大小(考虑任务栏等) + """ + window = 
cls.doc.getCurrentController().getFrame().getContainerWindow() + toolkit = window.getToolkit() + device = toolkit.createScreenCompatibleDevice(0, 0) + + # 获取工作区域(排除任务栏等) + workarea = toolkit.getWorkArea() + + # 设置窗口位置和大小为工作区域 + window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15) + + @classmethod + def print_result(cls): + print(cls.ret) + + @classmethod + def save(cls): + """ + Save the current workbook to its current location + + Returns: + bool: True if save successful, False otherwise + """ + try: + # Just save the document + cls.doc.store() + cls.ret = "Success" + return True + except Exception as e: + cls.ret = f"Error: {e}" + return False + + @classmethod + def _get_column_index(cls, column_name, sheet=None): + """ + Get the index of a column by its name (A, B, C, ...) + + Args: + column_name (str): Name of the column + + Returns: + int: Index of the column + """ + try: + return ord(column_name[0]) - ord("A") + except ValueError: + return None + + @classmethod + def _get_last_used_column(cls): + """ + Get the last used column index + + Args: + None + + Returns: + int: Index of the last used column + """ + cursor = cls.sheet.createCursor() + cursor.gotoEndOfUsedArea(False) + return cursor.RangeAddress.EndColumn + + @classmethod + def _get_last_used_row(cls): + """ + Get the last used row index + + Args: + None + + Returns: + int: Index of the last used row + """ + cursor = cls.sheet.createCursor() + cursor.gotoEndOfUsedArea(False) + return cursor.RangeAddress.EndRow + + @classmethod + def _column_name_to_index(cls, column_name): + """ + 将列名转换为列索引 + + Args: + column_name (str): 列名,如 'A', 'AB' + + Returns: + int: 列索引(从0开始) + """ + column_name = column_name.upper() + result = 0 + for char in column_name: + result = result * 26 + (ord(char) - ord("A") + 1) + return result - 1 + + @classmethod + def get_workbook_info(cls): + """ + Get workbook information + + Args: + None + + Returns: + dict: Workbook information, including file path, 
@classmethod
def env_info(cls, sheet_name=None):
    """
    Render the used area of a sheet as a text table.

    Args:
        sheet_name (str, optional): Name of the sheet to read. If None, the
            active sheet (cls.sheet) is used.

    Returns:
        dict | None: {"name", "data", "row_count", "column_count"} where
        "data" is the formatted table, or None on failure. cls.ret receives
        the same dict as JSON, or "Error: ..." on failure.
    """

    def column_label(index):
        # 0 -> "A", 25 -> "Z", 26 -> "AA"; chr(65 + index) is only valid up to Z.
        label = ""
        index += 1
        while index > 0:
            index, remainder = divmod(index - 1, 26)
            label = chr(ord("A") + remainder) + label
        return label

    try:
        if sheet_name is not None:
            sheet = cls.doc.getSheets().getByName(sheet_name)
        else:
            sheet = cls.sheet

        # The cursor's range address after gotoEndOfUsedArea gives the last
        # used column and row.
        cursor = sheet.createCursor()
        cursor.gotoEndOfUsedArea(False)
        used = cursor.getRangeAddress()
        end_col = used.EndColumn
        end_row = used.EndRow

        col_headers = [column_label(i) for i in range(end_col + 1)]

        # Displayed text of each cell, row by row.
        data_array = [
            [sheet.getCellByPosition(col, row).getString() for col in range(end_col + 1)]
            for row in range(end_row + 1)
        ]

        # Each column is as wide as its widest cell or its header.
        col_widths = [len(header) for header in col_headers]
        for row in data_array:
            for i, cell in enumerate(row):
                col_widths[i] = max(col_widths[i], len(str(cell)))

        # The header and separator prefixes must be as wide as the row-number
        # prefix, otherwise the columns do not line up.
        label_width = max(2, len(str(end_row + 1)))
        header_row = (
            " " * label_width
            + "| "
            + " | ".join(f"{h:<{w}}" for h, w in zip(col_headers, col_widths))
            + " |"
        )
        separator = "-" * label_width + "|-" + "-|-".join("-" * w for w in col_widths) + "-|"
        formatted_rows = [
            f"{row_idx:<{label_width}}| "
            + " | ".join(f"{cell:<{w}}" for cell, w in zip(row, col_widths))
            + " |"
            for row_idx, row in enumerate(data_array, 1)
        ]
        formated_data = header_row + "\n" + separator + "\n" + "\n".join(formatted_rows)

        sheet_info = {
            "name": sheet.getName(),
            "data": formated_data,
            "row_count": end_row + 1,
            "column_count": end_col + 1,
        }

        cls.ret = json.dumps(sheet_info, ensure_ascii=False)
        return sheet_info

    except Exception as e:
        cls.ret = f"Error: {e}"
        return None
@classmethod
def set_column_values(cls, column_name, data, start_index=2):
    """
    Write a list of values into a column of the active sheet.

    Numbers (int and float, but not bool) are stored as numeric cell values
    through setValue; everything else is stored as text via setString(str(v)).

    Args:
        column_name (str): Name of the column to write (resolved through
            cls._get_column_index).
        data (list): Values to write, one per row.
        start_index (int): 1-based row of the first value; the default of 2
            skips the first row.

    Returns:
        bool: True if successful, False otherwise. cls.ret holds "Success",
        "Column not found" or "Error: ...".
    """
    column_index = cls._get_column_index(column_name)
    if column_index is None:
        cls.ret = "Column not found"
        return False

    try:
        first_row = start_index - 1  # the sheet API is 0-based
        for offset, value in enumerate(data):
            cell = cls.sheet.getCellByPosition(column_index, first_row + offset)
            # bool is a subclass of int but is kept as text, as before.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                cell.setValue(value)
            else:
                cell.setString(str(value))
    except Exception as e:
        cls.ret = f"Error: {e}"
        return False

    cls.ret = "Success"
    return True
@classmethod
def transpose_range(cls, source_range, target_cell):
    """
    Transpose the specified range and paste it at the target cell.

    Args:
        source_range (str): Range to transpose, in the format of "A1:B10"
        target_cell (str): Top-left cell of the destination, in the format of "A1"

    Returns:
        bool: True if successful, False otherwise. cls.ret holds "Success"
        or "Error: ...".
    """
    try:
        source = cls.sheet.getCellRangeByName(source_range)
        target = cls.sheet.getCellRangeByName(target_cell)

        # zip(*rows) yields one tuple per source column. The result is kept as
        # a tuple of tuples because PyUNO maps UNO sequences to tuples.
        transposed_data = tuple(zip(*source.getDataArray()))
        if not transposed_data:
            cls.ret = "Error: Source range is empty"
            return False

        anchor = target.CellAddress
        target_range = cls.sheet.getCellRangeByPosition(
            anchor.Column,
            anchor.Row,
            anchor.Column + len(transposed_data[0]) - 1,
            anchor.Row + len(transposed_data) - 1,
        )
        target_range.setDataArray(transposed_data)
        cls.ret = "Success"
        return True
    except Exception as e:
        cls.ret = f"Error: {e}"
        return False
@classmethod
def sort_column(cls, column_name, ascending=True, start_index=2):
    """
    Sort the values of one column numerically and write them back.

    Only the values of this column are reordered; other columns are left
    untouched. Every value from start_index onwards must be convertible with
    float(), otherwise nothing is written.

    Args:
        column_name (str): The name of the column to sort (e.g. 'A')
        ascending (bool): Whether to sort in ascending order (default True)
        start_index (int): 1-based row of the first value to sort; the
            default of 2 skips the first row.

    Returns:
        bool: True if successful, False otherwise. On failure cls.ret is
        "Error: Invalid column name or data type".
    """
    try:
        values = cls.get_column_data(column_name)[start_index - 1 :]
        ordered = sorted(values, key=float, reverse=not ascending)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        cls.ret = "Error: Invalid column name or data type"
        return False

    return cls.set_column_values(column_name, ordered, start_index)
@classmethod
def create_pivot_table(
    cls,
    source_sheet,
    table_name,
    row_fields=None,
    col_fields=None,
    value_fields=None,
    aggregation_function="sum",
    target_cell="A1",
):
    """
    Create a pivot table in the active worksheet from the used area of source_sheet.

    Args:
        source_sheet (str): Name of the sheet that holds the source data.
        table_name (str): Name of the new pivot table.
        row_fields (list, optional): Column names (e.g. 'A') used as row fields.
        col_fields (list, optional): Column names used as column fields.
        value_fields (list, optional): Column names used as data fields.
        aggregation_function (str): One of count, sum, average, min, max
            (case-insensitive). Any other value leaves the data fields'
            function untouched.
        target_cell (str): Top-left cell of the pivot table in the active sheet.

    Returns:
        bool: True if successful, False otherwise. cls.ret holds "Success"
        or "Error: ...".
    """
    # Keys are lower-case so that the default "sum" as well as "Sum"/"SUM" match.
    function_map = {"count": "COUNT", "sum": "SUM", "average": "AVERAGE", "min": "MIN", "max": "MAX"}
    orientation_type = "com.sun.star.sheet.DataPilotFieldOrientation"

    try:
        source = cls.doc.getSheets().getByName(source_sheet)

        # The source range is everything from A1 to the end of the used area.
        cursor = source.createCursor()
        cursor.gotoEndOfUsedArea(False)
        end_col = cursor.getRangeAddress().EndColumn
        end_row = cursor.getRangeAddress().EndRow
        source_range = source.getCellRangeByPosition(0, 0, end_col, end_row)

        dp_tables = cls.sheet.getDataPilotTables()
        dp_descriptor = dp_tables.createDataPilotDescriptor()
        dp_descriptor.setSourceRange(source_range.getRangeAddress())

        def orient(fields, orientation):
            # Assign an orientation to each named column and return the fields touched.
            dimensions = []
            for field in fields or ():
                field_index = cls._get_column_index(field)
                dimension = dp_descriptor.getDataPilotFields().getByIndex(field_index)
                dimension.Orientation = uno.Enum(orientation_type, orientation)
                dimensions.append(dimension)
            return dimensions

        orient(row_fields, "ROW")
        orient(col_fields, "COLUMN")

        function_name = function_map.get(str(aggregation_function).strip().lower())
        for dimension in orient(value_fields, "DATA"):
            if function_name is not None:
                dimension.Function = uno.Enum("com.sun.star.sheet.GeneralFunction", function_name)

        dp_tables.insertNewByName(
            table_name,
            cls.sheet.getCellRangeByName(target_cell).CellAddress,
            dp_descriptor,
        )

        cls.ret = "Success"
        return True
    except Exception as e:
        cls.ret = f"Error: {e}"
        return False
@classmethod
def set_cell_value(cls, cell, value):
    """
    Set a value to a specific cell in the active worksheet.

    Strings starting with "=" are stored as formulas. Python numbers are
    stored unchanged, so a float keeps its fractional part. Any other value
    is tried as int, then as float, and finally stored as text.

    Args:
        cell (str): Cell reference (e.g., 'A1')
        value (str | int | float): Value to set in the cell

    Returns:
        bool: True if successful, False otherwise. cls.ret holds "Success"
        or "Error: ...".
    """
    try:
        cell_obj = cls.sheet.getCellRangeByName(cell)

        if isinstance(value, str) and value.startswith("="):
            cell_obj.Formula = value
        elif isinstance(value, bool):
            # Stored as 0/1, as int(value) did before.
            cell_obj.Value = int(value)
        elif isinstance(value, (int, float)):
            # Do not route floats through int(): that truncated 3.7 to 3.
            cell_obj.Value = value
        else:
            try:
                cell_obj.Value = int(value)
            except ValueError:
                try:
                    cell_obj.Value = float(value)
                except ValueError:
                    # Not numeric: keep it as text.
                    cell_obj.String = value

        cls.ret = "Success"
        return True
    except Exception as e:
        cls.ret = f"Error: {e}"
        return False
@classmethod
def create_chart(cls, chart_type, data_range, title=None, x_axis_title=None, y_axis_title=None):
    """
    Create a chart in the active worksheet based on the specified data range.

    Args:
        chart_type (str): Type of chart to create (bar, column, line, pie, scatter, area)
        data_range (str): Range containing the data for the chart, in the format of 'A1:B10'
        title (str, optional): Title for the chart
        x_axis_title (str, optional): Title for the X axis
        y_axis_title (str, optional): Title for the Y axis

    Returns:
        bool: True if successful, False otherwise. cls.ret holds "Success"
        or "Error: ...".
    """
    # chart type -> (diagram service, value for the diagram's Vertical property).
    # com.sun.star.chart has no ColumnDiagram/ScatterDiagram service: bar and
    # column charts are both BarDiagram, told apart by Vertical, and scatter
    # charts are XYDiagram.
    chart_type_map = {
        "bar": ("com.sun.star.chart.BarDiagram", True),
        "column": ("com.sun.star.chart.BarDiagram", False),
        "line": ("com.sun.star.chart.LineDiagram", None),
        "pie": ("com.sun.star.chart.PieDiagram", None),
        "scatter": ("com.sun.star.chart.XYDiagram", None),
        "area": ("com.sun.star.chart.AreaDiagram", None),
    }

    try:
        # Validate before touching the sheet so a bad type does not leave a
        # half-configured chart behind.
        if chart_type not in chart_type_map:
            cls.ret = f"Error: Unsupported chart type {chart_type!r}"
            return False
        service_name, vertical = chart_type_map[chart_type]

        cell_range_address = cls.sheet.getCellRangeByName(data_range).getRangeAddress()

        charts = cls.sheet.getCharts()
        rect = uno.createUnoStruct("com.sun.star.awt.Rectangle")
        rect.Width = 10000  # default width
        rect.Height = 7000  # default height

        # The first chart keeps the historical name "MyChart"; later charts
        # get a numeric suffix instead of colliding with it.
        chart_name = "MyChart"
        suffix = 1
        while charts.hasByName(chart_name):
            suffix += 1
            chart_name = f"MyChart_{suffix}"

        charts.addNewByName(chart_name, rect, (cell_range_address,), False, False)

        chart = charts.getByName(chart_name)
        chart_doc = chart.getEmbeddedObject()

        diagram = chart_doc.createInstance(service_name)
        chart_doc.setDiagram(diagram)
        if vertical is not None:
            chart_doc.Diagram.Vertical = vertical

        if title:
            chart_doc.Title.String = title

        if x_axis_title:
            chart_doc.Diagram.XAxis.AxisTitle.String = x_axis_title

        if y_axis_title:
            chart_doc.Diagram.YAxis.AxisTitle.String = y_axis_title

        cls.ret = "Success"
        return True

    except Exception as e:
        cls.ret = f"Error: {e}"
        return False
@classmethod
def set_chart_legend_position(cls, position):
    """
    Set the position of the legend of the first chart in the active worksheet.

    Args:
        position (str): Position of the legend ('top', 'bottom', 'left', 'right', 'none').
            'none' hides the legend.

    Returns:
        bool: True if successful, False otherwise. cls.ret holds "Success"
        or "Error: ...".
    """
    alignments = {"top": "TOP", "bottom": "BOTTOM", "left": "LEFT", "right": "RIGHT"}

    try:
        # Reject unknown positions before the chart is modified.
        if position != "none" and position not in alignments:
            cls.ret = f"Error: Unsupported legend position {position!r}"
            return False

        charts = cls.sheet.getCharts()
        if charts.getCount() == 0:
            cls.ret = "Error: No chart found in the active sheet"
            return False

        # Only the first chart of the sheet is modified.
        chart_obj = charts.getByIndex(0).getEmbeddedObject()

        if position == "none":
            chart_obj.HasLegend = False
        else:
            chart_obj.HasLegend = True
            chart_obj.getLegend().Alignment = uno.Enum(
                "com.sun.star.chart.ChartLegendPosition", alignments[position]
            )

        cls.ret = "Success"
        return True
    except Exception as e:
        cls.ret = f"Error: {e}"
        return False
+ '0' * decimal_places) if decimal_places > 0 else ''}" + else: + format_string = "0" + elif format_type == "currency": + if decimal_places is not None: + format_string = f"[$¥-804]#,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}" + else: + format_string = "[$¥-804]#,##0.00" + elif format_type == "accounting": + if decimal_places is not None: + format_string = f"_-[$¥-804]* #,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}_-;-[$¥-804]* #,##0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}_-;_-[$¥-804]* \"-\"_-;_-@_-" + else: + format_string = '_-[$¥-804]* #,##0.00_-;-[$¥-804]* #,##0.00_-;_-[$¥-804]* "-"??_-;_-@_-' + elif format_type == "date": + format_string = "YYYY/MM/DD" + elif format_type == "time": + format_string = "HH:MM:SS" + elif format_type == "percentage": + if decimal_places is not None: + format_string = f"0{('.' + '0' * decimal_places) if decimal_places > 0 else ''}%" + else: + format_string = "0.00%" + elif format_type == "fraction": + format_string = "# ?/?" + elif format_type == "scientific": + if decimal_places is not None: + format_string = f"0{('.' 
@classmethod
def adjust_column_width(cls, columns, width=None, autofit=False):
    """
    Adjust the width of the given columns in the active worksheet.

    Args:
        columns (str): Column range such as 'A:C', or a single column such as 'B'
        width (float, optional): Width to apply; the column's Width property
            is set to int(width * 256)
        autofit (bool, optional): When true, OptimalWidth is enabled instead
            and width is ignored

    Returns:
        bool: True on success, False on failure
    """
    try:
        parts = columns.split(":")
        first = cls._column_name_to_index(parts[0])
        last = cls._column_name_to_index(parts[1]) if len(parts) > 1 else first

        all_columns = cls.sheet.getColumns()
        for index in range(first, last + 1):
            target = all_columns.getByIndex(index)
            if autofit:
                target.OptimalWidth = True
                continue
            if width is not None:
                target.Width = int(width * 256)

        cls.ret = "Success"
        return True

    except Exception as e:
        cls.ret = f"Error: {e}"
        return False
自动调整行高以适应内容 + row.OptimalHeight = True + elif height is not None: + # 设置指定高度(将点转换为1/100毫米,LibreOffice使用的单位) + # 1点 ≈ 35.28 1/100毫米 + row.Height = int(height * 35.28) + row.OptimalHeight = False + + cls.ret = "Success" + return True + except Exception as e: + cls.ret = f"Error: {e}" + return False + + @classmethod + def export_to_pdf(cls, file_path=None, sheets=None, open_after_export=False): + """ + 将当前文档或指定工作表导出为PDF文件 + + Args: + file_path (str, optional): PDF文件保存路径,如果不指定则使用当前文档路径 + sheets (list, optional): 要包含在PDF中的工作表名称列表,如果不指定则包含所有工作表 + open_after_export (bool, optional): 导出后是否打开PDF文件 + + Returns: + bool: 成功返回True,失败返回False + """ + try: + # 如果未指定文件路径,则使用当前文档路径并更改扩展名为.pdf + if not file_path: + if cls.doc.hasLocation(): + url = cls.doc.getLocation() + file_path = uno.fileUrlToSystemPath(url) + file_path = os.path.splitext(file_path)[0] + ".pdf" + else: + # 如果文档尚未保存,则在用户桌面创建临时文件 + desktop_path = os.path.join(os.path.expanduser("~"), "Desktop") + file_path = os.path.join(desktop_path, "LibreOffice_Export.pdf") + + # 确保文件路径是系统路径,然后转换为URL + pdf_url = uno.systemPathToFileUrl(os.path.abspath(file_path)) + + # 创建导出属性 + export_props = [] + + # 设置过滤器名称 + export_props.append(PropertyValue(Name="FilterName", Value="calc_pdf_Export")) + + # 如果指定了特定工作表,则只导出这些工作表 + if sheets and isinstance(sheets, list) and len(sheets) > 0: + # 获取所有工作表 + all_sheets = cls.doc.getSheets() + selection = [] + + # 查找指定的工作表 + for sheet_name in sheets: + if all_sheets.hasByName(sheet_name): + sheet = all_sheets.getByName(sheet_name) + selection.append(sheet) + + # 如果找到了指定的工作表,则设置导出选择 + if selection: + export_props.append(PropertyValue(Name="Selection", Value=tuple(selection))) + + # 导出PDF + cls.doc.storeToURL(pdf_url, tuple(export_props)) + + # 如果需要,导出后打开PDF + if open_after_export: + if sys.platform.startswith("darwin"): # macOS + subprocess.call(("open", file_path)) + elif os.name == "nt": # Windows + os.startfile(file_path) + elif os.name == "posix": # Linux + subprocess.call(("xdg-open", 
if __name__ == "__main__":
    # Manual smoke test of CalcTools; the class body already connects to the
    # office instance when the module is loaded.
    print(CalcTools._get_column_index("A"))
    print(CalcTools.get_workbook_info())
    # The sheet dump is provided by env_info(); CalcTools has no get_content().
    print(CalcTools.env_info())
    CalcTools.switch_active_sheet("Sheet2")
    # helper.set_column_values('A', [1, 2, 3, 4, 5])
    # helper.highlight_range('A1:A3', 'Red')
    # helper.transpose_range('A1:D5', 'B8')
    print(CalcTools.get_column_data("A"))
    CalcTools.sort_column("A", True)
    CalcTools.hide_row_data("N/A")
    CalcTools.reorder_columns(["B", "A", "C"])
    CalcTools.freeze_panes(1, 1)
    # helper.set_validation_list('C', ['Pass', 'Fail', 'Held'])
    CalcTools.export_to_csv()
@classmethod
def save(cls):
    """
    Save the document to its current location.

    Returns:
        bool: True if the document was stored, False if it has no save
        location or storing failed. cls.ret holds "Success" or "Error: ...".
    """
    try:
        if not cls.doc.hasLocation():
            # Nothing was stored, so do not report success to the caller.
            cls.ret = "Error: Document has no save location"
            return False
        cls.doc.store()
        cls.ret = "Success"
        return True
    except Exception as e:
        cls.ret = f"Error: {e}"
        return False
{page_index+1}:\n{c}\n\n" + + cur_idx = cls.get_current_slide_index() + 1 + content_str = content_str + f"Current Slide Index: {cur_idx}" + cls.ret = content_str + return content_str + except Exception as e: + cls.ret = f"Error: {str(e)}" + return [] + + @classmethod + def get_current_slide_index(cls): + """ + Gets the index of the currently active slide in the presentation. + :return: The index of the currently active slide (0-based) + """ + try: + controller = cls.doc.getCurrentController() + current_page = controller.getCurrentPage() + pages = cls.doc.getDrawPages() + for i in range(pages.getCount()): + if pages.getByIndex(i) == current_page: + cls.ret = i + return i + cls.ret = "Current slide not found" + return -1 + except Exception as e: + cls.ret = f"Error: {str(e)}" + return -1 + + @classmethod + def go_to_slide(cls, slide_index): + """ + Navigates to a specific slide in the presentation based on its index. + + Args: + slide_index (int): The index of the slide to navigate to (1-based indexing) + + Returns: + bool: True if navigation was successful, False otherwise + """ + try: + zero_based_index = slide_index - 1 + controller = cls.doc.getCurrentController() + if not controller: + cls.ret = "Error: Could not get document controller" + return False + pages = cls.doc.getDrawPages() + if zero_based_index < 0 or zero_based_index >= pages.getCount(): + cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1-{pages.getCount()}" + return False + target_slide = pages.getByIndex(zero_based_index) + controller.setCurrentPage(target_slide) + cls.ret = f"Successfully navigated to slide {slide_index}" + return True + except Exception as e: + cls.ret = f"Error navigating to slide: {str(e)}" + return False + + @classmethod + def get_slide_count(cls): + """ + Gets the total number of slides in the current presentation. 
+ :return: The total number of slides as an integer + """ + try: + pages = cls.doc.getDrawPages() + count = pages.getCount() + cls.ret = count + return count + except Exception as e: + cls.ret = f"Error: {str(e)}" + return 0 + + @classmethod + def duplicate_slide(cls, slide_index): + """ + Creates a duplicate of a specific slide and places it at the end of the presentation. + + :param slide_index: The index of the slide to duplicate (1-based indexing) + :return: True if successful, False otherwise + """ + try: + zero_based_index = slide_index - 1 + draw_pages = cls.doc.getDrawPages() + if zero_based_index < 0 or zero_based_index >= draw_pages.getCount(): + cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {draw_pages.getCount()}" + return False + controller = cls.doc.getCurrentController() + controller.setCurrentPage(draw_pages.getByIndex(zero_based_index)) + dispatcher = cls.ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.DispatchHelper", cls.ctx) + frame = controller.getFrame() + dispatcher.executeDispatch(frame, ".uno:DuplicatePage", "", 0, ()) + duplicated_slide_index = zero_based_index + 1 + slide_count = draw_pages.getCount() + if duplicated_slide_index < slide_count - 1: + controller.setCurrentPage(draw_pages.getByIndex(duplicated_slide_index)) + moves_needed = slide_count - duplicated_slide_index - 1 + for _ in range(moves_needed): + dispatcher.executeDispatch(frame, ".uno:MovePageDown", "", 0, ()) + cls.ret = f"Slide {slide_index} duplicated successfully and moved to the end" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def set_slide_font(cls, slide_index, font_name): + """ + Sets the font style for all text elements in a specific slide, including the title. 
+ + Args: + slide_index (int): The index of the slide to modify (1-based indexing) + font_name (str): The name of the font to apply (e.g., 'Arial', 'Times New Roman', 'Calibri') + + Returns: + bool: True if successful, False otherwise + """ + try: + zero_based_index = slide_index - 1 + slides = cls.doc.getDrawPages() + if zero_based_index < 0 or zero_based_index >= slides.getCount(): + cls.ret = f"Error: Slide index {slide_index} is out of range. Valid range is 1 to {slides.getCount()}." + return False + slide = slides.getByIndex(zero_based_index) + for i in range(slide.getCount()): + shape = slide.getByIndex(i) + if hasattr(shape, "getText"): + text = shape.getText() + if text: + cursor = text.createTextCursor() + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.setPropertyValue("CharFontName", font_name) + cls.ret = f"Successfully set font to '{font_name}' for all text elements in slide {slide_index}." + return True + except Exception as e: + cls.ret = f"Error setting font: {str(e)}" + return False + + @classmethod + def write_text(cls, content, page_index, box_index, bold=False, italic=False, size=None, append=False): + """ + Writes text to a specific textbox on a slide + + :param content: The text content to add + :param page_index: The index of the slide (1-based indexing) + :param box_index: The index of the textbox to modify (0-based indexing) + :param bold: Whether to make the text bold, default is False + :param italic: Whether to make the text italic, default is False + :param size: The size of the text. If None, uses the box's current font size. + :param append: Whether to append the text, default is False. If you want to observe some formats(like a bullet at the beginning) or keep the original text, you should set up it. 
+ :return: True if successful, False otherwise + """ + try: + zero_based_page_index = page_index - 1 + pages = cls.doc.getDrawPages() + if zero_based_page_index < 0 or zero_based_page_index >= pages.getCount(): + cls.ret = f"Error: Page index {page_index} is out of range" + return False + page = pages.getByIndex(zero_based_page_index) + if box_index < 0 or box_index >= page.getCount(): + cls.ret = f"Error: Box index {box_index} is out of range" + return False + shape = page.getByIndex(box_index) + if not hasattr(shape, "String"): + cls.ret = f"Error: The shape at index {box_index} cannot contain text" + return False + if append: + shape.String = shape.String + content + else: + shape.String = content + if hasattr(shape, "getCharacterProperties"): + char_props = shape.getCharacterProperties() + if bold: + char_props.CharWeight = BOLD + else: + char_props.CharWeight = NORMAL + if italic: + char_props.CharPosture = ITALIC + else: + char_props.CharPosture = NONE + if size is not None: + char_props.CharHeight = size + + cls.ret = f"Text successfully written to page {page_index}, box {box_index}" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def set_style(cls, slide_index, box_index, bold=None, italic=None, underline=None): + """ + Sets the style properties for the specified textbox on a slide. + + :param slide_index: The index of the slide to modify (1-based indexing) + :param box_index: The index of the textbox to modify (0-based indexing) + :param bold: Whether to make the text bold + :param italic: Whether to make the text italic + :param underline: Whether to underline the text + :return: True if successful, False otherwise + """ + try: + pages = cls.doc.getDrawPages() + if slide_index < 1 or slide_index > pages.getCount(): + cls.ret = f"Error: Invalid slide index {slide_index}. 
Valid range is 1 to {pages.getCount()}" + return False + page = pages.getByIndex(slide_index - 1) + if box_index < 0 or box_index >= page.getCount(): + cls.ret = f"Error: Invalid box index {box_index}. Valid range is 0 to {page.getCount() - 1}" + return False + shape = page.getByIndex(box_index) + if not hasattr(shape, "getText"): + cls.ret = "Error: The specified shape does not contain text" + return False + text = shape.getText() + cursor = text.createTextCursor() + cursor.gotoStart(False) + cursor.gotoEnd(True) + if bold is not None: + cursor.setPropertyValue("CharWeight", BOLD if bold else NORMAL) + if italic is not None: + cursor.setPropertyValue("CharPosture", ITALIC if italic else NONE) + if underline is not None: + cursor.setPropertyValue("CharUnderline", 1 if underline else 0) + cls.ret = "Style applied successfully" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def configure_auto_save(cls, enabled, interval_minutes): + """ + Enables or disables auto-save functionality for the current document and sets the auto-save interval. 
+ + :param enabled: Whether to enable (True) or disable (False) auto-save + :param interval_minutes: The interval in minutes between auto-saves (minimum 1 minute) + :return: True if successful, False otherwise + """ + try: + if interval_minutes < 1: + interval_minutes = 1 + config_provider = cls.ctx.ServiceManager.createInstanceWithContext( + "com.sun.star.configuration.ConfigurationProvider", cls.ctx + ) + prop = PropertyValue() + prop.Name = "nodepath" + prop.Value = "/org.openoffice.Office.Common/Save/Document" + config_access = config_provider.createInstanceWithArguments( + "com.sun.star.configuration.ConfigurationUpdateAccess", (prop,) + ) + config_access.setPropertyValue("AutoSave", enabled) + config_access.setPropertyValue("AutoSaveTimeIntervall", interval_minutes) + config_access.commitChanges() + cls.ret = f"Auto-save {'enabled' if enabled else 'disabled'} with interval of {interval_minutes} minutes" + return True + except Exception as e: + cls.ret = f"Error configuring auto-save: {str(e)}" + return False + + @classmethod + def set_background_color(cls, slide_index, box_index, color): + """ + Sets the background color for the specified textbox on a slide. 
+ + Args: + slide_index (int): The index of the slide containing the textbox (1-based indexing) + box_index (int): The index of the textbox to modify (0-based indexing) + color (str): The color to apply to the textbox (e.g., 'red', 'green', 'blue', 'yellow', or hex color code) + + Returns: + bool: True if successful, False otherwise + """ + try: + zero_based_slide_index = slide_index - 1 + slides = cls.doc.getDrawPages() + if zero_based_slide_index < 0 or zero_based_slide_index >= slides.getCount(): + cls.ret = f"Error: Slide index {slide_index} is out of range" + return False + slide = slides.getByIndex(zero_based_slide_index) + if box_index < 0 or box_index >= slide.getCount(): + cls.ret = f"Error: Box index {box_index} is out of range" + return False + shape = slide.getByIndex(box_index) + color_int = 0 + color_map = { + "red": 16711680, + "green": 65280, + "blue": 255, + "yellow": 16776960, + "black": 0, + "white": 16777215, + "purple": 8388736, + "orange": 16753920, + "pink": 16761035, + "gray": 8421504, + "brown": 10824234, + "cyan": 65535, + "magenta": 16711935, + } + if color.lower() in color_map: + color_int = color_map[color.lower()] + elif color.startswith("#") and len(color) == 7: + color_int = int(color[1:], 16) + else: + cls.ret = f"Error: Invalid color format: {color}" + return False + shape.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "SOLID") + shape.FillColor = color_int + cls.ret = f"Background color of textbox {box_index} on slide {slide_index} set to {color}" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def set_text_color(cls, slide_index, box_index, color): + """ + Sets the text color for the specified textbox on a slide. 
+ + Args: + slide_index (int): The index of the slide to modify (1-based indexing) + box_index (int): The index of the textbox to modify (0-based indexing) + color (str): The color to apply to the text (e.g., 'red', 'green', 'blue', 'black', or hex color code) + + Returns: + bool: True if successful, False otherwise + """ + try: + zero_based_slide_index = slide_index - 1 + slides = cls.doc.getDrawPages() + if zero_based_slide_index < 0 or zero_based_slide_index >= slides.getCount(): + cls.ret = f"Error: Slide index {slide_index} is out of range" + return False + slide = slides.getByIndex(zero_based_slide_index) + if box_index < 0 or box_index >= slide.getCount(): + cls.ret = f"Error: Box index {box_index} is out of range" + return False + shape = slide.getByIndex(box_index) + if not hasattr(shape, "getText"): + cls.ret = f"Error: Shape at index {box_index} does not contain text" + return False + color_int = 0 + if color.startswith("#"): + color_int = int(color[1:], 16) + else: + color_map = { + "red": 16711680, + "green": 43315, + "blue": 255, + "black": 0, + "white": 16777215, + "yellow": 16776960, + "cyan": 65535, + "magenta": 16711935, + "gray": 8421504, + } + if color.lower() in color_map: + color_int = color_map[color.lower()] + else: + cls.ret = f"Error: Unsupported color '{color}'" + return False + text = shape.getText() + cursor = text.createTextCursor() + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.setPropertyValue("CharColor", color_int) + cls.ret = f"Successfully set text color to {color} for textbox {box_index} on slide {slide_index}" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def delete_content(cls, slide_index, box_index): + """ + Deletes the specified textbox from a slide. 
+ + :param slide_index: The index of the slide to modify (1-based indexing) + :param box_index: The index of the textbox to modify (0-based indexing) + :return: True if successful, False otherwise + """ + try: + pages = cls.doc.getDrawPages() + zero_based_slide_index = slide_index - 1 + if zero_based_slide_index < 0 or zero_based_slide_index >= pages.getCount(): + cls.ret = f"Error: Invalid slide index {slide_index}. Valid range is 1 to {pages.getCount()}" + return False + slide = pages.getByIndex(zero_based_slide_index) + if box_index < 0 or box_index >= slide.getCount(): + cls.ret = f"Error: Invalid box index {box_index}. Valid range is 0 to {slide.getCount() - 1}" + return False + shape = slide.getByIndex(box_index) + slide.remove(shape) + cls.ret = f"Successfully deleted textbox {box_index} from slide {slide_index}" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def set_slide_orientation(cls, orientation): + """ + Changes the orientation of slides in the presentation between portrait (upright) and landscape (sideways). 
+ + :param orientation: The desired orientation for the slides ('portrait' or 'landscape') + :return: True if successful, False otherwise + """ + try: + draw_pages = cls.doc.getDrawPages() + first_page = draw_pages.getByIndex(0) + current_width = first_page.Width + current_height = first_page.Height + if orientation == "portrait" and current_width > current_height: + new_width, new_height = current_height, current_width + elif orientation == "landscape" and current_width < current_height: + new_width, new_height = current_height, current_width + else: + cls.ret = f"Slides are already in {orientation} orientation" + return True + for i in range(draw_pages.getCount()): + page = draw_pages.getByIndex(i) + page.Width = new_width + page.Height = new_height + cls.ret = f"Changed slide orientation to {orientation}" + return True + except Exception as e: + cls.ret = f"Error changing slide orientation: {str(e)}" + return False + + @classmethod + def position_box(cls, slide_index, box_index, position): + """ + Positions a textbox or image on a slide at a specific location or predefined position. + + :param slide_index: The index of the slide containing the box (1-based indexing) + :param box_index: The index of the box to position (0-based indexing) + :param position: Predefined position on the slide (left, right, center, top, bottom, etc.) 
+ :return: True if successful, False otherwise + """ + try: + pages = cls.doc.getDrawPages() + if slide_index < 1 or slide_index > pages.getCount(): + cls.ret = f"Error: Invalid slide index {slide_index}" + return False + page = pages.getByIndex(slide_index - 1) + if box_index < 0 or box_index >= page.getCount(): + cls.ret = f"Error: Invalid box index {box_index}" + return False + shape = page.getByIndex(box_index) + controller = cls.doc.getCurrentController() + slide_width = 28000 + slide_height = 21000 + shape_width = shape.Size.Width + shape_height = shape.Size.Height + margin = 500 + if position == "left": + new_x = margin + new_y = (slide_height - shape_height) / 2 + elif position == "right": + new_x = slide_width - shape_width - margin + new_y = (slide_height - shape_height) / 2 + elif position == "center": + new_x = (slide_width - shape_width) / 2 + new_y = (slide_height - shape_height) / 2 + elif position == "top": + new_x = (slide_width - shape_width) / 2 + new_y = margin + elif position == "bottom": + new_x = (slide_width - shape_width) / 2 + new_y = slide_height - shape_height - margin + elif position == "top-left": + new_x = margin + new_y = margin + elif position == "top-right": + new_x = slide_width - shape_width - margin + new_y = margin + elif position == "bottom-left": + new_x = margin + new_y = slide_height - shape_height - margin + elif position == "bottom-right": + new_x = slide_width - shape_width - margin + new_y = slide_height - shape_height - margin + else: + cls.ret = f"Error: Invalid position '{position}'" + return False + try: + shape.Position.X = int(new_x) + shape.Position.Y = int(new_y) + except: + try: + shape.setPropertyValue("PositionX", int(new_x)) + shape.setPropertyValue("PositionY", int(new_y)) + except: + point = uno.createUnoStruct("com.sun.star.awt.Point", int(new_x), int(new_y)) + shape.setPosition(point) + cls.ret = f"Box positioned at {position} (X: {new_x}, Y: {new_y})" + return True + except Exception as e: + cls.ret = 
f"Error positioning box: {str(e)}" + return False + + @classmethod + def insert_file(cls, file_path, slide_index=None, position=None, size=None, autoplay=False): + """ + Inserts a video file into the current or specified slide in the presentation. + + Args: + file_path (str): The full path to the video file to be inserted + slide_index (int, optional): The index of the slide to insert the video into (1-based indexing). + If not provided, inserts into the current slide. + position (dict, optional): The position coordinates for the video as percentages of slide dimensions + {'x': float, 'y': float} + size (dict, optional): The size dimensions for the video as percentages of slide dimensions + {'width': float, 'height': float} + autoplay (bool, optional): Whether the video should automatically play when the slide is shown + + Returns: + bool: True if successful, False otherwise + """ + try: + expanded_file_path = os.path.expanduser(file_path) + if not os.path.exists(expanded_file_path): + cls.ret = f"Error: File not found: {expanded_file_path}" + return False + file_url = uno.systemPathToFileUrl(os.path.abspath(expanded_file_path)) + pages = cls.doc.getDrawPages() + if slide_index is not None: + zero_based_index = slide_index - 1 + if zero_based_index < 0 or zero_based_index >= pages.getCount(): + cls.ret = f"Error: Invalid slide index: {slide_index}" + return False + slide = pages.getByIndex(zero_based_index) + else: + controller = cls.doc.getCurrentController() + slide = controller.getCurrentPage() + slide_width = 21000 + slide_height = 12750 + if position is None: + position = {"x": 10, "y": 10} + if size is None: + size = {"width": 80, "height": 60} + x = int(position["x"] * slide_width / 100) + y = int(position["y"] * slide_height / 100) + width = int(size["width"] * slide_width / 100) + height = int(size["height"] * slide_height / 100) + media_shape = cls.doc.createInstance("com.sun.star.presentation.MediaShape") + slide.add(media_shape) + 
media_shape.setPosition(uno.createUnoStruct("com.sun.star.awt.Point", x, y)) + media_shape.setSize(uno.createUnoStruct("com.sun.star.awt.Size", width, height)) + media_shape.setPropertyValue("MediaURL", file_url) + if autoplay: + try: + media_shape.setPropertyValue("MediaIsAutoPlay", True) + except: + pass + cls.ret = f"Video inserted successfully from {expanded_file_path}" + return True + except Exception as e: + cls.ret = f"Error inserting video: {str(e)}" + return False + + @classmethod + def set_slide_background(cls, slide_index=None, color=None, image_path=None): + """ + Sets the background color or image for a specific slide or all slides. + + Args: + slide_index (int, optional): The index of the slide to modify (1-based indexing). + If not provided, applies to all slides. + color (str, optional): The background color to apply (e.g., 'red', 'green', 'blue', or hex color code) + image_path (str, optional): Path to an image file to use as background. If provided, overrides color. + + Returns: + bool: True if successful, False otherwise + """ + try: + if not color and not image_path: + cls.ret = "Error: Either color or image_path must be provided" + return False + pages = cls.doc.getDrawPages() + page_count = pages.getCount() + rgb_color = None + if color: + if color.startswith("#"): + color = color.lstrip("#") + rgb_color = int(color, 16) + else: + color_map = { + "red": 16711680, + "green": 43315, + "blue": 255, + "black": 0, + "white": 16777215, + "yellow": 16776960, + "cyan": 65535, + "magenta": 16711935, + "gray": 8421504, + } + rgb_color = color_map.get(color.lower(), 0) + if slide_index is not None: + slide_index = slide_index - 1 + if slide_index < 0 or slide_index >= page_count: + cls.ret = f"Error: Slide index {slide_index + 1} is out of range (1-{page_count})" + return False + slides_to_modify = [pages.getByIndex(slide_index)] + else: + slides_to_modify = [pages.getByIndex(i) for i in range(page_count)] + for slide in slides_to_modify: + fill_props = 
cls.ctx.ServiceManager.createInstanceWithContext( + "com.sun.star.drawing.FillProperties", cls.ctx + ) + if image_path and os.path.exists(image_path): + abs_path = os.path.abspath(image_path) + file_url = uno.systemPathToFileUrl(abs_path) + fill_props.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "BITMAP") + fill_props.FillBitmapURL = file_url + fill_props.FillBitmapMode = uno.Enum("com.sun.star.drawing.BitmapMode", "STRETCH") + elif rgb_color is not None: + fill_props.FillStyle = uno.Enum("com.sun.star.drawing.FillStyle", "SOLID") + fill_props.FillColor = rgb_color + slide.setPropertyValue("Background", fill_props) + cls.ret = "Background set successfully" + return True + except Exception as e: + cls.ret = f"Error setting background: {str(e)}" + return False + + @classmethod + def save_as(cls, file_path, overwrite=False): + """ + Saves the current document to a specified location with a given filename. + + :param file_path: The full path where the file should be saved, including the filename and extension + :param overwrite: Whether to overwrite the file if it already exists (default: False) + :return: True if successful, False otherwise + """ + try: + if os.path.exists(file_path) and not overwrite: + cls.ret = f"File already exists and overwrite is set to False: {file_path}" + return False + abs_path = os.path.abspath(file_path) + if os.name == "nt": + url = "file:///" + abs_path.replace("\\", "/") + else: + url = "file://" + abs_path + properties = [] + overwrite_prop = PropertyValue() + overwrite_prop.Name = "Overwrite" + overwrite_prop.Value = overwrite + properties.append(overwrite_prop) + extension = os.path.splitext(file_path)[1].lower() + if extension == ".odp": + filter_name = "impress8" + elif extension == ".ppt": + filter_name = "MS PowerPoint 97" + elif extension == ".pptx": + filter_name = "Impress MS PowerPoint 2007 XML" + elif extension == ".pdf": + filter_name = "impress_pdf_Export" + else: + filter_name = "impress8" + filter_prop = 
PropertyValue() + filter_prop.Name = "FilterName" + filter_prop.Value = filter_name + properties.append(filter_prop) + cls.doc.storeAsURL(url, tuple(properties)) + cls.ret = f"Document saved successfully to {file_path}" + return True + except Exception as e: + cls.ret = f"Error saving document: {str(e)}" + return False + + @classmethod + def insert_image(cls, slide_index, image_path, width=None, height=None, position=None): + """ + Inserts an image to a specific slide in the presentation. + + Args: + slide_index (int): The index of the slide to add the image to (1-based indexing) + image_path (str): The full path to the image file to be added + width (float, optional): The width of the image in centimeters + height (float, optional): The height of the image in centimeters + position (dict, optional): The position coordinates for the image as percentages + { + 'x': float, # The x-coordinate as a percentage of slide width + 'y': float # The y-coordinate as a percentage of slide height + } + + Returns: + bool: True if successful, False otherwise + """ + try: + if not os.path.exists(image_path): + cls.ret = f"Error: Image file not found at {image_path}" + return False + zero_based_index = slide_index - 1 + slides = cls.doc.getDrawPages() + if zero_based_index < 0 or zero_based_index >= slides.getCount(): + cls.ret = f"Error: Slide index {slide_index} is out of range. 
Valid range is 1 to {slides.getCount()}" + return False + slide = slides.getByIndex(zero_based_index) + bitmap = cls.doc.createInstance("com.sun.star.drawing.BitmapTable") + image_url = uno.systemPathToFileUrl(os.path.abspath(image_path)) + shape = cls.doc.createInstance("com.sun.star.drawing.GraphicObjectShape") + shape.setPropertyValue("GraphicURL", image_url) + slide.add(shape) + x_pos = 0 + y_pos = 0 + slide_width = slide.Width + slide_height = slide.Height + if position: + if "x" in position: + x_pos = int(position["x"] / 100 * slide_width) + if "y" in position: + y_pos = int(position["y"] / 100 * slide_height) + current_width = shape.Size.Width + current_height = shape.Size.Height + new_width = int(width * 1000) if width is not None else current_width + new_height = int(height * 1000) if height is not None else current_height + size = uno.createUnoStruct("com.sun.star.awt.Size") + size.Width = new_width + size.Height = new_height + point = uno.createUnoStruct("com.sun.star.awt.Point") + point.X = x_pos + point.Y = y_pos + shape.Size = size + shape.Position = point + cls.ret = f"Image inserted successfully on slide {slide_index}" + return True + except Exception as e: + cls.ret = f"Error inserting image: {str(e)}" + return False + + @classmethod + def configure_display_settings( + cls, use_presenter_view=None, primary_monitor_only=None, monitor_for_presentation=None + ): + """ + Configures the display settings for LibreOffice Impress presentations. + + Args: + use_presenter_view (bool, optional): Whether to use presenter view. Set to false to disable presenter view. + primary_monitor_only (bool, optional): Whether to use only the primary monitor for the presentation. + monitor_for_presentation (int, optional): Specify which monitor to use (1 for primary, 2 for secondary, etc.) 
+ + Returns: + bool: True if settings were successfully applied, False otherwise + """ + try: + controller = cls.doc.getCurrentController() + if not hasattr(controller, "getPropertyValue"): + cls.ret = "Error: Not an Impress presentation or controller not available" + return False + if use_presenter_view is not None: + try: + controller.setPropertyValue("IsPresentationViewEnabled", use_presenter_view) + except Exception as e: + cls.ret = f"Warning: Could not set presenter view: {str(e)}" + if primary_monitor_only is not None: + try: + controller.setPropertyValue("UsePrimaryMonitorOnly", primary_monitor_only) + except Exception as e: + cls.ret = f"Warning: Could not set primary monitor usage: {str(e)}" + if monitor_for_presentation is not None: + try: + controller.setPropertyValue("MonitorForPresentation", monitor_for_presentation - 1) + except Exception as e: + cls.ret = f"Warning: Could not set presentation monitor: {str(e)}" + cls.ret = "Display settings configured successfully" + return True + except Exception as e: + cls.ret = f"Error configuring display settings: {str(e)}" + return False + + @classmethod + def set_text_strikethrough(cls, slide_index, box_index, line_numbers, apply): + """ + Applies or removes strike-through formatting to specific text content in a slide. 
+ + Args: + slide_index (int): The index of the slide containing the text (1-based indexing) + box_index (int): The index of the textbox containing the text (0-based indexing) + line_numbers (list): The line numbers to apply strike-through formatting to (1-based indexing) + apply (bool): Whether to apply (true) or remove (false) strike-through formatting + + Returns: + bool: True if successful, False otherwise + """ + try: + slides = cls.doc.getDrawPages() + slide = slides.getByIndex(slide_index - 1) + shape = slide.getByIndex(box_index) + if not hasattr(shape, "getText"): + cls.ret = f"Error: Shape at index {box_index} does not contain text" + return False + text = shape.getText() + cursor = text.createTextCursor() + text_content = text.getString() + lines = text_content.split("\n") + for line_number in line_numbers: + if 1 <= line_number <= len(lines): + start_pos = 0 + for i in range(line_number - 1): + start_pos += len(lines[i]) + 1 + end_pos = start_pos + len(lines[line_number - 1]) + cursor.gotoStart(False) + cursor.goRight(start_pos, False) + cursor.goRight(len(lines[line_number - 1]), True) + cursor.CharStrikeout = apply + cls.ret = f"Strike-through {'applied' if apply else 'removed'} successfully" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def set_textbox_alignment(cls, slide_index, box_index, alignment): + """ + Sets the text alignment for the specified textbox on a slide. 
+ + :param slide_index: The index of the slide to modify (1-based indexing) + :param box_index: The index of the textbox to modify (0-based indexing) + :param alignment: The text alignment to apply ('left', 'center', 'right', or 'justify') + :return: True if successful, False otherwise + """ + try: + zero_based_slide_index = slide_index - 1 + slides = cls.doc.getDrawPages() + if zero_based_slide_index < 0 or zero_based_slide_index >= slides.getCount(): + cls.ret = f"Error: Slide index {slide_index} out of range" + return False + slide = slides.getByIndex(zero_based_slide_index) + if box_index < 0 or box_index >= slide.getCount(): + cls.ret = f"Error: Box index {box_index} out of range" + return False + shape = slide.getByIndex(box_index) + if not hasattr(shape, "getText"): + cls.ret = "Error: Selected shape does not support text" + return False + if alignment == "left": + shape.TextHorizontalAdjust = LEFT + elif alignment == "center": + shape.TextHorizontalAdjust = CENTER + elif alignment == "right": + shape.TextHorizontalAdjust = RIGHT + elif alignment == "justify": + text = shape.getText() + cursor = text.createTextCursor() + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.ParaAdjust = 3 + else: + cls.ret = f"Error: Invalid alignment value: {alignment}" + return False + cls.ret = f"Successfully set text alignment to {alignment} for textbox {box_index} on slide {slide_index}" + return True + except Exception as e: + cls.ret = f"Error: {str(e)}" + return False + + @classmethod + def set_slide_number_properties( + cls, color=None, font_size=None, visible=None, position=None, apply_to="all", slide_indices=None + ): + """ + Modifies the properties of slide numbers in the presentation. 
+ + Args: + color (str, optional): The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code) + font_size (float, optional): The font size for slide numbers (in points) + visible (bool, optional): Whether slide numbers should be visible or hidden + position (str, optional): The position of slide numbers ('bottom-left', 'bottom-center', 'bottom-right', + 'top-left', 'top-center', 'top-right') + apply_to (str, optional): Whether to apply changes to 'all', 'current', or 'selected' slides + slide_indices (list, optional): Indices of specific slides to change (1-based indexing) + + Returns: + bool: True if successful, False otherwise + """ + try: + draw_pages = cls.doc.getDrawPages() + master_pages = cls.doc.getMasterPages() + pages_to_modify = [] + if apply_to == "all": + for i in range(draw_pages.getCount()): + pages_to_modify.append(draw_pages.getByIndex(i)) + elif apply_to == "current": + current_page = cls.doc.getCurrentController().getCurrentPage() + pages_to_modify.append(current_page) + elif apply_to == "selected" and slide_indices: + for idx in slide_indices: + if 1 <= idx <= draw_pages.getCount(): + pages_to_modify.append(draw_pages.getByIndex(idx - 1)) + for i in range(master_pages.getCount()): + master_page = master_pages.getByIndex(i) + page_number_shape = None + for j in range(master_page.getCount()): + shape = master_page.getByIndex(j) + if hasattr(shape, "TextType"): + try: + if shape.TextType == 5: + page_number_shape = shape + break + except: + pass + if hasattr(shape, "getText"): + try: + text = shape.getText() + if text and text.getTextFields().getCount() > 0: + fields = text.getTextFields().createEnumeration() + while fields.hasMoreElements(): + field = fields.nextElement() + if "PageNumber" in field.getImplementationName(): + page_number_shape = shape + break + if page_number_shape: + break + except: + pass + if page_number_shape: + if color is not None: + color_int = 0 + if color.startswith("#"): + color_int = 
int(color[1:], 16) + elif color == "red": + color_int = 16711680 + elif color == "green": + color_int = 65280 + elif color == "blue": + color_int = 255 + elif color == "black": + color_int = 0 + text = page_number_shape.getText() + cursor = text.createTextCursor() + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.CharColor = color_int + if font_size is not None: + text = page_number_shape.getText() + cursor = text.createTextCursor() + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.CharHeight = font_size + if position is not None: + page_width = master_page.Width + page_height = master_page.Height + width = page_number_shape.Size.Width + height = page_number_shape.Size.Height + new_x = 0 + new_y = 0 + if position.startswith("bottom"): + new_y = page_height - height - 100 + elif position.startswith("top"): + new_y = 100 + if position.endswith("left"): + new_x = 100 + elif position.endswith("center"): + new_x = (page_width - width) / 2 + elif position.endswith("right"): + new_x = page_width - width - 100 + page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", new_x, new_y) + if position.endswith("left"): + page_number_shape.ParaAdjust = LEFT + elif position.endswith("center"): + page_number_shape.ParaAdjust = CENTER + elif position.endswith("right"): + page_number_shape.ParaAdjust = RIGHT + if visible is not None: + try: + page_number_shape.Visible = visible + except: + if not visible: + page_number_shape.Size = uno.createUnoStruct("com.sun.star.awt.Size", 1, 1) + page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", -1000, -1000) + elif ( + visible is True + or visible is None + and (color is not None or font_size is not None or position is not None) + ): + page_number_shape = cls.doc.createInstance("com.sun.star.drawing.TextShape") + master_page.add(page_number_shape) + default_width = 2000 + default_height = 400 + page_number_shape.Size = uno.createUnoStruct("com.sun.star.awt.Size", default_width, 
default_height) + page_width = master_page.Width + page_height = master_page.Height + pos_x = page_width - default_width - 100 + pos_y = page_height - default_height - 100 + if position is not None: + if position.startswith("bottom"): + pos_y = page_height - default_height - 100 + elif position.startswith("top"): + pos_y = 100 + if position.endswith("left"): + pos_x = 100 + page_number_shape.ParaAdjust = LEFT + elif position.endswith("center"): + pos_x = (page_width - default_width) / 2 + page_number_shape.ParaAdjust = CENTER + elif position.endswith("right"): + pos_x = page_width - default_width - 100 + page_number_shape.ParaAdjust = RIGHT + page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", pos_x, pos_y) + text = page_number_shape.getText() + cursor = text.createTextCursor() + try: + page_field = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber") + text.insertTextContent(cursor, page_field, False) + except: + text.setString("<#>") + if color is not None: + color_int = 0 + if color.startswith("#"): + color_int = int(color[1:], 16) + elif color == "red": + color_int = 16711680 + elif color == "green": + color_int = 65280 + elif color == "blue": + color_int = 255 + elif color == "black": + color_int = 0 + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.CharColor = color_int + if font_size is not None: + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.CharHeight = font_size + if visible is not None: + try: + page_number_shape.Visible = visible + except: + if not visible: + page_number_shape.Position = uno.createUnoStruct("com.sun.star.awt.Point", -1000, -1000) + else: + try: + page_number_shape.Visible = True + except: + pass + try: + controller = cls.doc.getCurrentController() + view_data = controller.getViewData() + controller.restoreViewData(view_data) + except: + pass + cls.ret = "Slide number properties updated successfully" + return True + except Exception as e: + cls.ret = f"Error setting slide number 
@classmethod
def set_slide_number(cls, color=None, font_size=None, visible=None, position=None):
    """
    Hides slide numbers, or adds a slide-number field to the first master page.

    With ``visible=False`` every shape whose ``Presentation`` attribute equals
    ``"Number"`` is removed from all slides and all master pages; the other
    arguments are ignored in that case. Otherwise, when at least one argument
    is given, a new 2000x500 text shape containing a page-number field is
    added to the first master page and styled as requested.

    :param color: The color to apply to slide numbers: a '#RRGGBB' hex code or one of
        'red', 'green', 'blue', 'black', 'white', 'yellow', 'cyan', 'magenta', 'gray' (case-insensitive)
    :param font_size: The font size for slide numbers (in points)
    :param visible: False removes slide numbers; True adds them
    :param position: The position of slide numbers on the slides (bottom-left, bottom-center, bottom-right,
        top-left, top-center, top-right); defaults to bottom-right
    :return: True if successful, False otherwise (a status message is stored in ``cls.ret``)
    """
    named_colors = {
        "red": 16711680,
        "green": 65280,
        "blue": 255,
        "black": 0,
        "white": 16777215,
        "yellow": 16776960,
        "cyan": 65535,
        "magenta": 16711935,
        "gray": 8421504,
    }

    def remove_number_shapes(page):
        # Walk from the last shape to the first: removing a shape shifts the
        # index of every later shape down by one, so a forward walk skips the
        # shape that follows each removal.
        for index in range(page.getCount() - 1, -1, -1):
            try:
                shape = page.getByIndex(index)
                if hasattr(shape, "Presentation") and shape.Presentation == "Number":
                    page.remove(shape)
            except Exception:
                # Best effort: a shape that cannot be inspected or removed is left in place.
                continue

    try:
        if visible is False:
            for pages in (cls.doc.getDrawPages(), cls.doc.getMasterPages()):
                for i in range(pages.getCount()):
                    remove_number_shapes(pages.getByIndex(i))
            cls.ret = "Slide numbers hidden successfully"
            return True

        if visible is not True and color is None and font_size is None and position is None:
            # Nothing was requested; report it instead of falling through silently.
            cls.ret = "Error: No slide number properties specified"
            return False

        # Resolve the color before touching the document so that an invalid
        # value cannot leave a half-configured shape behind.
        char_color = None
        if color is not None:
            if color.startswith("#") and len(color) == 7:
                try:
                    red, green, blue = (int(color[i:i + 2], 16) for i in (1, 3, 5))
                except ValueError:
                    cls.ret = f"Error setting slide number: invalid hex color '{color}'"
                    return False
                char_color = (red << 16) + (green << 8) + blue
            elif color.lower() in named_colors:
                char_color = named_colors[color.lower()]
            else:
                cls.ret = f"Error setting slide number: unsupported color '{color}'"
                return False

        master_pages = cls.doc.getMasterPages()
        if master_pages.getCount() == 0:
            cls.ret = "No master pages found"
            return False
        master_page = master_pages.getByIndex(0)

        shape_width = 2000
        slide_number_shape = cls.doc.createInstance("com.sun.star.drawing.TextShape")
        slide_number_shape.setSize(uno.createUnoStruct("com.sun.star.awt.Size", shape_width, 500))

        pos = position or "bottom-right"
        page_width = master_page.Width
        page_height = master_page.Height
        x, y = 0, 0
        if "bottom" in pos:
            y = page_height - 1000
        elif "top" in pos:
            y = 500
        if "left" in pos:
            x = 500
        elif "center" in pos:
            # Integer division keeps the coordinate integral.
            x = (page_width - shape_width) // 2
        elif "right" in pos:
            x = page_width - 2500
        slide_number_shape.setPosition(uno.createUnoStruct("com.sun.star.awt.Point", x, y))
        master_page.add(slide_number_shape)

        text = slide_number_shape.getText()
        cursor = text.createTextCursor()
        page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
        text.insertTextContent(cursor, page_number, False)

        if "center" in pos:
            slide_number_shape.setPropertyValue("TextHorizontalAdjust", CENTER)
        elif "right" in pos:
            slide_number_shape.setPropertyValue("TextHorizontalAdjust", RIGHT)
        elif "left" in pos:
            slide_number_shape.setPropertyValue("TextHorizontalAdjust", LEFT)

        if font_size is not None or char_color is not None:
            # Select the whole content of the new shape before applying character properties.
            cursor.gotoStart(False)
            cursor.gotoEnd(True)
            if font_size is not None:
                cursor.setPropertyValue("CharHeight", font_size)
            if char_color is not None:
                cursor.setPropertyValue("CharColor", char_color)

        cls.ret = "Slide numbers added and configured successfully"
        return True
    except Exception as e:
        cls.ret = f"Error setting slide number: {str(e)}"
        return False
+ + Args: + color (str): The color to apply to slide numbers (e.g., 'red', 'green', 'blue', 'black', or hex color code) + + Returns: + bool: True if successful, False otherwise + """ + try: + color_map = { + "black": 0, + "white": 16777215, + "red": 16711680, + "green": 65280, + "blue": 255, + "yellow": 16776960, + "cyan": 65535, + "magenta": 16711935, + "gray": 8421504, + "orange": 16753920, + "purple": 8388736, + } + if color.lower() in color_map: + rgb_color = color_map[color.lower()] + else: + if color.startswith("#"): + color = color[1:] + try: + if len(color) == 6: + rgb_color = int(color, 16) + else: + rgb_color = 0 + except ValueError: + rgb_color = 0 + found = False + master_pages = cls.doc.getMasterPages() + for i in range(master_pages.getCount()): + master_page = master_pages.getByIndex(i) + for j in range(master_page.getCount()): + shape = master_page.getByIndex(j) + if hasattr(shape, "getText") and shape.getText() is not None: + text = shape.getText() + try: + enum = text.createEnumeration() + while enum.hasMoreElements(): + para = enum.nextElement() + if hasattr(para, "createEnumeration"): + para_enum = para.createEnumeration() + while para_enum.hasMoreElements(): + portion = para_enum.nextElement() + if ( + hasattr(portion, "TextPortionType") + and portion.TextPortionType == "TextField" + ): + if hasattr(portion, "TextField") and portion.TextField is not None: + field = portion.TextField + if hasattr(field, "supportsService") and ( + field.supportsService( + "com.sun.star.presentation.TextField.PageNumber" + ) + or field.supportsService("com.sun.star.text.TextField.PageNumber") + ): + portion.CharColor = rgb_color + found = True + except Exception as e: + continue + draw_pages = cls.doc.getDrawPages() + for i in range(draw_pages.getCount()): + page = draw_pages.getByIndex(i) + for j in range(page.getCount()): + shape = page.getByIndex(j) + if hasattr(shape, "getText") and shape.getText() is not None: + text = shape.getText() + try: + enum = 
@classmethod
def export_to_image(cls, file_path, format, slide_index=None):
    """
    Exports the current presentation or a specific slide to an image file format.

    Args:
        file_path (str): The full path where the image file should be saved, including the filename and extension
        format (str): The image format to export to ('png', 'jpeg', 'jpg', 'gif', 'bmp' or 'tiff';
            case-insensitive, 'jpg' is treated as 'jpeg')
        slide_index (int, optional): The index of the specific slide to export (1-based indexing).
            If not provided, every slide is exported in turn. With more than one slide the files are
            named "<base>_<n>.<format>", where <base> is file_path without its extension; with a
            single slide the file is "<base>.<format>".

    Returns:
        bool: True if export was successful, False otherwise (a status message is stored in cls.ret)
    """
    try:
        format = format.lower()
        valid_formats = ["png", "jpeg", "jpg", "gif", "bmp", "tiff"]
        if format not in valid_formats:
            cls.ret = f"Error: Invalid format '{format}'. Valid formats are: {', '.join(valid_formats)}"
            return False
        if format == "jpg":
            format = "jpeg"
        # The export filters are registered under the short tokens "jpg" and
        # "tif"; "..._jpeg_Export" / "..._tiff_Export" do not exist.
        filter_token = {"jpeg": "jpg", "tiff": "tif"}.get(format, format)

        pages = cls.doc.getDrawPages()
        page_count = pages.getCount()
        if slide_index is not None:
            slide_index = slide_index - 1
            if slide_index < 0 or slide_index >= page_count:
                cls.ret = f"Error: Invalid slide index {slide_index + 1}. Valid range is 1 to {page_count}"
                return False

        # Use the filter family that matches the document type.
        if cls.doc.supportsService("com.sun.star.presentation.PresentationDocument"):
            application = "impress"
        else:
            application = "draw"
        filter_name = f"{application}_{filter_token}_Export"

        controller = cls.doc.getCurrentController()
        props = (
            PropertyValue(Name="FilterName", Value=filter_name),
            PropertyValue(Name="FilterData", Value=()),
        )

        if slide_index is not None:
            # The slide to export is made the current page before storing.
            controller.setCurrentPage(pages.getByIndex(slide_index))
            cls.doc.storeToURL(uno.systemPathToFileUrl(file_path), props)
            cls.ret = f"Successfully exported slide {slide_index + 1} to {file_path}"
            return True

        base_name, _ = os.path.splitext(file_path)
        for i in range(page_count):
            controller.setCurrentPage(pages.getByIndex(i))
            if page_count == 1:
                current_file = f"{base_name}.{format}"
            else:
                current_file = f"{base_name}_{i + 1}.{format}"
            cls.doc.storeToURL(uno.systemPathToFileUrl(current_file), props)

        if page_count == 1:
            cls.ret = f"Successfully exported {page_count} slides to {base_name}.{format}"
        else:
            cls.ret = f"Successfully exported {page_count} slides to {base_name}_[1-{page_count}].{format}"
        return True
    except Exception as e:
        cls.ret = f"Error exporting to image: {str(e)}"
        return False
@classmethod
def save(cls):
    """
    Saves the document to its current location.

    Returns:
        bool: True if the document was stored, False if it has no location
        yet or storing failed (a status message is stored in cls.ret)
    """
    try:
        if not cls.doc.hasLocation():
            # Message text: "The document has no save location, please use Save As".
            cls.ret = "Error: 文档没有保存位置,请使用另存为功能"
            return False
        cls.doc.store()
        cls.ret = "Success"
        return True
    except Exception as e:
        # Record the failure instead of swallowing it silently.
        cls.ret = f"Error: {str(e)}"
        return False
@classmethod
def get_paragraphs(cls, start_index=0, count=None):
    """
    Retrieves paragraphs from the document as a list.

    Only enumerated elements that support the Paragraph service are included,
    so indices refer to positions in that filtered list.

    Args:
        start_index (int, optional): Index of the first paragraph to return (0-based).
            Negative values are treated as 0; a value past the end yields an empty list.
        count (int, optional): Maximum number of paragraphs to return. If None, all
            paragraphs from start_index onward are returned. Negative values yield an empty list.

    Returns:
        list: The paragraph strings (also stored in cls.ret)
    """
    paragraph_list = []
    paragraphs = cls.doc.getText().createEnumeration()
    while paragraphs.hasMoreElements():
        paragraph = paragraphs.nextElement()
        if paragraph.supportsService("com.sun.star.text.Paragraph"):
            paragraph_list.append(paragraph.getString())

    start = max(start_index, 0)
    if count is None:
        cls.ret = paragraph_list[start:]
    else:
        # Clamp the count: a negative end index would otherwise be read by the
        # slice as "counted from the end" and return unrelated paragraphs.
        cls.ret = paragraph_list[start:start + max(count, 0)]
    return cls.ret
@classmethod
def find_and_replace(cls, pattern, replacement, paragraph_indices=None):
    """
    Replaces every regex match of ``pattern`` with ``replacement`` in the document.

    Indices refer to positions in the enumeration of the document text;
    enumerated elements that do not support the Paragraph service are skipped.
    A paragraph is rewritten only when at least one match was found in it.

    Args:
        pattern (str): Regular expression to search for
        replacement (str): Replacement text, passed to ``re.subn`` as-is
        paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
            Out-of-range indices are ignored. If empty or None, every paragraph is processed.

    Returns:
        str: Success message with the number of replacements made, or an error message
    """
    try:
        elements = []
        walker = cls.doc.Text.createEnumeration()
        while walker.hasMoreElements():
            elements.append(walker.nextElement())

        total = len(elements)
        if paragraph_indices:
            selected = [pos for pos in paragraph_indices if 0 <= pos < total]
        else:
            selected = list(range(total))

        matcher = re.compile(pattern)
        replaced = 0
        for pos in selected:
            element = elements[pos]
            if not element.supportsService("com.sun.star.text.Paragraph"):
                continue
            updated, hits = matcher.subn(replacement, element.getString())
            if hits > 0:
                element.setString(updated)
                replaced += hits
        cls.ret = f"Successfully made {replaced} replacements"
    except Exception as e:
        cls.ret = f"Error during find and replace: {str(e)}"
    return cls.ret
@classmethod
def set_line_spacing(cls, spacing_value, paragraph_indices=None):
    """
    Sets the line spacing for specified paragraphs in the document.

    Paragraphs are numbered the same way ``get_paragraphs`` numbers them:
    only elements supporting the Paragraph service are counted, and empty
    paragraphs count too.

    Args:
        spacing_value (float): The line spacing value to apply (1.0 for single spacing, 2.0 for double spacing, etc.)
        paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
            If not provided, applies to all paragraphs.

    Returns:
        bool: True if successful, False otherwise (a status message is stored in cls.ret)
    """
    try:
        # round(), not int(): 1.15 * 100 evaluates to 114.99999999999999 and
        # truncation would turn it into 114.
        height_percent = round(spacing_value * 100)
        targets = set(paragraph_indices) if paragraph_indices else None

        elements = cls.doc.getText().createEnumeration()
        index = 0
        while elements.hasMoreElements():
            element = elements.nextElement()
            # Skip anything that is not a paragraph so it neither consumes an
            # index nor receives a paragraph-only property.
            if not element.supportsService("com.sun.star.text.Paragraph"):
                continue
            if targets is None or index in targets:
                line_spacing = uno.createUnoStruct("com.sun.star.style.LineSpacing")
                line_spacing.Mode = 0  # proportional: Height is a percentage
                line_spacing.Height = height_percent
                element.ParaLineSpacing = line_spacing
            index += 1

        cls.ret = "Success"
        return True
    except Exception as e:
        cls.ret = f"Error: {str(e)}"
        return False
@classmethod
def find_highlighted_text(cls, highlight_color):
    """
    Collects every text portion whose background color equals the given highlight color.

    Args:
        highlight_color (str): Color to look for: one of 'yellow', 'green', 'blue', 'red', 'cyan',
            'magenta', 'black', 'white', 'gray', 'lightgray' (case-insensitive) or a '#RRGGBB' hex code.

    Returns:
        list: The strings of all matching, non-blank text portions. An empty list is returned
        (with an explanatory message in cls.ret) when the color cannot be interpreted.
    """
    palette = {
        "yellow": 16776960,
        "green": 65280,
        "blue": 255,
        "red": 16711680,
        "cyan": 65535,
        "magenta": 16711935,
        "black": 0,
        "white": 16777215,
        "gray": 8421504,
        "lightgray": 12632256,
    }

    # Named colors take precedence; compare against None because black maps to 0.
    wanted = palette.get(highlight_color.lower())
    if wanted is None:
        if not (highlight_color.startswith("#") and len(highlight_color) == 7):
            cls.ret = f"Unsupported color format: {highlight_color}"
            return []
        digits = highlight_color[1:]
        try:
            red, green, blue = (int(digits[k:k + 2], 16) for k in (0, 2, 4))
        except ValueError:
            cls.ret = f"Invalid hex color format: {highlight_color}"
            return []
        wanted = (red << 16) + (green << 8) + blue

    segments = []
    paragraph_iter = cls.doc.getText().createEnumeration()
    while paragraph_iter.hasMoreElements():
        block = paragraph_iter.nextElement()
        if not block.supportsService("com.sun.star.text.Paragraph"):
            continue
        portion_iter = block.createEnumeration()
        while portion_iter.hasMoreElements():
            portion = portion_iter.nextElement()
            if not hasattr(portion, "CharBackColor"):
                continue
            if portion.CharBackColor != wanted:
                continue
            # Whitespace-only portions are not reported.
            if portion.getString().strip():
                segments.append(portion.getString())

    cls.ret = f"Found {len(segments)} text segments with highlight color {highlight_color}"
    return segments
@classmethod
def insert_image_at_cursor(cls, image_path, width=None, height=None):
    """
    Inserts an image at the current cursor position in the document.

    The image is anchored as a character at ``cls.cursor``.

    Args:
        image_path (str): Path to the image file to insert; a leading '~' is expanded
        width (int, optional): Width to display the image
        height (int, optional): Height to display the image

    Returns:
        str: Success message or error message (also stored in cls.ret)
    """
    try:
        if image_path.startswith("~"):
            image_path = os.path.expanduser(image_path)
        if not os.path.exists(image_path):
            cls.ret = f"Error: Image file not found at {image_path}"
            return cls.ret
        # Let UNO build the file URL: plain string concatenation leaves spaces
        # and other reserved characters unescaped.
        file_url = uno.systemPathToFileUrl(os.path.abspath(image_path))

        graphic = cls.doc.createInstance("com.sun.star.text.GraphicObject")
        graphic.GraphicURL = file_url
        graphic.AnchorType = AS_CHARACTER
        # NOTE(review): the original docstring called width/height "pixels", but
        # the values are multiplied by 100 before being assigned - confirm the
        # intended unit against the Width/Height property documentation.
        if width is not None:
            graphic.Width = width * 100
        if height is not None:
            graphic.Height = height * 100
        cls.text.insertTextContent(cls.cursor, graphic, False)
        cls.ret = "Success: Image inserted"
        return cls.ret
    except Exception as e:
        cls.ret = f"Error: {str(e)}"
        return cls.ret
@classmethod
def set_font_size(cls, font_size, pattern, paragraph_indices=None):
    """
    Changes the font size of specified text in the document.

    Paragraphs are numbered the same way ``get_paragraphs`` numbers them:
    only elements supporting the Paragraph service are counted.

    Args:
        font_size (float): The font size to apply (in points).
        pattern (str): The pattern to match in the document, should be a regular expression.
        paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
            If not provided, applies to all paragraphs.

    Returns:
        str: Result message indicating success or failure (also stored in cls.ret).
    """
    try:
        regex = re.compile(pattern)
        paragraphs = cls.doc.getText().createEnumeration()
        current_index = 0
        while paragraphs.hasMoreElements():
            paragraph = paragraphs.nextElement()
            if not paragraph.supportsService("com.sun.star.text.Paragraph"):
                continue
            selected = not paragraph_indices or current_index in paragraph_indices
            current_index += 1
            if not selected:
                continue
            for match in regex.finditer(paragraph.getString()):
                length = match.end() - match.start()
                if length == 0:
                    # An empty match selects no text, so there is nothing to resize.
                    continue
                # Anchor a fresh cursor at the start of THIS paragraph; a
                # cursor's gotoStart() jumps to the start of the whole text,
                # which would apply the offsets to the wrong place.
                match_cursor = cls.text.createTextCursorByRange(paragraph.getStart())
                match_cursor.goRight(match.start(), False)
                match_cursor.goRight(length, True)
                match_cursor.CharHeight = font_size
        cls.ret = f"Successfully changed font size to {font_size} for text matching '{pattern}'"
        return cls.ret
    except Exception as e:
        cls.ret = f"Error changing font size: {str(e)}"
        return cls.ret
@classmethod
def set_paragraph_alignment(cls, alignment, paragraph_indices=None):
    """
    Sets the text alignment for specified paragraphs in the document.

    Only elements supporting the Paragraph service are counted when
    resolving indices.

    Args:
        alignment (str): The alignment to apply ('left', 'center', 'right', 'justify'); case-insensitive.
        paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
            Out-of-range indices are skipped and reported in the result message.
            If not provided, applies to all paragraphs.

    Returns:
        str: Success message or error message (also stored in cls.ret)
    """
    try:
        key = alignment.lower()
        if key not in ("left", "center", "right", "justify"):
            cls.ret = f"Error: Invalid alignment '{alignment}'. Use 'left', 'center', 'right', or 'justify'."
            return cls.ret

        # Imported locally because the module-level import only brings in
        # CENTER, LEFT and RIGHT. Justified text is ParagraphAdjust.BLOCK; the
        # bare number 3 is the ordinal of CENTER.
        from com.sun.star.style.ParagraphAdjust import BLOCK

        alignment_value = {"left": LEFT, "center": CENTER, "right": RIGHT, "justify": BLOCK}[key]

        paragraphs = []
        paragraph_enum = cls.doc.getText().createEnumeration()
        while paragraph_enum.hasMoreElements():
            paragraph = paragraph_enum.nextElement()
            if paragraph.supportsService("com.sun.star.text.Paragraph"):
                paragraphs.append(paragraph)

        warning = ""
        if paragraph_indices:
            valid_indices = [i for i in paragraph_indices if 0 <= i < len(paragraphs)]
            if len(valid_indices) != len(paragraph_indices):
                warning = f" Warning: Some paragraph indices were out of range (0-{len(paragraphs) - 1})"
            targets = [paragraphs[i] for i in valid_indices]
        else:
            targets = paragraphs
        for paragraph in targets:
            paragraph.ParaAdjust = alignment_value

        # Keep the warning in the final message instead of overwriting it.
        cls.ret = f"Successfully applied '{alignment}' alignment to paragraphs" + warning
        return cls.ret
    except Exception as e:
        cls.ret = f"Error setting paragraph alignment: {str(e)}"
        return cls.ret
@classmethod
def set_default_font(cls, font_name, font_size=None):
    """
    Sets the default font for new text in the document without changing existing text.

    The font is written to the document's default paragraph style and to the
    class-level text cursor, for the Western, Asian and Complex script slots.

    Args:
        font_name (str): The name of the font to set as default (e.g., 'Times New Roman', 'Arial', 'Calibri')
        font_size (float, optional): The default font size in points.

    Returns:
        str: Success message or error message
    """
    name_props = ("CharFontName", "CharFontNameAsian", "CharFontNameComplex")
    height_props = ("CharHeight", "CharHeightAsian", "CharHeightComplex")

    def apply_font(target):
        # Font names first, then (optionally) heights, for one property set.
        for prop in name_props:
            target.setPropertyValue(prop, font_name)
        if font_size is not None:
            for prop in height_props:
                target.setPropertyValue(prop, float(font_size))

    try:
        para_styles = cls.doc.getStyleFamilies().getByName("ParagraphStyles")

        # Try the well-known names of the default style, in order of preference.
        chosen = next(
            (name for name in ["Default", "Standard", "Normal"] if para_styles.hasByName(name)),
            None,
        )
        if chosen is not None:
            base_style = para_styles.getByName(chosen)
        else:
            # None of the usual names exist: fall back to the first style listed.
            available = para_styles.getElementNames()
            if not available:
                raise Exception("Could not find default paragraph style")
            base_style = para_styles.getByName(available[0])

        apply_font(base_style)
        apply_font(cls.cursor)

        size_note = f" with size {font_size}pt" if font_size else ""
        cls.ret = f"Default font set to '{font_name}'" + size_note
        return cls.ret
    except Exception as e:
        cls.ret = f"Error setting default font: {str(e)}"
        return cls.ret
+ + Args: + position (str): Position of the page numbers ('bottom_left', 'bottom_center', 'bottom_right', + 'top_left', 'top_center', 'top_right') + start_number (int, optional): The starting page number. Defaults to 1. + format (str, optional): Format of the page numbers (e.g., '1', 'Page 1', '1 of N'). + Defaults to simple number format. + + Returns: + str: Success message or error message + """ + try: + page_styles = cls.doc.StyleFamilies.getByName("PageStyles") + default_style = page_styles.getByName("Standard") + try: + default_style.setPropertyValue("PageNumberOffset", start_number) + except: + pass + if position.startswith("top"): + default_style.HeaderIsOn = True + target = default_style.HeaderText + else: + default_style.FooterIsOn = True + target = default_style.FooterText + cursor = target.createTextCursor() + cursor.gotoStart(False) + cursor.gotoEnd(True) + cursor.setString("") + cursor.gotoStart(False) + if position.endswith("_left"): + cursor.ParaAdjust = LEFT + elif position.endswith("_center"): + cursor.ParaAdjust = CENTER + elif position.endswith("_right"): + cursor.ParaAdjust = RIGHT + if not format or format == "1": + page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber") + page_number.NumberingType = 4 + target.insertTextContent(cursor, page_number, False) + elif format == "Page 1" or "Page" in format and "of" not in format: + target.insertString(cursor, "Page ", False) + page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber") + page_number.NumberingType = 4 + target.insertTextContent(cursor, page_number, False) + elif format == "1 of N" or format == "Page {page} of {total}" or "of" in format: + if "Page" in format: + target.insertString(cursor, "Page ", False) + page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber") + page_number.NumberingType = 4 + target.insertTextContent(cursor, page_number, False) + target.insertString(cursor, " of ", False) + page_count = 
class VLCTools:
    """Control a local VLC player through its HTTP interface and its ``vlcrc`` file.

    Every public method stores its outcome in ``VLCTools.ret`` (printed by
    ``print_result``). Methods that talk to VLC return that value on success
    and ``None`` on failure.
    """

    host = "localhost"
    port = 8080
    base_url = f"http://{host}:{port}/requests"
    password = "password"
    auth = HTTPBasicAuth("", password)
    # Seconds to wait for the HTTP interface; without a timeout an unresponsive
    # player would block the caller indefinitely.
    timeout = 10
    ret = ""

    @classmethod
    def print_result(cls):
        """Print the outcome stored by the most recent tool call."""
        print(cls.ret)

    @classmethod
    def _config_path(cls):
        """Return the path of the current user's ``vlcrc`` file."""
        return Path.home() / ".config/vlc/vlcrc"

    @classmethod
    def _make_request(cls, endpoint, params=None):
        """GET ``<base_url>/<endpoint>``; return the response, or None on any request error."""
        url = f"{cls.base_url}/{endpoint}"
        try:
            response = requests.get(url, params=params, auth=cls.auth, timeout=cls.timeout)
            response.raise_for_status()
        except requests.exceptions.RequestException:
            return None
        return response

    @classmethod
    def _get_status(cls):
        """Return the parsed ``status.xml`` root element, or None if the request failed."""
        response = cls._make_request("status.xml")
        if response:
            return ET.fromstring(response.content)
        return None

    @classmethod
    def _run_command(cls, params, success, failure):
        """Send a ``status.xml`` command; store and return ``success``, else store ``failure`` and return None."""
        if cls._make_request("status.xml", params):
            cls.ret = success
            return cls.ret
        cls.ret = failure
        return None

    @classmethod
    def env_info(cls):
        """No environment summary is produced for VLC; the result is the literal string "None"."""
        cls.ret = "None"

    @classmethod
    def get_playlist(cls):
        """
        Lists the entries of VLC's "Playlist" node.

        Returns:
            str: "Playlist: [...]" with name, uri and duration of every entry, or None on failure
        """
        response = cls._make_request("playlist.xml")
        if response:
            info = ET.fromstring(response.content)
            playlist_node = info.find('.//node[@name="Playlist"]')
            if playlist_node is not None:
                playlist_items = []
                for leaf in playlist_node.findall("leaf"):
                    duration = leaf.get("duration")
                    playlist_items.append(
                        {
                            "name": leaf.get("name"),
                            "uri": leaf.get("uri"),
                            # leaf.get returns None for an absent attribute;
                            # None + "s" used to raise TypeError.
                            "duration": None if duration is None else duration + "s",
                        }
                    )
                cls.ret = f"Playlist: {playlist_items}"
                return cls.ret
        cls.ret = "Error getting playlist"
        return None

    @classmethod
    def play(cls):
        """Start playback. Returns the success message, or None on failure."""
        return cls._run_command({"command": "pl_play"}, "Start playing the media", "Error playing the media")

    @classmethod
    def pause(cls):
        """Pause playback. Returns the success message, or None on failure."""
        return cls._run_command({"command": "pl_pause"}, "Pause the media", "Error pausing the media")

    @classmethod
    def next(cls):
        """Skip to the next playlist entry. Returns the success message, or None on failure."""
        return cls._run_command({"command": "pl_next"}, "Switch to next media", "Error switching to next media")

    @classmethod
    def previous(cls):
        """Go back to the previous playlist entry. Returns the success message, or None on failure."""
        return cls._run_command(
            {"command": "pl_previous"}, "Switch to previous media", "Error switching to previous media"
        )

    @classmethod
    def add_to_playlist(cls, uri):
        """
        Adds a media item to the playlist and starts playing it.

        Args:
            uri (str): An http(s) URL, a ``file://`` URL or a plain filesystem path.
                Anything not starting with "http" is percent-encoded as a file URL.

        Returns:
            str: Success message, or None on failure
        """
        if uri.startswith("http"):
            encoded_uri = uri
        else:
            encoded_uri = "file://" + quote(uri.replace("file://", ""))

        return cls._run_command(
            {"command": "in_play", "input": encoded_uri},
            f"Add {uri} to playlist",
            f"Error adding {uri} to playlist",
        )

    @classmethod
    def get_current_time(cls):
        """
        Reads the current playback position.

        Returns:
            int: The value of the ``time`` element of status.xml, or None on failure
        """
        status = cls._get_status()
        if status is not None:
            time_node = status.find("time")
            if time_node is not None and time_node.text is not None:
                cls.ret = int(time_node.text)
                return cls.ret
        # Record the failure instead of leaving the previous call's result in place.
        cls.ret = "Error getting current time"
        return None

    @classmethod
    def get_media_duration(cls):
        """
        Reads the length of the current media.

        Returns:
            str: "Media duration: N seconds", or None on failure
        """
        status = cls._get_status()
        if status is not None:
            length = status.find("length")
            if length is not None:
                cls.ret = f"Media duration: {length.text} seconds"
                return cls.ret
        cls.ret = "Error getting media duration"
        return None

    @classmethod
    def get_settings(cls):
        """
        Reads the active (uncommented) ``key=value`` entries of the vlcrc file.

        Returns:
            str: The settings as an indented JSON object
        """
        settings = {}
        with open(cls._config_path(), "r") as f:
            for line in f:
                # Split on the first "=" only, so values that themselves contain
                # "=" are kept rather than silently dropped.
                key, sep, value = line.partition("=")
                key = key.strip()
                if not sep or key.startswith("#"):
                    continue
                settings[key] = value.strip()
        cls.ret = json.dumps(settings, indent=4, ensure_ascii=False)
        return cls.ret

    @classmethod
    def set_settings(cls, field, value):
        """
        Sets ``field=value`` in the vlcrc file, un-commenting or appending the entry as needed.

        Args:
            field (str): Exact name of the setting.
            value: New value; written with ``str`` formatting.

        Returns:
            str: Confirmation message
        """
        config_path = cls._config_path()
        with open(config_path, "r") as rf:
            settings = rf.read()

        # Match the whole line holding this field, optionally commented out.
        # Anchoring at line start keeps e.g. "volume" from rewriting the tail
        # of "audio-volume=...".
        pattern = re.compile(r"^#? *" + re.escape(field) + r"=.*$", re.MULTILINE)
        if pattern.search(settings):
            # A callable replacement is inserted verbatim, so backslashes in
            # `value` are not interpreted as regex escapes or group references.
            settings = pattern.sub(lambda _match: f"{field}={value}", settings)
        else:
            if settings and not settings.endswith("\n"):
                settings += "\n"
            settings += f"{field}={value}\n"

        with open(config_path, "w") as wf:
            wf.write(settings)

        cls.ret = f"Set {field} to {value}"
        return cls.ret

    @classmethod
    def toggle_fullscreen(cls, enable=None):
        """
        Toggle fullscreen mode or set it explicitly based on the enable parameter.

        Args:
            enable (bool, optional): If provided, explicitly set fullscreen mode (True for fullscreen, False for windowed)

        Returns:
            str: Success or error message
        """
        if enable is not None:
            # The "fullscreen" command only toggles, so an explicit request is
            # honoured by checking the current state and toggling only if it differs.
            status = cls._get_status()
            if status is None:
                cls.ret = "Error changing fullscreen mode"
                return None
            # NOTE(review): assumes status.xml reports <fullscreen> as true/false or 1/0 — confirm.
            state = (status.findtext("fullscreen") or "").strip().lower()
            if (state in ("true", "1")) == bool(enable):
                cls.ret = f"Fullscreen mode {'enabled' if enable else 'disabled'}"
                return cls.ret

        response = cls._make_request("status.xml", {"command": "fullscreen"})
        if response:
            if enable is None:
                action = "toggled"
            else:
                action = "enabled" if enable else "disabled"
            cls.ret = f"Fullscreen mode {action}"
            return cls.ret
        cls.ret = "Error changing fullscreen mode"
        return None

    @classmethod
    def get_media_files(cls, path, suffix=None):
        """
        Gets the media files for the specified path.

        Args:
            path (str): The path to the media files
            suffix (List[str], optional): The suffix of the media files, with or without
                a leading dot; a single string is also accepted.
                Defaults to ['mp4', 'avi', 'mkv', 'mov', 'mp3', 'm4a', 'wav']

        Returns:
            list: Full paths of the matching files found under ``path`` (searched
                recursively), or None when the path is empty or missing
        """
        if suffix is None:
            suffix = ["mp4", "avi", "mkv", "mov", "mp3", "m4a", "wav"]
        elif isinstance(suffix, str):
            suffix = [suffix]

        if not path:
            cls.ret = "Path cannot be empty"
            return None

        if not os.path.exists(path):
            cls.ret = f"Path not found: {path}"
            return None

        # Normalise to lowercase ".ext" so "MP4", "mp4" and ".mp4" all match.
        extensions = tuple("." + s.lower().lstrip(".") for s in suffix)

        media_files = []
        try:
            for root, _, files in os.walk(path):
                for file in files:
                    if file.lower().endswith(extensions):
                        media_files.append(os.path.join(root, file))
        except Exception as e:
            cls.ret = f"Error while scanning directory: {str(e)}"
            return None

        cls.ret = media_files
        return cls.ret
# Logger Configs {{{ #
# The root logger fans out to three files under ./logs plus stdout; the stdout
# and "sdebug" handlers only pass records from the "desktopenv" logger tree.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")

# logging.FileHandler opens its file as soon as it is constructed and raises
# FileNotFoundError when the directory is missing, so create it up front.
os.makedirs("logs", exist_ok=True)

file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
stdout_handler = logging.StreamHandler(sys.stdout)
sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")

file_handler.setLevel(logging.INFO)
debug_handler.setLevel(logging.DEBUG)
stdout_handler.setLevel(logging.INFO)
sdebug_handler.setLevel(logging.DEBUG)

formatter = logging.Formatter(
    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
)
file_handler.setFormatter(formatter)
debug_handler.setFormatter(formatter)
stdout_handler.setFormatter(formatter)
sdebug_handler.setFormatter(formatter)

stdout_handler.addFilter(logging.Filter("desktopenv"))
sdebug_handler.addFilter(logging.Filter("desktopenv"))

logger.addHandler(file_handler)
logger.addHandler(debug_handler)
logger.addHandler(stdout_handler)
logger.addHandler(sdebug_handler)
# }}} Logger Configs #

# From here on `logger` is this script's own logger, not the root logger.
logger = logging.getLogger("desktopenv.experiment")
class DesktopEnv(DesktopEnvBase):
    """Desktop environment variant used by the AutoGLM runner.

    Extends the base environment so that every observation also carries the
    open-window list, the active window id, the active application name and,
    for supported applications, the output of that application's ``env_info``
    tool. ``reset`` additionally uploads the tool package into the guest and
    starts a headless ``soffice`` listener.
    """

    def step(self, action, pause=2):
        """Execute one action and return ``(observation, reward, done, info)``.

        Args:
            action: One of the strings 'WAIT', 'FAIL', 'DONE'; a dict with
                'action_type' ('OPEN_APP' or 'OPEN_CHROME_TAB') and 'parameters';
                or anything else, which is passed to the controller as a Python command.
            pause: Seconds to sleep after the action (and for 'WAIT' itself).

        Returns:
            tuple: The new observation (with the textual outcome under
                'exe_result'), the reward (always 0), the done flag and an info dict.
        """
        self._step_no += 1
        self.action_history.append(action)

        # Mark environment as used when step is called
        self.is_environment_used = True

        reward = 0  # todo: Define reward calculation for each example
        done = False  # todo: Define episode termination condition for each example
        info = {}
        logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")

        # handle the special actions
        if action == 'WAIT':
            time.sleep(pause)
            exe_result = 'Wait ' + str(pause) + ' seconds'
        elif action == 'FAIL':
            done = True
            info = {"fail": True}
            exe_result = 'Finish: fail'
        elif action == 'DONE':
            done = True
            info = {"done": True}
            exe_result = 'Finish: success'
        elif isinstance(action, dict):
            action_type = action.get('action_type')
            if action_type == 'OPEN_APP':
                self.setup_controller._launch_setup(action['parameters']['launch_app_command'], shell=True)
                exe_result = 'Open ' + action['parameters']['app_name']
            elif action_type == 'OPEN_CHROME_TAB':
                self.setup_controller._chrome_open_tabs_setup(action['parameters']['urls_to_open'])
                exe_result = 'Open ' + str(action['parameters']['urls_to_open']) + ' in Chrome successfully'
            else:
                # Previously exe_result stayed unbound here and the method
                # crashed with UnboundLocalError further down.
                exe_result = f"Error Action: {action}"
                logger.error(f"Unsupported action type: {action_type}")
        else:
            # the set of all possible python commands insides `pyautogui`
            result = self.controller.execute_python_command(action)
            try:
                if result['error']:
                    exe_result = result['error'].strip()
                else:
                    exe_result = result['output'].strip()
            except Exception as e:
                # f-string rather than '+' so a non-string action cannot raise here.
                exe_result = f"Error Action: {action}"
                logger.error(f"Error executing action: {e}")

        time.sleep(pause)
        observation = self._get_obs()
        observation['exe_result'] = exe_result

        return observation, reward, done, info

    def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]:
        """Prepare the environment for a task and return the first observation.

        Reverts to the snapshot only if the environment has been used, runs the
        task setup (retrying up to ``MAX_RETRIES`` times), uploads the tool
        package into the guest home directory and starts the ``soffice`` listener.

        Args:
            task_config: Task description; when None no task setup is performed.
            seed: Unused by this method.
            options: Unused by this method.

        Returns:
            dict: The observation produced by ``_get_obs``.
        """
        # Reset to certain task in OSWorld
        logger.info("Resetting environment...")
        logger.info("Switching task...")
        logger.info("Setting counters...")
        self._traj_no += 1
        self._step_no = 0
        self.action_history.clear()

        for attempt in range(MAX_RETRIES):
            # Only revert to snapshot if environment has been used (step/setup)
            # This optimization is especially important for cloud providers like AWS
            # where unnecessary snapshot operations are costly and time-consuming

            if task_config is not None:
                # Only consider task proxy requirement if proxy is enabled at system level
                task_use_proxy = task_config.get("proxy", False) and self.enable_proxy
                if not self.enable_proxy and task_config.get("proxy", False):
                    logger.info("Task requires proxy but proxy is disabled at system level, ignoring proxy requirement.")

                if task_use_proxy != self.current_use_proxy:
                    # keep because get_info_from_website depend on this
                    self.current_use_proxy = task_use_proxy

            if self.is_environment_used:
                logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
                self._revert_to_snapshot()
                logger.info("Starting emulator...")
                self._start_emulator()
                logger.info("Emulator started.")
                # Reset the usage flag after reverting
                self.is_environment_used = False
            else:
                logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))

            if task_config is not None:
                if task_config.get("proxy", False) and self.enable_proxy:
                    # If using proxy and proxy is enabled, set up the proxy configuration
                    self.setup_controller._proxy_setup(self.client_password)
                self._set_task_info(task_config)
                self.setup_controller.reset_cache_dir(self.cache_dir)
                logger.info("Setting up environment...")
                success = self.setup_controller.setup(self.config, task_config.get("proxy", False) and self.enable_proxy)
                if success:
                    # Mark environment as used when setup is successfully executed
                    if self.config:  # Only mark as used if there were actual setup operations
                        self.is_environment_used = True
                    break
                else:
                    logger.error(
                        "Environment setup failed, retrying (%d/%d)...",
                        attempt + 1,
                        MAX_RETRIES,
                    )
                    time.sleep(5)
            else:
                break

        logger.info("Environment setup complete.")

        # Upload tools from autoglm package
        import mm_agents.autoglm_v
        tool_dir = os.path.join(os.path.dirname(mm_agents.autoglm_v.__file__), 'tools', 'package')
        for file in os.listdir(tool_dir):
            if os.path.isdir(os.path.join(tool_dir, file)):
                continue
            self.setup_controller._upload_file_setup([{
                "local_path": os.path.join(tool_dir, file),
                # Destination inside the guest: always '/'-separated, whatever
                # the host's os.path separator is.
                "path": f"~/{file}"
            }])

        # start soffice service for office tools
        self.setup_controller._launch_setup('soffice --accept="socket,host=localhost,port=2002;urp;" --norestore --nologo --nodefault', shell=True)
        time.sleep(5)

        observation = self._get_obs()
        return observation

    def get_current_apps(self):
        """Query the guest's window list and active window via ``wmctrl``.

        Returns:
            tuple: ``(app_list, cur_id)`` where ``app_list`` maps window id to
                ``{'app_name', 'title'}`` for windows whose second column is '0',
                and ``cur_id`` is the id reported for the active window.
        """
        apps_code = r"""import subprocess;
command = "wmctrl -xl";
apps = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip().split('\n');
print(apps);"""
        window_code = r"""import subprocess;
command = "wmctrl -a :ACTIVE: -v 2>&1 | grep 'Using window' | awk '{print $3}'";
window_id = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
print(window_id);"""

        apps = self.controller.execute_python_command(apps_code)['output'].strip()
        # The guest prints a Python list literal; parse it back without eval.
        apps = ast.literal_eval(apps)
        app_list = {}

        for app in apps:
            parts = app.split(maxsplit=4)
            if len(parts) < 4:
                continue
            if parts[1] != '0':
                continue
            window_id = parts[0]
            # Keep the trailing half of the dot-separated class column
            # (the whole column when it contains no dot).
            app_name = '.'.join(parts[2].split('.')[-(math.ceil(parts[2].count('.') / 2)):])
            # NOTE(review): per the wmctrl man page the column after WM_CLASS is
            # the client machine and the title follows it (parts[4]) — confirm
            # which field consumers of 'title' expect before changing this.
            title = parts[3]
            app_list[window_id] = {
                'app_name': app_name,
                'title': title
            }

        cur_id = self.controller.execute_python_command(window_code)['output'].strip()

        return app_list, cur_id

    def maximize_window(self):
        """Ask the window manager to maximize the active window, retrying up to five times.

        Stops as soon as the active window's ``_NET_WM_STATE`` shows there is
        nothing left to do; failures are logged and never raised.
        """
        window_state = r"""import subprocess;
command = "xprop -id $(xprop -root _NET_ACTIVE_WINDOW | awk -F' ' '{print $5}') _NET_WM_STATE"
output = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
print(output);"""
        for _ in range(5):
            try:
                self.setup_controller._launch_setup('wmctrl -r :ACTIVE: -b add,maximized_vert,maximized_horz', shell=True)
                time.sleep(2)
                output = self.controller.execute_python_command(window_state)['output'].strip()
                # Done when: no focused window, a popup, a modal window, or the
                # window is already maximized.
                if '_NET_WM_STATE_FOCUSED' not in output or '_NET_WM_STATE_SKIP_TASKBAR' in output or '_NET_WM_STATE_MODAL' in output or '_NET_WM_STATE_MAXIMIZED' in output:
                    return
            except Exception as e:
                logger.error(f"Failed to maximize window: {e}")
                time.sleep(1)

    def _get_obs(self):
        """Collect the observation dict handed to the agent.

        Returns:
            dict: 'screenshot' (``b''`` when capture failed), 'accessibility_tree',
                'instruction', 'apps', 'cur_window_id', 'cur_app' and 'app_info'.

        Raises:
            Exception: Whatever ``get_current_apps`` raised on its third failed attempt.
        """
        tool_list = {
            "libreoffice_calc": "CalcTools",
            "libreoffice_impress": "ImpressTools",
            "libreoffice_writer": "WriterTools",
            "code": "CodeTools",
            "vlc": "VLCTools",
            "google_chrome": "BrowserTools"
        }

        self.maximize_window()

        for i in range(3):
            try:
                app_list, cur_id = self.get_current_apps()
                # Stop at the first success; without this the guest was
                # queried three times for every observation.
                break
            except Exception as e:
                if i == 2:
                    raise e
                logger.error(f"Failed to get current apps: {e}")
                time.sleep(1)

        if cur_id in app_list:
            cur_app = app_list[cur_id]['app_name']

            tool_name = cur_app.strip().lower().replace('-', '_')
            if tool_name in tool_list:
                class_name = tool_list[tool_name]
                command = f"from {tool_name} import *; "
                command += f"{class_name}.env_info(); "
                command += f"{class_name}.print_result();"
                result = self.controller.execute_python_command(command)
                # presumably the controller can return None / no output on failure — TODO confirm;
                # either way a missing result should not abort the whole observation.
                output = result.get('output') if isinstance(result, dict) else None
                app_info = output.strip() if isinstance(output, str) else None
            else:
                app_info = None
        else:
            cur_app = None
            app_info = None

        tree = self.controller.get_accessibility_tree()
        screenshot = self.controller.get_screenshot()
        if screenshot is None:
            logger.error("Failed to get screenshot.")
            screenshot = b''

        return {
            "screenshot": screenshot,
            "accessibility_tree": tree,
            "instruction": self.instruction,
            "apps": app_list,
            "cur_window_id": cur_id,
            "cur_app": cur_app,
            "app_info": app_info,
        }
called successfully.") + return result['choices'][0]['message']['content'] + + env = DesktopEnv( + provider_name=args.provider_name, + region=args.region, + client_password=args.client_password, + path_to_vm=args.path_to_vm, + action_space=args.action_space, + screen_size=(args.screen_width, args.screen_height), + headless=args.headless, + os_type="Ubuntu", + require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"], + ) + agent = AutoGLMAgent( + action_space=args.action_space, + observation_type=args.observation_type, + screen_size=(args.screen_width, args.screen_height), + image_size=(args.image_width, args.image_height), + max_trajectory_length=args.max_trajectory_length, + client_password=args.client_password, + gen_func=call_llm, + ) + + for domain in tqdm(test_all_meta, desc="Domain"): + for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False): + config_file = os.path.join(args.test_config_base_dir, f"{domain}/{example_id}.json") + with open(config_file, "r", encoding="utf-8") as f: + example = json.load(f) + + logger.info(f"[Domain]: {domain}") + logger.info(f"[Example ID]: {example_id}") + + instruction = example["instruction"] + + logger.info(f"[Instruction]: {instruction}") + # wandb each example config settings + cfg_args["instruction"] = instruction + cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S") + + example_result_dir = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + domain, + example_id, + ) + os.makedirs(example_result_dir, exist_ok=True) + # example start running + try: + lib_run_single.run_single_example_autoglm( + agent, + env, + example, + max_steps, + instruction, + args, + example_result_dir, + scores, + ) + except Exception as e: + logger.error(f"Exception in {domain}/{example_id}: {e}") + # Only attempt to end recording if controller exists (not Docker provider) + if hasattr(env, "controller") and env.controller is 
def get_unfinished(action_space, use_model, observation_type, result_dir, total_file_json):
    """Drop already-finished examples from the task list and clean up partial runs.

    An example counts as finished when its result directory
    ``<result_dir>/<action_space>/<observation_type>/<use_model>/<domain>/<example_id>``
    contains ``result.txt``. Example directories without it are emptied so the
    example can be re-run from scratch.

    Args:
        action_space: Action-space component of the results path.
        use_model: Model-name component of the results path.
        observation_type: Observation-type component of the results path.
        result_dir: Root directory of all results.
        total_file_json: Mapping of domain -> list of example ids; updated in place.

    Returns:
        dict: ``total_file_json`` with finished example ids removed.
    """
    import shutil  # local import: this module does not import shutil at file level

    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)

    if not os.path.exists(target_dir):
        return total_file_json

    finished = {}
    for domain in os.listdir(target_dir):
        domain_path = os.path.join(target_dir, domain)
        if not os.path.isdir(domain_path):
            # Plain files next to the domain folders (e.g. args.json) are not domains.
            continue
        done = finished.setdefault(domain, set())
        for example_id in os.listdir(domain_path):
            if example_id == "onboard":
                continue
            example_path = os.path.join(domain_path, example_id)
            if not os.path.isdir(example_path):
                continue
            entries = os.listdir(example_path)
            if "result.txt" in entries:
                done.add(example_id)
                continue
            # Unfinished run: remove everything it left behind. os.remove alone
            # raises on sub-directories, so those go through rmtree.
            for entry in entries:
                entry_path = os.path.join(example_path, entry)
                if os.path.isdir(entry_path) and not os.path.islink(entry_path):
                    shutil.rmtree(entry_path)
                else:
                    os.remove(entry_path)

    for domain, examples in finished.items():
        if domain in total_file_json:
            total_file_json[domain] = [x for x in total_file_json[domain] if x not in examples]

    return total_file_json
os.path.join(example_path, "result.txt") + try: + with open(result_path, "r") as rf: + res = rf.read().strip() + if res.lower() == "true": + score = 1.0 + else: + score = float(res) + except Exception: + score = 0.0 + all_result.append(score) + + if not all_result: + print("New experiment, no result yet.") + return None + else: + print("Current Success Rate:", sum(all_result) / len(all_result) * 100, "%") + return all_result + + +if __name__ == "__main__": + ####### The complete version of the list of examples ####### + os.environ["TOKENIZERS_PARALLELISM"] = "false" + args = config() + if args.client_password == "": + if args.provider_name == "aws": + args.client_password = "osworld-public-evaluation" + else: + args.client_password = "password" + else: + args.client_password = args.client_password + + # save args to json in result_dir/action_space/observation_type/model/args.json + path_to_args = os.path.join( + args.result_dir, + args.action_space, + args.observation_type, + args.model, + "args.json", + ) + os.makedirs(os.path.dirname(path_to_args), exist_ok=True) + with open(path_to_args, "w", encoding="utf-8") as f: + json.dump(vars(args), f, indent=4) + + with open(args.test_all_meta_path, "r", encoding="utf-8") as f: + test_all_meta = json.load(f) + + if args.domain != "all": + test_all_meta = {args.domain: test_all_meta[args.domain]} + + test_file_list = get_unfinished( + args.action_space, + args.model, + args.observation_type, + args.result_dir, + test_all_meta, + ) + left_info = "" + for domain in test_file_list: + left_info += f"{domain}: {len(test_file_list[domain])}\n" + logger.info(f"Left tasks:\n{left_info}") + + get_result( + args.action_space, + args.model, + args.observation_type, + args.result_dir, + test_all_meta, + ) + test(args, test_file_list) diff --git a/run_multienv_autoglm_v.py b/run_multienv_autoglm_v.py new file mode 100644 index 00000000..3716050c --- /dev/null +++ b/run_multienv_autoglm_v.py @@ -0,0 +1,294 @@ +"""Script to run 
logger = logging.getLogger("desktopenv.experiment")


def config() -> argparse.Namespace:
    """Parse the command-line options of the parallel evaluation script.

    Returns:
        The parsed ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(description="Run end-to-end evaluation on the benchmark")

    # environment config
    parser.add_argument("--path_to_vm", type=str)
    parser.add_argument(
        "--provider_name",
        type=str,
        default="docker",
        help="Virtualization provider (vmware, docker, aws, azure, gcp, virtualbox)",
    )
    # NOTE(review): ``store_true`` combined with ``default=True`` means this
    # option is always True, whether or not the flag is passed.
    parser.add_argument("--headless", action="store_true", default=True, help="Run in headless machine")
    parser.add_argument("--action_space", type=str, default="autoglm_computer_use", help="Action type")
    parser.add_argument(
        "--observation_type",
        choices=["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"],
        default="a11y_tree",
        help="Observation type",
    )
    parser.add_argument("--screen_width", type=int, default=1920)
    parser.add_argument("--screen_height", type=int, default=1080)
    parser.add_argument("--sleep_after_execution", type=float, default=1.0)
    parser.add_argument("--max_steps", type=int, default=30)

    # agent config
    parser.add_argument("--max_trajectory_length", type=int, default=3)
    parser.add_argument("--test_config_base_dir", type=str, default="evaluation_examples/examples")

    # lm config
    parser.add_argument("--model", type=str, default="autoglm-os")
    parser.add_argument("--temperature", type=float, default=0.4)
    parser.add_argument("--top_p", type=float, default=0.5)
    parser.add_argument("--max_tokens", type=int, default=2048)
    parser.add_argument("--stop_token", type=str, default=None)
    parser.add_argument("--image_width", type=int, default=1280)
    parser.add_argument("--image_height", type=int, default=720)

    # example config
    parser.add_argument("--domain", type=str, default="all")
    parser.add_argument("--test_all_meta_path", type=str, default="evaluation_examples/test_nogdrive.json")

    # aws config
    parser.add_argument(
        "--region", type=str, default="us-east-1", help="AWS region for the VM"
    )
    parser.add_argument("--client_password", type=str, default="", help="Client password")

    # logging related
    parser.add_argument("--result_dir", type=str, default="./results")

    # parallel number
    parser.add_argument("--num_workers", type=int, default=20, help="Number of parallel workers")
    return parser.parse_args()


def _load_example_score(result_path):
    """Return the score stored in *result_path*, or 0.0 if missing or invalid.

    The literal ``true`` (any casing) counts as 1.0; any other content is
    parsed with ``float``.
    """
    try:
        with open(result_path, "r") as rf:
            text = rf.read().strip()
        return 1.0 if text.lower() == "true" else float(text)
    except (OSError, ValueError):
        return 0.0


def _make_llm_caller(args, logger):
    """Build the ``gen_func`` callable that posts chat messages to the LLM endpoint."""

    # NOTE(review): the retried exception types come from the ``openai``
    # package while the request below is sent with ``requests``; confirm that
    # these are the errors that should trigger a retry.
    @backoff.on_exception(backoff.constant, (RateLimitError, APIConnectionError), interval=0.1)
    def call_llm(messages):
        logger.info("Calling LLM...")

        # Prepare the request data
        data = {
            "model": args.model,
            "messages": messages,
            "max_tokens": args.max_tokens,
            "temperature": args.temperature,
            "top_p": args.top_p,
            "skip_special_tokens": False,
            "stream": False,
            "include_stop_str_in_output": True,
            # NOTE(review): the last stop entry is an empty string - confirm
            # it is intentional.
            "stop": ["<|user|>", "<|observation|>", ""]
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY', '')}"
        }

        # Get API base URL from environment or use default; tolerate a
        # trailing slash so the final URL never contains "//chat".
        base_url = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1').rstrip('/')
        url = f"{base_url}/chat/completions"

        response = requests.post(url, json=data, headers=headers, timeout=60.0)
        response.raise_for_status()

        result = response.json()
        logger.info("LLM called successfully.")
        return result['choices'][0]['message']['content']

    return call_llm


def _worker_run(task):
    """Run one benchmark example in a worker process.

    Args:
        task: Tuple ``(domain, example_id, args)`` where *args* is the
            ``argparse.Namespace`` produced by ``config()``.

    Returns:
        ``(domain, example_id, score)``. The score is read from the example's
        ``result.txt``; it is 0.0 when that file is missing or unparsable, or
        when setting up the example fails.
    """
    domain, example_id, args = task  # args is an argparse.Namespace
    logger = logging.getLogger("desktopenv.experiment")
    env = None
    try:
        config_file = os.path.join(args.test_config_base_dir, f"{domain}/{example_id}.json")
        with open(config_file, "r", encoding="utf-8") as f:
            example = json.load(f)
        instruction = example["instruction"]

        env = DesktopEnv(
            provider_name=args.provider_name,
            region=args.region,
            client_password=args.client_password,
            path_to_vm=args.path_to_vm,
            action_space=args.action_space,
            screen_size=(args.screen_width, args.screen_height),
            headless=args.headless,
            os_type="Ubuntu",
            require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
        )
        agent = AutoGLMAgent(
            action_space=args.action_space,
            observation_type=args.observation_type,
            screen_size=(args.screen_width, args.screen_height),
            image_size=(args.image_width, args.image_height),
            max_trajectory_length=args.max_trajectory_length,
            client_password=args.client_password,
            gen_func=_make_llm_caller(args, logger),
        )

        example_result_dir = os.path.join(
            args.result_dir,
            args.action_space,
            args.observation_type,
            args.model,
            domain,
            example_id,
        )
        os.makedirs(example_result_dir, exist_ok=True)

        local_scores = []
        try:
            lib_run_single.run_single_example_autoglm(
                agent,
                env,
                example,
                args.max_steps,
                instruction,
                args,
                example_result_dir,
                local_scores,
            )
        except Exception as e:
            logger.error(f"[并发任务异常] {domain}/{example_id}: {e}")
            if hasattr(env, "controller") and env.controller is not None:
                try:
                    env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
                except Exception as rec_err:
                    # Best effort: a failed recording must not hide the run error.
                    logger.warning(f"Could not end recording for {domain}/{example_id}: {rec_err}")
            with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
                f.write(json.dumps({"Error": f"Exception in {domain}/{example_id}: {str(e)}"}) + "\n")

        score = _load_example_score(os.path.join(example_result_dir, "result.txt"))
        logger.info(f"[Finish] {domain}/{example_id} score={score}")
        return (domain, example_id, score)
    except Exception as e:
        logger.error(f"[Initializing Fail] {domain}/{example_id}: {e}")
        return (domain, example_id, 0.0)
    finally:
        # Close the environment on every path, including failures that happen
        # after the environment was created but before the run started.
        if env is not None:
            try:
                env.close()
            except Exception as close_err:
                logger.warning(f"Could not close environment for {domain}/{example_id}: {close_err}")


def test_parallel(args: argparse.Namespace, test_all_meta: dict):
    """Run every pending example through a process pool and log the average score.

    Args:
        args: Parsed options; ``num_workers`` bounds the pool size.
        test_all_meta: Mapping ``domain -> list of example ids`` still to run.
    """
    tasks = [
        (domain, example_id, args)
        for domain, example_ids in test_all_meta.items()
        for example_id in example_ids
    ]
    if not tasks:
        logger.info("No pending tasks")
        return

    # Never start more processes than there are tasks, and always at least one.
    workers = max(1, min(args.num_workers, len(tasks)))
    logger.info(f"Starting parallel execution: {workers} processes, {len(tasks)} tasks total")

    results = []
    with Pool(processes=workers) as pool:
        for res in tqdm(pool.imap_unordered(_worker_run, tasks), total=len(tasks), desc="Parallel execution"):
            results.append(res)

    scores = [s for (_, _, s) in results if s is not None]
    if scores:
        avg = sum(scores) / len(scores)
        logger.info(f"Parallel execution completed. Average score: {avg}")
    else:
        logger.info("No scores obtained.")


# Not a pytest test despite its name: opt out of collection in case this
# module is star-imported by a test file.
test_parallel.__test__ = False


if __name__ == "__main__":
    ####### The complete version of the list of examples #######
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    args = config()
    # Fall back to a provider-specific default when no password was given.
    if args.client_password == "":
        args.client_password = (
            "osworld-public-evaluation" if args.provider_name == "aws" else "password"
        )

    # save args to json in result_dir/action_space/observation_type/model/args.json
    path_to_args = os.path.join(
        args.result_dir,
        args.action_space,
        args.observation_type,
        args.model,
        "args.json",
    )
    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
    with open(path_to_args, "w", encoding="utf-8") as f:
        json.dump(vars(args), f, indent=4)

    with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
        test_all_meta = json.load(f)

    if args.domain != "all":
        test_all_meta = {args.domain: test_all_meta[args.domain]}

    test_file_list = get_unfinished(
        args.action_space,
        args.model,
        args.observation_type,
        args.result_dir,
        test_all_meta,
    )
    left_info = "".join(
        f"{domain}: {len(examples)}\n" for domain, examples in test_file_list.items()
    )
    logger.info(f"Left tasks:\n{left_info}")

    get_result(
        args.action_space,
        args.model,
        args.observation_type,
        args.result_dir,
        test_all_meta,
    )
    test_parallel(args, test_file_list)