Add autoglm-os-9b-v (#344)

* update for autoglm-v * Update run_autoglm.py --------- Co-authored-by: hanyullai <hanyullai@outlook.com>
2025-09-24 19:43:28 +08:00 · 2025-09-24 19:43:28 +08:00 · a4f8fe2f00
parent f59cf00cae
commit a4f8fe2f00
23 changed files with 8425 additions and 2 deletions
--- a/lib_run_single.py
+++ b/lib_run_single.py
@ -253,14 +253,20 @@ def run_single_example_autoglm(agent, env, example, max_steps, instruction, args
                    "screenshot_file": f"step_{step_idx + 1}_{action_timestamp}.png"
                }))
                f.write("\n")
+                
            if done:
                logger.info("The episode is done.")
                break
        
-        if not done: # not completed the task yet
-            env.action_history.append('FAIL')
+        # Invalid Action
+        if not actions:
+            obs = env._get_obs() # update observation
            
        step_idx += 1
+    
+    if not done: # not completed the task yet
+        env.action_history.append('FAIL')
+    
    result = env.evaluate()
    logger.info("Result: %.2f", result)
    scores.append(result)
--- a/mm_agents/autoglm_v/init.py
+++ b/mm_agents/autoglm_v/init.py
@ -0,0 +1,7 @@
+"""
+AutoGLM agent implementation
+"""
+
+from .main import AutoGLMAgent
+
+__all__ = ["AutoGLMAgent"]
--- a/mm_agents/autoglm_v/main.py
+++ b/mm_agents/autoglm_v/main.py
@ -0,0 +1,265 @@
+import logging
+import re
+from base64 import b64encode
+from PIL import Image
+from io import BytesIO
+from typing import Dict, List
+
+from .prompt.accessibility_tree_handle import linearize_accessibility_tree, trim_accessibility_tree
+from .prompt.grounding_agent import GroundingAgent as Agent
+from .tools.package.google_chrome import BrowserTools
+from .prompt.procedural_memory import Prompt
+
+# Module-level logger; AutoGLMAgent.reset() rebinds this global.
+logger = logging.getLogger("desktopenv.agent")
+
+# Presumably the observation types that carry no screenshot (TODO confirm);
+# nothing in this module reads this list.
+pure_text_settings = ["a11y_tree"]
+
+def resize_image(image, w, h):
+    """Decode encoded image bytes, resize them to ``w`` x ``h`` and return PNG bytes.
+
+    The image is stretched to exactly ``(w, h)``; the aspect ratio is not preserved.
+    """
+    resized = Image.open(BytesIO(image)).resize((w, h))
+    out = BytesIO()
+    # Always re-encode as PNG, whatever the input format was.
+    resized.save(out, format='PNG')
+    return out.getvalue()
+
+def parse_code_from_string(input_string):
+    """Extract code snippets and control commands from a model response.
+
+    A response that is exactly ``WAIT``, ``DONE`` or ``FAIL`` (ignoring surrounding
+    whitespace) is returned as a one-element list. Otherwise every triple-backtick
+    fenced block is collected, with or without a language tag. When the last line of
+    a block is a control command, the preceding code (if any) and the command are
+    returned as separate entries.
+
+    Returns:
+        A list of strings in order of appearance; empty when nothing is fenced.
+    """
+    commands = ("WAIT", "DONE", "FAIL")  # fixme: updates this part when we have more commands
+
+    whole = input_string.strip()
+    if whole in commands:
+        return [whole]
+
+    # Non-greedy capture of the fenced body; DOTALL lets a block span several lines.
+    fenced_blocks = re.findall(r"```(?:\w+\s+)?(.*?)```", input_string, re.DOTALL)
+
+    codes = []
+    for block in fenced_blocks:
+        block = block.strip()
+        block_lines = block.split("\n")
+        tail = block_lines[-1]
+        if tail in commands:
+            # A bare command has no preceding code, so only the command is emitted.
+            if len(block_lines) > 1:
+                codes.append("\n".join(block_lines[:-1]))
+            codes.append(tail)
+        else:
+            codes.append(block)
+
+    return codes
+
+
+class AutoGLMAgent:
+    """Agent that turns desktop observations into chat prompts and grounds the replies.
+
+    ``predict`` builds the prompt (``prepare``), calls the injected ``gen_func``,
+    converts the reply into executable actions (``execute``) and records the turn in
+    ``self.contents`` so that later prompts can replay it (``format_history``).
+    """
+
+    def __init__(
+        self,
+        action_space="autoglm_computer_use",
+        observation_type="a11y_tree",
+        max_trajectory_length=3,
+        a11y_tree_max_items=300,
+        with_image: bool = True,
+        screen_size = (1920, 1080),
+        image_size=(1920, 1080),
+        with_atree: bool = False,
+        glm41v_format: bool = True,
+        relative_coordinate: bool = True,
+        client_password="password",
+        gen_func=None,
+        tool_in_sys_msg: bool = True,
+    ):
+        """Store the configuration and start with an empty turn history.
+
+        Only ``"autoglm_computer_use"`` and ``"a11y_tree"`` are accepted for
+        ``action_space`` and ``observation_type``. ``gen_func`` is called with the
+        chat message list and its return value is used as the model response; it
+        must be set before ``predict`` is called.
+        """
+        self.action_space = action_space
+        self.observation_type = observation_type
+        assert action_space in ["autoglm_computer_use"], "Invalid action space"
+        assert observation_type in ["a11y_tree"], "Invalid observation type"
+        self.max_trajectory_length = max_trajectory_length
+        self.a11y_tree_max_items = a11y_tree_max_items
+        self.with_image = with_image
+        self.screen_size = screen_size
+        self.image_size = image_size
+        self.with_atree = with_atree
+        self.glm41v_format = glm41v_format
+        self.relative_coordinate = relative_coordinate
+        self.client_password = client_password
+        self.gen_func = gen_func
+        self.tool_in_sys_msg = tool_in_sys_msg
+
+        # Normalised application name -> tool class name.
+        self.tool_list = {
+            "libreoffice_calc": "CalcTools",
+            "libreoffice_impress": "ImpressTools",
+            "libreoffice_writer": "WriterTools",
+            "code": "CodeTools",
+            "vlc": "VLCTools",
+            "google_chrome": "BrowserTools",
+        }
+
+        # This sets a class attribute on the shared grounding agent, so it affects
+        # every AutoGLMAgent instance in the process, not only this one.
+        Agent.relative_coordinate = relative_coordinate
+
+        # One dict per completed turn, appended by predict().
+        self.contents = []
+
+    @property
+    def turn_number(self):
+        """Number of turns recorded so far."""
+        return len(self.contents)
+
+    def prepare(self, instruction: str, obs: Dict, history: List, last_result: str = "") -> List:
+        """Build the chat messages for the next model call.
+
+        The result is a system message (setup, optional tool definitions, notes and
+        the task), followed by ``history``, followed by one user message describing
+        the current observation (optionally preceded by the resized screenshot).
+
+        Side effect: when ``obs`` carries ``exe_result`` and ``last_result`` is empty,
+        the result is written back into the most recent entry of ``self.contents``.
+        """
+        if "exe_result" in obs and not last_result:
+            last_result = obs["exe_result"]
+            if self.contents:
+                self.contents[-1]["exe_result"] = last_result
+
+        cur_app = obs["cur_app"]
+        logger.info(f"current app is {cur_app}")
+
+        # Only applications with a registered tool class get app-specific prompts.
+        tool_name = None
+        if cur_app:
+            candidate = cur_app.strip().lower().replace("-", "_")
+            if candidate in self.tool_list:
+                tool_name = candidate
+
+        setup_prompt, func_def_prompt, note_prompt = Prompt.construct_procedural_memory(
+            Agent,
+            app_name=tool_name,
+            client_password=self.client_password,
+            with_image=self.with_image,
+            with_atree=self.with_atree,
+            relative_coordinate=self.relative_coordinate,
+            glm41v_format=self.glm41v_format,
+        )
+        if self.tool_in_sys_msg:
+            system_message = setup_prompt + "\n\n" + func_def_prompt + "\n\n" + note_prompt
+        else:
+            system_message = setup_prompt + "\n\n" + note_prompt
+        system_message += "\n\n**IMPORTANT** You are asked to complete the following task: {}".format(instruction)
+
+        messages = [
+            {
+                "role": "system",
+                "content": system_message,
+            }
+        ]
+        messages.extend(history)
+
+        if obs["apps"]:
+            app_str = "Window ID    App Name    Title\n"
+            for window_id, app in obs["apps"].items():
+                app_str += f"{window_id}    {app['app_name']}    {app['title']}\n"
+        else:
+            app_str = "None"
+
+        last_result = last_result.strip() if last_result else "None"
+        last_result = last_result[:2000] + "..." if len(last_result) > 2000 else last_result
+
+        # The tree is only rendered when requested, and is trimmed to the configured
+        # item limit rather than a hard-coded one.
+        if self.with_atree:
+            tree = linearize_accessibility_tree(obs["accessibility_tree"], "Ubuntu")
+            tree = trim_accessibility_tree(tree, self.a11y_tree_max_items)
+            tree_section = '\n\n* A11y Tree: {}'.format(tree.strip())
+        else:
+            tree_section = ""
+
+        app_info = obs["app_info"].strip() if obs["app_info"] else "None"
+        app_info = app_info[:5000] + "..." if len(app_info) > 5000 else app_info
+
+        # A missing or non-string window id is reported as "None" instead of raising
+        # on the substring test.
+        cur_window_id = obs["cur_window_id"]
+        if isinstance(cur_window_id, str) and cur_window_id in app_str:
+            current_window = cur_window_id.strip()
+        else:
+            current_window = "None"
+
+        prompt = "* Apps: {}\n\n* Current App: {}{}\n\n* App Info: {}\n\n* Previous Action Result: {}".format(
+            app_str.strip(),
+            current_window,
+            tree_section,
+            app_info,
+            # A whitespace-only result is empty after strip(), hence the second fallback.
+            last_result if last_result else "None",
+        ) + (
+            "\n\n" + func_def_prompt if not self.tool_in_sys_msg else ""
+        )
+
+        content = [{"type": "text", "text": prompt}]
+        if self.with_image and obs.get('screenshot'):
+            screenshot = resize_image(obs['screenshot'], self.image_size[0], self.image_size[1])
+            content = [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/png;base64,{b64encode(screenshot).decode('utf-8')}",
+                        "detail": "high",
+                    },
+                }
+            ] + content
+
+        messages.append({"role": "user", "content": content})
+
+        return messages
+
+    def execute(self, response, obs):
+        """Ground the first code snippet of ``response`` into executable actions.
+
+        Returns a list of actions, or an empty list when the response contains no
+        snippet or grounding fails for any reason (the failure is logged).
+        """
+        try:
+            actions = parse_code_from_string(response)
+            action = actions[0]
+            logger.info(f"The pesudo action is {action}")
+
+            # TODO: special check for BrowserTools
+            if "Agent." in action or "BrowserTools." in action:
+                # NOTE(security): this eval()s text produced by the model. Only run the
+                # agent where arbitrary code execution by the model is acceptable.
+                actions = [
+                    eval(action),
+                ]
+            else:
+                actions = Agent.tool_commands(action, obs["cur_app"].strip().replace("-", "_").lower())
+                logger.info(f"The grounded action is {actions[0]}")
+        except Exception as e:
+            # Deliberately broad: any parse or grounding failure becomes "no action".
+            logger.error("Failed to parse action from response: %s", e)
+            actions = []
+
+        return actions
+
+    def format_history(self, max_turns=30):
+        """Render recorded turns as alternating user/assistant messages.
+
+        Observations are replaced by a placeholder. From the second turn on, the
+        user message also carries the previous turn's execution result. User texts
+        are cut at 2000 characters and responses at 1500. Only the last
+        ``max_turns`` turns are returned.
+        """
+        history = []
+        for ix in range(self.turn_number):
+            if ix == 0:
+                env_input = "**Environment State (Omitted)**"
+            else:
+                env_input = (
+                    f"**Environment State (Omitted)**\nPrevious Action Result: {self.contents[ix - 1]['exe_result']}"
+                )
+
+            env_input = env_input[:2000] + "..." if len(env_input) > 2000 else env_input
+            response = self.contents[ix]["response"]
+            if len(response) > 1500:
+                response = response[:1500] + "..."
+            history.append({"role": "user", "content": [{"type": "text", "text": env_input}]})
+            history.append({"role": "assistant", "content": [{"type": "text", "text": response}]})
+
+        return history[-max_turns * 2:]
+
+    def predict(self, instruction: str, obs: Dict) -> tuple:
+        """Run one agent turn.
+
+        Returns:
+            ``(response, actions)``: the raw model response (``""`` when ``gen_func``
+            raised) and the grounded action list (empty when grounding failed).
+        """
+        history = self.format_history()
+        messages = self.prepare(instruction, obs, history)
+
+        assert self.gen_func is not None, "gen_func is not set"
+        try:
+            response = self.gen_func(messages)
+        except Exception as e:
+            logger.error("Failed to call gen_func, Error: " + str(e))
+            response = ""
+
+        logger.info("RESPONSE: %s", response)
+
+        actions = self.execute(response, obs)
+
+        # Record the turn. The observation is merged in last, so any observation key
+        # with the same name overrides the fields set here.
+        self.contents.append(
+            {
+                "instruction": instruction,
+                "index": len(self.contents),
+                "response": response,
+                "action": "Parse error" if not actions else actions[0],
+                "exe_result": "Invalid action" if not actions else "",
+                **obs,
+            }
+        )
+        return response, actions
+
+    def reset(self, _logger=None):
+        """Clear the turn history and rebind the module-level logger.
+
+        NOTE(review): the fallback logger name differs from the module default
+        ("desktopenv.agent"); confirm whether that is intentional.
+        """
+        global logger
+        logger = _logger if _logger is not None else logging.getLogger("desktopenv.aguvis_agent")
+
+        self.contents = []
--- a/mm_agents/autoglm_v/prompt/accessibility_tree_handle.py
+++ b/mm_agents/autoglm_v/prompt/accessibility_tree_handle.py
@ -0,0 +1,329 @@
+import io
+import re
+import xml.etree.ElementTree as ET
+from typing import List, Tuple
+
+from PIL import Image, ImageDraw, ImageFont
+
+from .deduplicate_node import filter_similar_nodes
+
+# XML namespace URIs used to qualify attributes in the accessibility tree, one set
+# per platform. The Ubuntu attributes namespace previously pointed at the Windows
+# URI, unlike every other Ubuntu constant here.
+attributes_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/attributes"
+attributes_ns_windows = "https://accessibility.windows.example.org/ns/attributes"
+state_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/state"
+state_ns_windows = "https://accessibility.windows.example.org/ns/state"
+component_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/component"
+component_ns_windows = "https://accessibility.windows.example.org/ns/component"
+value_ns_ubuntu = "https://accessibility.ubuntu.example.org/ns/value"
+value_ns_windows = "https://accessibility.windows.example.org/ns/value"
+class_ns_windows = "https://accessibility.windows.example.org/ns/class"
+
+
+def find_leaf_nodes(xlm_file_str):
+    """Return every childless element of the given XML document, in document order.
+
+    An empty or ``None`` input yields an empty list.
+    """
+    if not xlm_file_str:
+        return []
+
+    document_root = ET.fromstring(xlm_file_str)
+    # Element.iter() visits the element itself and all descendants depth-first,
+    # which is the same order as a pre-order recursive walk.
+    return [element for element in document_root.iter() if len(element) == 0]
+
+
+def judge_node(node: ET, platform="Ubuntu", check_image=False) -> bool:
+    """Decide whether an accessibility node should be kept.
+
+    A node is kept when all of the following hold:
+      * its tag is one of the recognised kinds (prefix, suffix or exact match);
+      * it is visible (and, on Ubuntu, also showing);
+      * it has a name, non-empty text, or (with ``check_image``) an image flag;
+      * its screen coordinates are non-negative and its size is positive.
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+    """
+    if platform == "Ubuntu":
+        _state_ns, _component_ns = state_ns_ubuntu, component_ns_ubuntu
+    elif platform == "Windows":
+        _state_ns, _component_ns = state_ns_windows, component_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    tag = node.tag
+    kept_suffixes = (
+        "item",
+        "button",
+        "heading",
+        "label",
+        "scrollbar",
+        "searchbox",
+        "textbox",
+        "link",
+        "tabelement",
+        "textfield",
+        "textarea",
+        "menu",
+    )
+    kept_tags = {
+        "alert",
+        "canvas",
+        "check-box",
+        "combo-box",
+        "entry",
+        "icon",
+        "image",
+        "paragraph",
+        "scroll-bar",
+        "section",
+        "slider",
+        "static",
+        "table-cell",
+        "terminal",
+        "text",
+        "netuiribbontab",
+        "start",
+        "trayclockwclass",
+        "traydummysearchcontrol",
+        "uiimage",
+        "uiproperty",
+        "uiribboncommandbar",
+    }
+    relevant_tag = tag.startswith("document") or tag.endswith(kept_suffixes) or tag in kept_tags
+
+    def state_is_true(flag):
+        # State attributes are namespaced; a missing attribute counts as "false".
+        return node.get("{{{:}}}{:}".format(_state_ns, flag), "false") == "true"
+
+    # Both platforms require "visible"; Ubuntu additionally requires "showing".
+    on_screen = state_is_true("visible") and (platform == "Windows" or state_is_true("showing"))
+
+    has_content = (
+        node.get("name", "") != ""
+        or bool(node.text)
+        or check_image
+        and node.get("image", "false") == "true"
+    )
+
+    # NOTE(security): eval() is applied to attribute text taken from the accessibility
+    # tree; a malformed or hostile value would be executed. The geometry is parsed for
+    # every node, so a malformed value raises even when the node would be rejected.
+    coordinates: Tuple[int, int] = eval(node.get("{{{:}}}screencoord".format(_component_ns), "(-1, -1)"))
+    sizes: Tuple[int, int] = eval(node.get("{{{:}}}size".format(_component_ns), "(-1, -1)"))
+
+    return (
+        relevant_tag
+        and on_screen
+        and has_content
+        and coordinates[0] >= 0
+        and coordinates[1] >= 0
+        and sizes[0] > 0
+        and sizes[1] > 0
+    )
+
+
+def filter_nodes(root: ET, platform="Ubuntu", check_image=False):
+    """Return, in document order, ``root`` and its descendants accepted by ``judge_node``."""
+    return [element for element in root.iter() if judge_node(element, platform, check_image)]
+
+
+def draw_bounding_boxes(nodes, image_file_content, down_sampling_ratio=1.0, platform="Ubuntu"):
+    """Draw a numbered red box on the screenshot for every drawable node.
+
+    Args:
+        nodes: accessibility elements carrying ``screencoord`` and ``size``
+            attributes in the platform's component namespace.
+        image_file_content: encoded screenshot bytes.
+        down_sampling_ratio: scale factor applied to the image and to the boxes
+            drawn on it; the returned marks stay in original coordinates.
+        platform: ``"Ubuntu"`` or ``"Windows"``.
+
+    Returns:
+        ``(marks, drew_nodes, text_table, png_bytes)``: ``marks`` holds one
+        ``[x, y, w, h]`` list per drawn node, ``drew_nodes`` the matching elements,
+        ``text_table`` a tab-separated ``index/tag/name/text`` listing, and
+        ``png_bytes`` the annotated image encoded as PNG.
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+    """
+    if platform == "Ubuntu":
+        _component_ns = component_ns_ubuntu
+        _value_ns = value_ns_ubuntu
+    elif platform == "Windows":
+        _component_ns = component_ns_windows
+        _value_ns = value_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    # Load the screenshot image
+    image_stream = io.BytesIO(image_file_content)
+    image = Image.open(image_stream)
+    if float(down_sampling_ratio) != 1.0:
+        image = image.resize((int(image.size[0] * down_sampling_ratio), int(image.size[1] * down_sampling_ratio)))
+    draw = ImageDraw.Draw(image)
+    marks = []
+    drew_nodes = []
+    text_informations: List[str] = ["index\ttag\tname\ttext"]
+
+    try:
+        # Adjust the path to the font file you have or use a default one
+        font = ImageFont.truetype("arial.ttf", 15)
+    except IOError:
+        # Fallback to a basic font if the specified font can't be loaded
+        font = ImageFont.load_default()
+
+    index = 1
+
+    # Loop over all the visible nodes and draw their bounding boxes
+    for _node in nodes:
+        coords_str = _node.attrib.get("{{{:}}}screencoord".format(_component_ns))
+        size_str = _node.attrib.get("{{{:}}}size".format(_component_ns))
+
+        if coords_str and size_str:
+            try:
+                # Parse the coordinates and size from the strings
+                coords = tuple(map(int, coords_str.strip("()").split(", ")))
+                size = tuple(map(int, size_str.strip("()").split(", ")))
+
+                # Tuples of ints are immutable, so keeping a reference to the
+                # unscaled values is enough; no copy is required.
+                original_coords = coords
+                original_size = size
+
+                if float(down_sampling_ratio) != 1.0:
+                    # Downsample the coordinates and size
+                    coords = tuple(int(coord * down_sampling_ratio) for coord in coords)
+                    size = tuple(int(s * down_sampling_ratio) for s in size)
+
+                # Reject empty or negative sizes
+                if size[0] <= 0 or size[1] <= 0:
+                    raise ValueError(f"Size must be positive, got: {size}")
+
+                # Calculate the bottom-right corner of the bounding box
+                bottom_right = (coords[0] + size[0], coords[1] + size[1])
+
+                # Check that bottom_right > coords (x1 >= x0, y1 >= y0)
+                if bottom_right[0] < coords[0] or bottom_right[1] < coords[1]:
+                    raise ValueError(f"Invalid coordinates or size, coords: {coords}, size: {size}")
+
+                # Skip regions that contain a single colour only
+                cropped_image = image.crop((*coords, *bottom_right))
+                if len(set(cropped_image.getdata())) == 1:
+                    continue
+
+                # Draw rectangle on image
+                draw.rectangle([coords, bottom_right], outline="red", width=1)
+
+                # Draw the index at the bottom-left corner of the box, white on black
+                text_position = (coords[0], bottom_right[1])
+                text_bbox: Tuple[int, int, int, int] = draw.textbbox(text_position, str(index), font=font, anchor="lb")
+                draw.rectangle(text_bbox, fill="black")
+                draw.text(text_position, str(index), font=font, anchor="lb", fill="white")
+
+                # each mark is an x, y, w, h tuple
+                marks.append([original_coords[0], original_coords[1], original_size[0], original_size[1]])
+                drew_nodes.append(_node)
+
+                # Text containing double quotes is quoted with the quotes doubled
+                if _node.text:
+                    node_text = _node.text if '"' not in _node.text else '"{:}"'.format(_node.text.replace('"', '""'))
+                elif _node.get("{{{:}}}class".format(class_ns_windows), "").endswith("EditWrapper") and _node.get(
+                    "{{{:}}}value".format(_value_ns)
+                ):
+                    node_text = _node.get("{{{:}}}value".format(_value_ns), "")
+                    node_text = node_text if '"' not in node_text else '"{:}"'.format(node_text.replace('"', '""'))
+                else:
+                    node_text = '""'
+                text_information: str = "{:d}\t{:}\t{:}\t{:}".format(index, _node.tag, _node.get("name", ""), node_text)
+                text_informations.append(text_information)
+
+                index += 1
+
+            except ValueError:
+                # Best effort: nodes with unparsable or invalid geometry are skipped.
+                pass
+
+    output_image_stream = io.BytesIO()
+    image.save(output_image_stream, format="PNG")
+    image_content = output_image_stream.getvalue()
+
+    return marks, drew_nodes, "\n".join(text_informations), image_content
+
+
+def print_nodes_with_indent(nodes, indent=0):
+    """Print each node's tag and attributes, indenting children two spaces per level."""
+    padding = " " * indent
+    child_indent = indent + 2
+    for element in nodes:
+        print(padding, element.tag, element.attrib)
+        # Iterating an element yields its direct children.
+        print_nodes_with_indent(element, child_indent)
+
+
+def find_active_applications(tree, state_ns):
+    """List the application names whose subtrees should be kept.
+
+    An application is listed once for each of its direct children whose ``active``
+    state attribute is ``"true"``. ``"gnome-shell"`` is always appended; when no
+    application is active the result is ``["gjs", "gnome-shell"]``.
+    """
+    active_attr = "{{{:}}}active".format(state_ns)
+    active_apps = [
+        application.attrib.get("name")
+        for application in list(tree.getroot())
+        for frame in application
+        if frame.attrib.get(active_attr, "false") == "true"
+    ]
+    if not active_apps:
+        return ["gjs", "gnome-shell"]
+    return active_apps + ["gnome-shell"]
+
+
+def linearize_accessibility_tree(accessibility_tree, platform="Ubuntu"):
+    """Render the accessibility tree of the active applications as a text table.
+
+    Applications not selected by ``find_active_applications`` are removed, the
+    remaining nodes are filtered with ``filter_nodes``, and each kept node becomes
+    one tab-separated row: tag, text, centre position and size. Near-duplicate
+    rows are then removed by ``filter_similar_nodes``.
+
+    Returns:
+        The table as a string, or ``""`` when the tree cannot be processed (the
+        error is printed).
+
+    Raises:
+        ValueError: if ``platform`` is neither ``"Ubuntu"`` nor ``"Windows"``.
+    """
+    if platform == "Ubuntu":
+        _state_ns = state_ns_ubuntu
+        _component_ns = component_ns_ubuntu
+    elif platform == "Windows":
+        _state_ns = state_ns_windows
+        _component_ns = component_ns_windows
+    else:
+        raise ValueError("Invalid platform, must be 'Ubuntu' or 'Windows'")
+
+    # "(x, y)" with non-negative integers. A raw string avoids invalid escape
+    # sequences, and compiling once avoids rebuilding the pattern per node.
+    pair_pattern = re.compile(r"\((\d+), (\d+)\)")
+    coord_attr = "{{{:}}}screencoord".format(_component_ns)
+    size_attr = "{{{:}}}size".format(_component_ns)
+
+    try:
+        tree = ET.ElementTree(ET.fromstring(accessibility_tree))
+        keep_apps = find_active_applications(tree, _state_ns)
+
+        # Remove inactive applications
+        for application in list(tree.getroot()):
+            if application.get("name") not in keep_apps:
+                tree.getroot().remove(application)
+
+        filtered_nodes = filter_nodes(tree.getroot(), platform, check_image=True)
+        rows = ["tag\ttext\tposition (center x & y)\tsize (w & h)"]
+
+        # Linearize the accessibility tree nodes into a table format
+        for node in filtered_nodes:
+            text = (node.text if node.text is not None else "").strip()
+            name = node.get("name", "").strip()
+            if text == "":
+                text = name
+            elif name != "" and text != name:
+                text = f"{name} ({text})"
+            text = text.replace("\n", "\\n")
+
+            size = node.get(size_attr, "")
+            pos_match = pair_pattern.match(node.get(coord_attr, ""))
+            size_match = pair_pattern.match(size)
+            if pos_match is None or size_match is None:
+                # Nodes without parsable geometry are skipped rather than failing the tree.
+                continue
+
+            x, y = map(int, pos_match.groups())
+            w, h = map(int, size_match.groups())
+            x_mid, y_mid = x + w // 2, y + h // 2
+
+            rows.append("{:}\t{:}\t{:}\t{:}".format(node.tag, text, f"({x_mid}, {y_mid})", size))
+
+        # Filter out similar nodes
+        linearized_accessibility_tree = filter_similar_nodes("\n".join(rows))
+    except Exception as e:
+        # Best effort: a tree that cannot be parsed or filtered yields an empty table.
+        print(f"Error in linearize_accessibility_tree: {e}")
+        linearized_accessibility_tree = ""
+
+    return linearized_accessibility_tree
+
+
+def trim_accessibility_tree(linearized_accessibility_tree, max_items):
+    """Keep at most ``max_items`` lines of the table, marking truncation with ``...``.
+
+    Line counting ignores leading and trailing whitespace. When nothing has to be
+    cut, the input is returned unchanged (not stripped).
+    """
+    rows = linearized_accessibility_tree.strip().split("\n")
+    if len(rows) <= max_items:
+        return linearized_accessibility_tree
+    return "\n".join(rows[:max_items]) + "\n..."
--- a/mm_agents/autoglm_v/prompt/deduplicate_node.py
+++ b/mm_agents/autoglm_v/prompt/deduplicate_node.py
@ -0,0 +1,100 @@
+import re
+
+
+def parse_line(line):
+    """Parse one row of the linearized accessibility table.
+
+    Expected shape, e.g. ``label   Google Chrome   (191, 13)   (104, 17)``: a tag,
+    a non-empty text, the centre position and the size.
+
+    Returns:
+        A dict with ``type``, ``text``, ``bbox`` (x1, y1, x2, y2), ``center``,
+        ``size`` and the ``raw`` line, or ``None`` when the row does not match.
+    """
+    matched = re.match(r"^(\S+)\s+(.+?)\s+\((\d+), (\d+)\)\s+\((\d+), (\d+)\)", line)
+    if matched is None:
+        return None
+
+    node_type, text = matched.group(1), matched.group(2)
+    cx, cy, w, h = (int(matched.group(i)) for i in range(3, 7))
+
+    # Rebuild the box from its centre; integer halving mirrors how the centre was derived.
+    left = cx - w // 2
+    top = cy - h // 2
+    return {
+        "type": node_type,
+        "text": text.strip(),
+        "bbox": (left, top, left + w, top + h),
+        "center": (cx, cy),
+        "size": (w, h),
+        "raw": line,
+    }
+
+
+def iou(box1, box2):
+    """Return the intersection-over-union of two ``(x1, y1, x2, y2)`` boxes.
+
+    Returns ``0`` when the union has zero area.
+    """
+    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
+    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])
+    # Disjoint boxes give a negative extent, clamped to zero.
+    overlap = max(0, right - left) * max(0, bottom - top)
+
+    first_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+    second_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+    total = first_area + second_area - overlap
+    return overlap / total if total != 0 else 0
+
+
+def norm_text(s):
+    """Normalise text for comparison: lowercase it and remove all whitespace."""
+    lowered = s.lower()
+    return re.sub(r"\s+", "", lowered)
+
+
+def text_similarity(a, b):
+    """Return ``1.0`` when the normalised texts are identical, otherwise ``0``."""
+    return 1.0 if norm_text(a) == norm_text(b) else 0
+
+
+def filter_similar_nodes(linearized_accessibility_tree):
+    """Drop rows that duplicate an earlier row with the same text and overlapping box.
+
+    Blank lines are discarded. Rows that cannot be parsed (such as the header) are
+    always kept. Of two parsable rows whose boxes overlap with IoU above 0.2 and
+    whose normalised texts are equal, the later one is removed. The original order
+    is preserved.
+    """
+    # Thresholds may be tuned as needed.
+    IOU_THRESH = 0.2
+    TEXT_THRESH = 0.9
+
+    entries = []
+    for raw_line in linearized_accessibility_tree.split("\n"):
+        if not raw_line.strip():
+            continue
+        parsed = parse_line(raw_line)
+        # Unparsable rows are kept as-is and never compared.
+        entries.append(parsed if parsed else {"raw": raw_line, "invalid": True})
+
+    dropped = set()
+    kept = []
+    for i, current in enumerate(entries):
+        if current.get("invalid"):
+            kept.append(current["raw"])
+            continue
+        if i in dropped:
+            continue
+        for j in range(i + 1, len(entries)):
+            other = entries[j]
+            if other.get("invalid"):
+                continue
+            overlaps = iou(current["bbox"], other["bbox"]) > IOU_THRESH
+            same_text = text_similarity(current["text"], other["text"]) > TEXT_THRESH
+            if overlaps and same_text:
+                # The two rows describe the same element; the later one goes.
+                dropped.add(j)
+        kept.append(current["raw"])
+    return "\n".join(kept)
+
+
+# Example usage
+if __name__ == "__main__":
+    linearized_accessibility_tree = "tag\ttext\tposition (center x & y)\tsize (w & h)\nicon\t\t(1853, 1001)\t(64, 64)\nlabel\tHome\t(1853, 1045)\t(40, 17)\nlabel\tActivities\t(49, 13)\t(63, 17)\ntext\tActivities\t(49, 13)\t(63, 17)\nlabel\tApr 17 17‎∶04\t(995, 13)\t(117, 27)\ntext\tApr 17 17‎∶04\t(995, 13)\t(87, 18)\nmenu\tSystem\t(1867, 13)\t(106, 27)\npush-button\tGoogle Chrome\t(35, 65)\t(70, 64)\npush-button\tThunderbird Mail\t(35, 133)\t(70, 64)\npush-button\tVisual Studio Code\t(35, 201)\t(70, 64)\npush-button\tVLC media player\t(35, 269)\t(70, 64)\npush-button\tLibreOffice Writer\t(35, 337)\t(70, 64)\npush-button\tLibreOffice Calc\t(35, 405)\t(70, 64)\npush-button\tLibreOffice Impress\t(35, 473)\t(70, 64)\npush-button\tGNU Image Manipulation Program\t(35, 541)\t(70, 64)\npush-button\tFiles\t(35, 609)\t(70, 64)\npush-button\tUbuntu Software\t(35, 677)\t(70, 64)\npush-button\tHelp\t(35, 745)\t(70, 64)\npush-button\tTrash\t(35, 816)\t(70, 64)\ntoggle-button\tShow Applications\t(35, 1045)\t(70, 70)"
+    result = filter_similar_nodes(linearized_accessibility_tree)
+    print(result)
--- a/mm_agents/autoglm_v/prompt/grounding_agent.py
+++ b/mm_agents/autoglm_v/prompt/grounding_agent.py
@ -0,0 +1,260 @@
+import base64
+import json
+import logging
+import os
+import xml.etree.ElementTree as ET
+from typing import Dict, List, Optional, Tuple
+
+logger = logging.getLogger("desktopenv.agent")
+
+
+def agent_action(func):
+    """
+    Tag ``func`` as an agent action and return the same object.
+
+    The decorator only sets ``is_agent_action = True`` on the callable; it does
+    not wrap it, so calling the result is calling the original function.
+    """
+    setattr(func, "is_agent_action", True)
+    return func
+
+
+# Python snippet template returned by GroundingAgent.switch_window: every literal
+# "WINDOW_ID" is replaced with the target window id before the snippet is used.
+# `time` is imported explicitly so the `time.sleep` call does not depend on the
+# executing environment having imported it beforehand.
+switch_window_code = """import subprocess;
+import pyautogui;
+import time;
+pyautogui.press('escape');
+time.sleep(0.5);
+subprocess.run(['wmctrl', '-ia', 'WINDOW_ID'])
+subprocess.run(['wmctrl', '-ir', 'WINDOW_ID', '-b', 'add,maximized_vert,maximized_horz'])
+print('Switch to WINDOW_ID')"""
+
+# Maps an application name (lower-case, as normalised by GroundingAgent.open_app)
+# to the shell command line stored in the OPEN_APP action that open_app returns.
+# Names that are not keys of this table are reported as unsupported by open_app.
+launch_app_commands = {
+    # Web Browser
+    "chrome": "google-chrome --remote-debugging-port=1337",
+    # File Manager
+    "files": "nautilus",
+    # Terminal
+    "terminal": 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" && gnome-terminal',
+    # Utilities
+    "gedit": "gedit",
+    # Office
+    "libreoffice writer": "libreoffice --writer",
+    "libreoffice calc": "libreoffice --calc",
+    "libreoffice impress": "libreoffice --impress",
+    # System
+    "settings": 'export DBUS_SESSION_BUS_ADDRESS="unix:path=/run/user/1000/bus" && gnome-control-center',
+    # Multimedia
+    "vlc": "vlc",
+    "gimp": "gimp",
+    # IDE
+    "vs code": "code",
+    # Email
+    "thunderbird": "thunderbird",
+}
+
+
+class GroundingAgent:
+    # Turns high-level agent actions into command strings (mostly pyautogui
+    # snippets), the sentinel strings "WAIT" / "DONE" / "FAIL", or an OPEN_APP
+    # action dict.
+    #
+    # NOTE: Prompt.construct_procedural_memory renders the signature and the
+    # docstring of every @agent_action method into the model prompt, so those
+    # are runtime text: editing them changes what the model is shown.
+
+    # Application key -> name of the tool class whose print_result() call is
+    # appended by tool_commands().
+    tool_list = {
+        "libreoffice_calc": "CalcTools",
+        "libreoffice_impress": "ImpressTools",
+        "libreoffice_writer": "WriterTools",
+        "code": "CodeTools",
+        "vlc": "VLCTools",
+        "google_chrome": "BrowserTools",
+    }
+
+    relative_coordinate = True  # whether the coordinates are relative (0-1000) or absolute (e.g. 1920x1080)
+
+    # Screen size in pixels that relative (0-1000) coordinates are scaled to.
+    screen_width = 1920
+    screen_height = 1080
+
+    @classmethod
+    def _to_pixels(cls, coordinate):
+        """
+        Unpack an [x, y] pair and, when ``relative_coordinate`` is set, scale it
+        from the 0-1000 range to whole pixels of ``screen_width`` x ``screen_height``.
+
+        Returns:
+            tuple: (x, y); returned untouched when coordinates are absolute
+        """
+        x, y = coordinate
+        if cls.relative_coordinate:
+            x = round(x * cls.screen_width / 1000)
+            y = round(y * cls.screen_height / 1000)
+        return x, y
+
+    @classmethod
+    def tool_commands(cls, code: str, tool_name: str):
+        """
+        Wrap a tool call so that the tool module is star-imported first and the
+        tool class prints its result afterwards.
+
+        Args:
+            code (str): statement(s) calling into the tool
+            tool_name (str): key of ``tool_list``; also used as the module name to import
+
+        Returns:
+            list: single-element list holding the assembled command string
+
+        Raises:
+            KeyError: if ``tool_name`` is not a key of ``tool_list``
+        """
+        command = f"from {tool_name} import *; "
+        command += code
+        command += f"; {cls.tool_list[tool_name]}.print_result()"
+        return [command]
+
+    @classmethod
+    @agent_action
+    def click(
+        cls,
+        coordinate: List,
+        num_clicks: int = 1,
+        button_type: str = "left",
+    ):
+        """
+        Click on the element
+
+        Args:
+            coordinate (List): [x, y], coordinate of the element to click on
+            num_clicks (int): number of times to click the element
+            button_type (str): which mouse button to press ("left", "middle", or "right")
+        """
+        x, y = cls._to_pixels(coordinate)
+        # TODO (translated from the original note): maximizing a window requires a single call
+        return f"""pyautogui.click({x}, {y}, clicks={num_clicks}, button={repr(button_type)}); print("Click Success")"""
+
+    @classmethod
+    @agent_action
+    def type(
+        cls,
+        coordinate: Optional[List] = None,
+        text: str = "",
+        overwrite: bool = False,
+        enter: bool = False,
+    ):
+        """
+        Type text into the element
+
+        Args:
+            coordinate (List): [x, y], coordinate of the element to type into. If None, typing starts at current cursor location
+            text (str): the text to type
+            overwrite (bool): True to overwrite existing text, False otherwise
+            enter (bool): True to press enter after typing, False otherwise
+        """
+
+        steps = []
+
+        if coordinate is not None:
+            # Click the target first so that it receives the keystrokes
+            x, y = cls._to_pixels(coordinate)
+            steps.append(f"pyautogui.click({x}, {y}); ")
+
+        if overwrite:
+            # Select everything in the focused field and delete it
+            steps.append("pyautogui.hotkey('ctrl', 'a'); pyautogui.press('backspace'); ")
+
+        # repr() turns the text into a safely quoted literal inside the snippet
+        steps.append(f"pyautogui.write({repr(text)}); ")
+
+        if enter:
+            steps.append("pyautogui.press('enter'); ")
+
+        steps.append("print('Type Success')")
+
+        return "".join(steps)
+
+    @classmethod
+    @agent_action
+    def drag_and_drop(cls, drag_from_coordinate: List, drop_on_coordinate: List):
+        """
+        Drag element1 and drop it on element2
+
+        Args:
+            drag_from_coordinate (List): [x, y], coordinate of element to drag
+            drop_on_coordinate (List): [x, y], coordinate of element to drop on
+        """
+        x1, y1 = cls._to_pixels(drag_from_coordinate)
+        x2, y2 = cls._to_pixels(drop_on_coordinate)
+
+        command = f"pyautogui.moveTo({x1}, {y1}); "
+        # TODO: specified duration?
+        command += f"pyautogui.dragTo({x2}, {y2}, duration=1.); pyautogui.mouseUp(); "
+
+        command += "print('Drag and Drop Success')"
+
+        return command
+
+    @classmethod
+    @agent_action
+    def scroll(cls, coordinate: List, direction: str):
+        """
+        Scroll the element in the specified direction
+
+        Args:
+            coordinate (List): [x, y], coordinate of the element to scroll in
+            direction (str): the direction to scroll ("up" or "down")
+        """
+        x, y = cls._to_pixels(coordinate)
+        # Any direction other than "up" scrolls down
+        amount = 100 if direction == "up" else -100
+        return f"import pyautogui; pyautogui.moveTo({x}, {y}); pyautogui.scroll({amount}); print('Scroll Success')"
+
+    @classmethod
+    @agent_action
+    def open_app(cls, app_name: str):
+        """
+        Open a specified application
+
+        Supported apps: chrome, files, terminal, gedit, libreoffice writer, 
+        libreoffice calc, libreoffice impress, vs code, vlc, gimp, settings, thunderbird
+
+        Args:
+            app_name (str): name of the application to open
+        """
+
+        app_name = app_name.lower().strip()
+
+        if app_name not in launch_app_commands:
+            # repr() yields a correctly quoted literal, so quotes, braces or
+            # backslashes in the name cannot break the generated print statement.
+            message = f"{app_name} is not supported or recognized"
+            return f"print({message!r})"
+
+        # Supported apps are returned as a structured action, not a code string
+        return {
+            "action_type": "OPEN_APP",
+            "parameters": {"launch_app_command": launch_app_commands[app_name], "app_name": app_name},
+        }
+
+    @classmethod
+    @agent_action
+    def switch_window(cls, window_id: str):
+        """
+        Switch to the window with the given window id
+
+        Args:
+            window_id (str): the window id to switch to from the provided list of open windows
+        """
+        # str() keeps the substitution working when the id arrives as a number
+        return switch_window_code.replace("WINDOW_ID", str(window_id))
+
+    @classmethod
+    @agent_action
+    def hotkey(cls, keys: List):
+        """
+        Press a hotkey combination
+
+        Args:
+            keys (List): the keys to press in combination (e.g. ['ctrl', 'c'] for copy, ['prtsc'] for screenshot)
+        """
+        # repr() quotes each key as a valid literal; for ordinary key names this is
+        # the plain 'name' form, and a key such as "'" no longer breaks the snippet.
+        keys = [repr(str(key)) for key in keys]
+        # The summary sits inside a single-quoted f-string in the generated code,
+        # so single quotes are escaped and braces are doubled.
+        key_str = ", ".join(keys).replace("'", "\\'").replace("{", "{{").replace("}", "}}")
+        return f"import pyautogui; pyautogui.hotkey({', '.join(keys)}); print(f'Press Hotkey: {key_str}')"
+
+    @classmethod
+    @agent_action
+    def quote(cls, content: str):
+        """
+        Quote information from the current page for memory
+
+        Args:
+            content (str): text summarized or copied from the page for later operation
+        """
+        # repr() builds a valid string literal for any content, including text that
+        # contains triple quotes, ends with a double quote, or holds backslashes;
+        # the generated print statement then emits the content verbatim.
+        return f"print({str(content)!r})"
+
+    @classmethod
+    @agent_action
+    def wait(cls):
+        """
+        Wait for a while
+
+        """
+        return "WAIT"
+
+    @classmethod
+    @agent_action
+    def exit(cls, success: bool):
+        """
+        End the current task
+
+        Args:
+            success (bool): True if successfully finish a task, False otherwise
+        """
+        return "DONE" if success else "FAIL"
--- a/mm_agents/autoglm_v/prompt/procedural_memory.py
+++ b/mm_agents/autoglm_v/prompt/procedural_memory.py
@ -0,0 +1,194 @@
+import inspect
+import json
+import os
+import textwrap
+
+# Parent of the directory that contains this file; Prompt.construct_procedural_memory
+# resolves the tool API specs ("tools/apis/<app>.json") relative to it.
+current_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def generate_func(json_data):
+    """
+    Render tool API specs as Python stub source (signatures plus docstrings).
+
+    Args:
+        json_data (list): spec entries; only entries whose "type" is "function"
+            are used. A name of the form "Class.method" becomes a method stub of
+            ``Class``; any other name becomes a module-level function stub.
+
+    Returns:
+        tuple: (stub source with surrounding whitespace stripped, name of the
+        last class rendered, or "" when no class was rendered)
+    """
+
+    def render_stub(func, stub_name, pad, leading_params):
+        # One "def" line plus docstring; `pad` is the indentation of the def line.
+        description = func["description"]
+        params = func["parameters"]["properties"]
+        required = func["parameters"].get("required", [])
+        optional = [name for name in params if name not in required]
+
+        # Required parameters first, then the remaining ones; no defaults are emitted.
+        arg_names = leading_params + [f"{name}" for name in required] + [f"{name}" for name in optional]
+        body_pad = pad + "    "
+        arg_pad = body_pad + "    "
+
+        lines = [
+            f"{pad}def {stub_name}({', '.join(arg_names)}):",
+            f'{body_pad}"""',
+            f"{body_pad}{description}",
+            "",
+            f"{body_pad}Args:",
+        ]
+        if len(arg_names) == len(leading_params):
+            # Nothing besides the implicit leading parameter(s)
+            lines.append(f"{arg_pad}None")
+        else:
+            for name in required:
+                param_type = params[name]["type"]
+                param_desc = params[name].get("description", "")
+                lines.append(f"{arg_pad}{name} ({param_type}): {param_desc}")
+            for name in optional:
+                param_type = params[name]["type"]
+                param_desc = params[name].get("description", "")
+                lines.append(f"{arg_pad}{name} ({param_type}, optional): {param_desc}")
+        lines.append(f'{body_pad}"""')
+
+        # Each stub ends with its closing quotes followed by one blank line
+        return "\n".join(lines) + "\n\n"
+
+    # Group the function specs by owning class, keeping first-seen order
+    methods_by_class = {}
+    plain_functions = []
+    for item in json_data:
+        if item["type"] != "function":
+            continue
+        name_parts = item["function"]["name"].split(".")
+        if len(name_parts) == 2:
+            methods_by_class.setdefault(name_parts[0], []).append(item)
+        else:
+            plain_functions.append(item)
+
+    chunks = []
+
+    # Class-bound stubs; after the loop cls_name holds the last class rendered
+    cls_name = ""
+    for cls_name, items in methods_by_class.items():
+        chunks.append(f"class {cls_name}:\n")
+        for item in items:
+            func = item["function"]
+            chunks.append(render_stub(func, func["name"].split(".")[-1], "    ", ["cls"]))
+        chunks.append("\n")
+
+    # Stubs that do not belong to a class
+    for item in plain_functions:
+        func = item["function"]
+        chunks.append(render_stub(func, func["name"], "", []))
+
+    return "".join(chunks).strip(), cls_name
+
+
+# Opening section of the prompt. Prompt.construct_procedural_memory fills
+# {observation_list} with the comma-separated observation names and {format_hint}
+# with the expected output layout.
+setup_prompt = """You are a GUI operation agent. You will be given a task and your action history, with current observation ({observation_list}). You should help me control the computer, output the best action step by step to accomplish the task.
+You should first generate a plan, reflect on the current observation, then generate actions to complete the task in python-style pseudo code using the predefined functions.
+
+* Output Format:
+{format_hint}"""
+
+# Section that lists the callable functions; {class_content} receives the
+# rendered class/function stub source.
+func_def_template = """* Available Functions:
+```python
+{class_content}
+```"""
+
+# Closing notes. {relative_coordinate_hint} is either one extra bullet (ending in
+# a newline) or the empty string; {client_password} is the password shown to the model.
+note_prompt = """* Note:
+- Your code should only be wrapped in ```python```.
+- Only **ONE-LINE-OF-CODE** at a time.
+- Each code block is context independent, and variables from the previous round cannot be used in the next round.
+{relative_coordinate_hint}- Return with `Agent.exit(success=True)` immediately after the task is completed.
+- The computer's environment is Linux, e.g., Desktop path is '/home/user/Desktop'
+- My computer's password is '{client_password}', feel free to use it when you need sudo rights"""
+
+
+class Prompt:
+    @staticmethod
+    def construct_procedural_memory(agent_class, app_name=None, client_password="password", with_image=True, with_atree=False, relative_coordinate=True, glm41v_format=True):
+        """
+        Build the three prompt sections for an agent class.
+
+        Args:
+            agent_class: class whose callables tagged with ``is_agent_action`` are
+                listed (signature and docstring) under "Class Agent:"
+            app_name (str, optional): when given, ``tools/apis/<app_name lower-cased>.json``
+                under ``current_dir`` is loaded and its generated stubs are appended
+            client_password (str): password inserted into the notes section
+            with_image (bool): include "screenshot" in the observation list
+            with_atree (bool): include the a11y tree entry in the observation list
+            relative_coordinate (bool): include the 0-1000 coordinate hint in the notes
+            glm41v_format (bool): choose the output-format hint that wraps the code
+                in ``<answer>`` tags; otherwise the hint without that wrapper
+
+        Returns:
+            tuple: (setup prompt, function-definition prompt, notes prompt)
+        """
+        # --- agent action stubs ---
+        stub_parts = ["Class Agent:"]
+        for member_name in dir(agent_class):
+            member = getattr(agent_class, member_name)
+            if not (callable(member) and hasattr(member, "is_agent_action")):
+                continue
+            # inspect supplies the full signature, annotations and defaults included
+            member_signature = inspect.signature(member)
+            stub_parts.append(
+                f"\n    def {member_name}{member_signature}:\n        '''{member.__doc__}'''\n    "
+            )
+        agent_class_content = "".join(stub_parts)
+
+        # --- optional app-specific tool stubs ---
+        if app_name is not None:
+            spec_path = os.path.join(current_dir, "tools", "apis", f"{app_name.lower()}.json")
+            with open(spec_path, "r") as spec_file:
+                tool_specs = json.load(spec_file)
+
+            # The class name returned alongside the source is not needed here
+            tool_class_content, _ = generate_func(tool_specs)
+            agent_class_content += "\n\n{}".format(tool_class_content)
+
+        func_def_prompt = func_def_template.format(class_content=agent_class_content.strip())
+
+        # --- dynamic observation list ---
+        obs_items = ["screenshot"] if with_image else []
+        obs_items.append("current app name")
+        if with_atree:
+            obs_items.append("a11y tree (based on AT-SPI library)")
+        obs_items.extend(["app info", "last action result"])
+
+        # --- output format hint ---
+        if glm41v_format:
+            format_hint = "<think>\n{**YOUR-PLAN-AND-THINKING**}</think>\n<answer>```python\n{**ONE-LINE-OF-CODE**}\n```</answer>"
+        else:
+            format_hint = "<think>\n{**YOUR-PLAN-AND-THINKING**}\n</think>\n```python\n{**ONE-LINE-OF-CODE**}\n```"
+
+        setup_prompt_formatted = setup_prompt.format(
+            observation_list=", ".join(obs_items),
+            format_hint=format_hint,
+        )
+
+        # --- notes ---
+        if relative_coordinate:
+            coordinate_hint = "- The coordinate [x, y] should be normalized to 0-1000, which usually should be the center of a specific target element.\n"
+        else:
+            coordinate_hint = ""
+        note_prompt_formatted = note_prompt.format(
+            relative_coordinate_hint=coordinate_hint,
+            client_password=client_password,
+        )
+
+        return setup_prompt_formatted, func_def_prompt, note_prompt_formatted
+
+
+if __name__ == "__main__":
+    # Manual check: print the three prompt sections built with the "vlc" tool spec.
+    from grounding_agent import GroundingAgent
+
+    prompt_sections = Prompt.construct_procedural_memory(GroundingAgent, "vlc")
+    print(prompt_sections)
--- a/mm_agents/autoglm_v/tools/apis/init.py
+++ b/mm_agents/autoglm_v/tools/apis/init.py
@ -0,0 +1,3 @@
+from .func import generate_func
+
+__all__ = ["generate_func"]
--- a/mm_agents/autoglm_v/tools/apis/code.json
+++ b/mm_agents/autoglm_v/tools/apis/code.json
@ -0,0 +1,236 @@
+[
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.launch_vscode",
+            "description": "Launch VS Code with specified path",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "File path or directory to open"
+                    }
+                },
+                "required": ["path"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.compare_files",
+            "description": "Compare two files in VS Code",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "file1": {
+                        "type": "string",
+                        "description": "First file path"
+                    },
+                    "file2": {
+                        "type": "string",
+                        "description": "Second file path"
+                    }
+                },
+                "required": ["file1", "file2"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.add_folder",
+            "description": "Add folder to active VS Code window",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "folder": {
+                        "type": "string",
+                        "description": "Folder path to add"
+                    }
+                },
+                "required": ["folder"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.goto_file",
+            "description": "Open file at specific position",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "file_path": {
+                        "type": "string",
+                        "description": "File path to open"
+                    },
+                    "line": {
+                        "type": "integer",
+                        "description": "Line number",
+                        "default": 1
+                    },
+                    "character": {
+                        "type": "integer",
+                        "description": "Character position",
+                        "default": 1
+                    }
+                },
+                "required": ["file_path"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.perform_merge",
+            "description": "Perform three-way merge",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path1": {
+                        "type": "string",
+                        "description": "First version file path"
+                    },
+                    "path2": {
+                        "type": "string",
+                        "description": "Second version file path"
+                    },
+                    "base": {
+                        "type": "string",
+                        "description": "Base version file path"
+                    },
+                    "result": {
+                        "type": "string",
+                        "description": "Output file path"
+                    }
+                },
+                "required": ["path1", "path2", "base", "result"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.remove_folder",
+            "description": "Remove folder from active VS Code window",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "folder": {
+                        "type": "string",
+                        "description": "Folder path to remove"
+                    }
+                },
+                "required": ["folder"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.install_extension",
+            "description": "Install or update VS Code extension",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "extension_id": {
+                        "type": "string",
+                        "description": "Extension identifier"
+                    },
+                    "pre_release": {
+                        "type": "boolean",
+                        "description": "Install pre-release version",
+                        "default": false
+                    }
+                },
+                "required": ["extension_id"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.uninstall_extension",
+            "description": "Uninstall VS Code extension",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "extension_id": {
+                        "type": "string",
+                        "description": "Extension identifier"
+                    }
+                },
+                "required": ["extension_id"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.list_extensions",
+            "description": "List installed extensions",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "show_versions": {
+                        "type": "boolean",
+                        "description": "Show extension versions",
+                        "default": false
+                    },
+                    "category": {
+                        "type": "string",
+                        "description": "Filter by category"
+                    }
+                }
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.update_extensions",
+            "description": "Update all extensions to latest version",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.disable_extension",
+            "description": "Disable extension for next VS Code instance",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "extension_id": {
+                        "type": "string",
+                        "description": "Extension identifier"
+                    }
+                },
+                "required": ["extension_id"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CodeTools.toggle_sync",
+            "description": "Toggle VS Code synchronization",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "state": {
+                        "type": "string",
+                        "description": "Sync state",
+                        "enum": ["on", "off"]
+                    }
+                },
+                "required": ["state"]
+            }
+        }
+    }
+]
--- a/mm_agents/autoglm_v/tools/apis/func.py
+++ b/mm_agents/autoglm_v/tools/apis/func.py
@ -0,0 +1,117 @@
+def generate_func(json_data):
+    """
+    Render tool API specs as Python stub source (signatures plus docstrings).
+
+    Args:
+        json_data (list): spec entries; only entries whose "type" is "function"
+            are used. A name of the form "Class.method" becomes a method stub of
+            ``Class``; any other name becomes a module-level function stub.
+
+    Returns:
+        str: the generated stub source with surrounding whitespace stripped
+    """
+
+    def stub_source(spec, name, indent, args):
+        # Source of one stub: the def line, its docstring and a trailing blank line.
+        summary = spec["description"]
+        props = spec["parameters"]["properties"]
+        needed = spec["parameters"].get("required", [])
+        extra = [key for key in props if key not in needed]
+
+        # Required parameters are listed before the remaining ones; no defaults are emitted.
+        args = args + [f"{key}" for key in needed] + [f"{key}" for key in extra]
+        inner = indent + " " * 4
+        deep = indent + " " * 8
+
+        if needed or extra:
+            arg_doc = "".join(
+                f"{deep}{key} ({props[key]['type']}): {props[key].get('description', '')}\n"
+                for key in needed
+            )
+            arg_doc += "".join(
+                f"{deep}{key} ({props[key]['type']}, optional): {props[key].get('description', '')}\n"
+                for key in extra
+            )
+        else:
+            arg_doc = f"{deep}None\n"
+
+        header = f"{indent}def {name}({', '.join(args)}):\n"
+        doc_open = f'{inner}"""\n{inner}{summary}\n\n{inner}Args:\n'
+        doc_close = f'{inner}"""\n'
+        return header + doc_open + arg_doc + doc_close + "\n"
+
+    # Split the function specs into class-bound and free-standing ones
+    grouped = {}
+    loose = []
+    for entry in json_data:
+        if entry["type"] == "function":
+            pieces = entry["function"]["name"].split(".")
+            if len(pieces) == 2:
+                grouped.setdefault(pieces[0], []).append(entry)
+            else:
+                loose.append(entry)
+
+    code = ""
+
+    # Class-bound stubs, one class block per owner in first-seen order
+    for owner, entries in grouped.items():
+        code += f"class {owner}:\n"
+        for entry in entries:
+            spec = entry["function"]
+            code += stub_source(spec, spec["name"].split(".")[-1], "    ", ["cls"])
+        code += "\n"
+
+    # Free-standing stubs
+    for entry in loose:
+        spec = entry["function"]
+        code += stub_source(spec, spec["name"], "", [])
+
+    return code.strip()
+
+
+if __name__ == "__main__":
+    import json
+
+    # Manual check: print the stubs generated from the Calc tool spec, which is
+    # read from the current working directory.
+    with open("libreoffice_calc.json", "r") as spec_file:
+        print(generate_func(json.load(spec_file)))
--- a/mm_agents/autoglm_v/tools/apis/google_chrome.json
+++ b/mm_agents/autoglm_v/tools/apis/google_chrome.json
@ -0,0 +1,134 @@
+[
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.open_profile_settings",
+            "description": "Opens profile settings page.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.open_password_settings",
+            "description": "Opens password/autofill settings page.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.open_privacy_settings",
+            "description": "Opens privacy settings page.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.open_appearance_settings",
+            "description": "Opens appearance settings page.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.open_search_engine_settings",
+            "description": "Opens search engine settings page.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.bring_back_last_tab",
+            "description": "Restores last-closed tab (Ctrl+Shift+T).",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.print",
+            "description": "Opens print dialog (Ctrl+P).",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.delete_browsing_data",
+            "description": "Opens clear browsing data dialog (Ctrl+Shift+Del).",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.open_extensions",
+            "description": "Opens extensions management page.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.bookmark_page",
+            "description": "Bookmarks current page (Ctrl+D).",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "BrowserTools.open_bookmarks",
+            "description": "Opens bookmarks page.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    }
+]
--- a/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json
+++ b/mm_agents/autoglm_v/tools/apis/libreoffice_calc.json
@ -0,0 +1,634 @@
+[
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.get_workbook_info",
+            "description": "Get workbook info: file path, name, sheets, and active sheet",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.save",
+            "description": "Save workbook to current location",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.get_column_data",
+            "description": "Get all data from specified column",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "column_name": {
+                        "type": "string",
+                        "description": "Column name (e.g. 'A', 'B')"
+                    }
+                },
+                "required": [
+                    "column_name"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.switch_active_sheet",
+            "description": "Switch to sheet (creates it if it does not exist)",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "sheet_name": {
+                        "type": "string",
+                        "description": "Sheet name"
+                    }
+                },
+                "required": [
+                    "sheet_name"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.set_column_values",
+            "description": "Set values to column (values only, not formulas)",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "column_name": {
+                        "type": "string",
+                        "description": "Column name (e.g. 'A', 'B')"
+                    },
+                    "data": {
+                        "type": "array",
+                        "description": "Values to write"
+                    },
+                    "start_index": {
+                        "type": "integer",
+                        "description": "First row index (default: 2)"
+                    }
+                },
+                "required": [
+                    "column_name",
+                    "data"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.highlight_range",
+            "description": "Highlight range with color",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "range_str": {
+                        "type": "string",
+                        "description": "Range (e.g. 'A1:B10')"
+                    },
+                    "color": {
+                        "type": "integer",
+                        "description": "Color value (default: 0xFF0000)"
+                    }
+                },
+                "required": [
+                    "range_str"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.transpose_range",
+            "description": "Transpose range and paste to target cell",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "source_range": {
+                        "type": "string",
+                        "description": "Source range (e.g. 'A1:B10')"
+                    },
+                    "target_cell": {
+                        "type": "string",
+                        "description": "Target cell (e.g. 'A1')"
+                    }
+                },
+                "required": [
+                    "source_range",
+                    "target_cell"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.export_to_csv",
+            "description": "Export to CSV with same path/name",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.sort_column",
+            "description": "Sort column data",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "column_name": {
+                        "type": "string",
+                        "description": "Column name (e.g. 'A', 'B')"
+                    },
+                    "ascending": {
+                        "type": "boolean",
+                        "description": "Sort ascending (default: true)"
+                    },
+                    "start_index": {
+                        "type": "integer",
+                        "description": "First row index (default: 2)"
+                    }
+                },
+                "required": [
+                    "column_name"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.set_validation_list",
+            "description": "Set validation list for column",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "column_name": {
+                        "type": "string",
+                        "description": "Column name (e.g. 'A', 'B')"
+                    },
+                    "values": {
+                        "type": "array",
+                        "description": "Validation values"
+                    }
+                },
+                "required": [
+                    "column_name",
+                    "values"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.hide_row_data",
+            "description": "Hide rows containing value",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "value": {
+                        "type": "string",
+                        "description": "Value to hide (default: 'N/A')"
+                    }
+                },
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.reorder_columns",
+            "description": "Reorder columns by specified order",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "column_order": {
+                        "type": "array",
+                        "description": "Column names in desired order (e.g. ['A', 'B', 'C'])"
+                    }
+                },
+                "required": [
+                    "column_order"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.create_pivot_table",
+            "description": "Create pivot table from source sheet",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "source_sheet": {
+                        "type": "string",
+                        "description": "Source sheet name"
+                    },
+                    "table_name": {
+                        "type": "string",
+                        "description": "Pivot table name"
+                    },
+                    "row_fields": {
+                        "type": "array",
+                        "description": "Row labels (e.g. ['A', 'B'])"
+                    },
+                    "col_fields": {
+                        "type": "array",
+                        "description": "Column labels (e.g. ['A', 'B'])"
+                    },
+                    "value_fields": {
+                        "type": "array",
+                        "description": "Value fields (e.g. ['A', 'B'])"
+                    },
+                    "aggregation_function": {
+                        "type": "string",
+                        "description": "Aggregation function (sum, count, average, min, max)"
+                    },
+                    "target_cell": {
+                        "type": "string",
+                        "description": "Target cell (default: 'A1')"
+                    }
+                },
+                "required": [
+                    "source_sheet",
+                    "table_name",
+                    "value_fields"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.merge_cells",
+            "description": "Merge cells in range",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "range_str": {
+                        "type": "string",
+                        "description": "Cell range (e.g. 'A1:B10')"
+                    }
+                },
+                "required": [
+                    "range_str"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.set_cell_value",
+            "description": "Set cell value",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "cell": {
+                        "type": "string",
+                        "description": "Cell reference (e.g. 'A1')"
+                    },
+                    "value": {
+                        "type": "string",
+                        "description": "Cell value"
+                    }
+                },
+                "required": [
+                    "cell",
+                    "value"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.format_range",
+            "description": "Apply formatting to range",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "range_str": {
+                        "type": "string",
+                        "description": "Range (e.g. 'A1:B10')"
+                    },
+                    "background_color": {
+                        "type": "string",
+                        "description": "Background color (e.g. '#0000ff')"
+                    },
+                    "font_color": {
+                        "type": "string",
+                        "description": "Font color (e.g. '#ffffff')"
+                    },
+                    "bold": {
+                        "type": "boolean",
+                        "description": "Bold text"
+                    },
+                    "alignment": {
+                        "type": "string",
+                        "description": "Text alignment (left, center, right)"
+                    }
+                },
+                "required": [
+                    "range_str"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.create_chart",
+            "description": "Create chart from data range",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "chart_type": {
+                        "type": "string",
+                        "description": "Chart type (bar, column, line, pie, scatter, area)"
+                    },
+                    "data_range": {
+                        "type": "string",
+                        "description": "Data range (e.g. 'A1:B10')"
+                    },
+                    "title": {
+                        "type": "string",
+                        "description": "Chart title"
+                    },
+                    "x_axis_title": {
+                        "type": "string",
+                        "description": "X axis title"
+                    },
+                    "y_axis_title": {
+                        "type": "string",
+                        "description": "Y axis title"
+                    }
+                },
+                "required": [
+                    "chart_type",
+                    "data_range"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.freeze_panes",
+            "description": "Freeze rows/columns",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "rows": {
+                        "type": "integer",
+                        "description": "Rows to freeze from top"
+                    },
+                    "columns": {
+                        "type": "integer",
+                        "description": "Columns to freeze from left"
+                    }
+                },
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.rename_sheet",
+            "description": "Rename worksheet",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "old_name": {
+                        "type": "string",
+                        "description": "Current sheet name"
+                    },
+                    "new_name": {
+                        "type": "string",
+                        "description": "New sheet name"
+                    }
+                },
+                "required": [
+                    "old_name",
+                    "new_name"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.copy_sheet",
+            "description": "Copy worksheet",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "source_sheet": {
+                        "type": "string",
+                        "description": "Source sheet name"
+                    },
+                    "new_sheet_name": {
+                        "type": "string",
+                        "description": "New sheet name (optional)"
+                    }
+                },
+                "required": [
+                    "source_sheet"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.reorder_sheets",
+            "description": "Change sheet order",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "sheet_name": {
+                        "type": "string",
+                        "description": "Sheet to move"
+                    },
+                    "position": {
+                        "type": "integer",
+                        "description": "New position (0-based)"
+                    }
+                },
+                "required": [
+                    "sheet_name",
+                    "position"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.set_chart_legend_position",
+            "description": "Set chart legend position",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "position": {
+                        "type": "string",
+                        "description": "Legend position (top, bottom, left, right, none)"
+                    }
+                },
+                "required": [
+                    "position"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.set_number_format",
+            "description": "Apply number format to range",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "range_str": {
+                        "type": "string",
+                        "description": "Range (e.g. 'A1:B10')"
+                    },
+                    "format_type": {
+                        "type": "string",
+                        "description": "Format type (general, number, currency, accounting, date, time, percentage, fraction, scientific, text)"
+                    },
+                    "decimal_places": {
+                        "type": "integer",
+                        "description": "Decimal places (optional)"
+                    }
+                },
+                "required": [
+                    "range_str",
+                    "format_type"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.adjust_column_width",
+            "description": "Adjust column width",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "columns": {
+                        "type": "string",
+                        "description": "Column range (e.g. 'A:C')"
+                    },
+                    "width": {
+                        "type": "number",
+                        "description": "Width in characters"
+                    },
+                    "autofit": {
+                        "type": "boolean",
+                        "description": "Autofit to content"
+                    }
+                },
+                "required": [
+                    "columns"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.adjust_row_height",
+            "description": "Adjust row height",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "rows": {
+                        "type": "string",
+                        "description": "Row range (e.g. '1:10')"
+                    },
+                    "height": {
+                        "type": "number",
+                        "description": "Height in points"
+                    },
+                    "autofit": {
+                        "type": "boolean",
+                        "description": "Autofit to content"
+                    }
+                },
+                "required": [
+                    "rows"
+                ]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.export_to_pdf",
+            "description": "Export to PDF",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "file_path": {
+                        "type": "string",
+                        "description": "PDF save path (default: same as original)"
+                    },
+                    "sheets": {
+                        "type": "array",
+                        "description": "Sheets to include (default: all)"
+                    },
+                    "open_after_export": {
+                        "type": "boolean",
+                        "description": "Open PDF after export (default: false)"
+                    }
+                },
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "CalcTools.set_zoom_level",
+            "description": "Set worksheet zoom level",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "zoom_percentage": {
+                        "type": "integer",
+                        "description": "Zoom percentage (10-400)"
+                    }
+                },
+                "required": [
+                    "zoom_percentage"
+                ]
+            }
+        }
+    }
+]
--- a/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json
+++ b/mm_agents/autoglm_v/tools/apis/libreoffice_impress.json
@ -0,0 +1,559 @@
+[
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.save",
+      "description": "Save current presentation",
+      "parameters": {
+        "type": "object",
+        "properties": {},
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.go_to_slide",
+      "description": "Navigate to specific slide",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          }
+        },
+        "required": ["slide_index"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.get_slide_count",
+      "description": "Get total slide count",
+      "parameters": {
+        "type": "object",
+        "properties": {},
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.duplicate_slide",
+      "description": "Duplicate slide and place at end",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index to duplicate (1-based)"
+          }
+        },
+        "required": ["slide_index"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_slide_font",
+      "description": "Set font for all text in slide",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "font_name": {
+            "type": "string",
+            "description": "Font name (e.g., 'Arial', 'Times New Roman')"
+          }
+        },
+        "required": ["slide_index", "font_name"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.write_text",
+      "description": "Write text to textbox",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "content": {
+            "type": "string",
+            "description": "Text content"
+          },
+          "page_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Textbox index (0-based)"
+          },
+          "bold": {
+            "type": "boolean",
+            "description": "Bold text (default: false)"
+          },
+          "italic": {
+            "type": "boolean",
+            "description": "Italic text (default: false)"
+          },
+          "size": {
+            "type": "integer",
+            "description": "Font size"
+          },
+          "append": {
+            "type": "boolean",
+            "description": "Append to existing text (default: false)"
+          }
+        },
+        "required": ["content", "page_index", "box_index"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_style",
+      "description": "Set text style for textbox",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Textbox index (0-based)"
+          },
+          "bold": {
+            "type": "boolean",
+            "description": "Bold text"
+          },
+          "italic": {
+            "type": "boolean",
+            "description": "Italic text"
+          },
+          "underline": {
+            "type": "boolean",
+            "description": "Underline text"
+          }
+        },
+        "required": ["slide_index", "box_index"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.configure_auto_save",
+      "description": "Configure auto-save settings",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "enabled": {
+            "type": "boolean",
+            "description": "Enable auto-save"
+          },
+          "interval_minutes": {
+            "type": "number",
+            "description": "Auto-save interval in minutes (min: 1)"
+          }
+        },
+        "required": ["enabled", "interval_minutes"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_background_color",
+      "description": "Set textbox background color",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Textbox index (0-based)"
+          },
+          "color": {
+            "type": "string",
+            "description": "Color name or hex code"
+          }
+        },
+        "required": ["slide_index", "box_index", "color"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_text_color",
+      "description": "Set text color for textbox",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Textbox index (0-based)"
+          },
+          "color": {
+            "type": "string",
+            "description": "Color name or hex code"
+          }
+        },
+        "required": ["slide_index", "box_index", "color"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.delete_content",
+      "description": "Delete textbox from slide",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Textbox index (0-based)"
+          }
+        },
+        "required": ["slide_index", "box_index"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_slide_orientation",
+      "description": "Set slide orientation",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "orientation": {
+            "type": "string",
+            "description": "Slide orientation",
+            "enum": ["portrait", "landscape"]
+          }
+        },
+        "required": ["orientation"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.position_box",
+      "description": "Position textbox or image on slide",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Box index (0-based)"
+          },
+          "position": {
+            "type": "string",
+            "description": "Position on slide",
+            "enum": ["left", "right", "center", "top", "bottom", "top-left", "top-right", "bottom-left", "bottom-right"]
+          }
+        },
+        "required": ["slide_index", "box_index", "position"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.insert_file",
+      "description": "Insert video or audio file",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "file_path": {
+            "type": "string",
+            "description": "File path"
+          },
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "position": {
+            "type": "object",
+            "description": "Position coordinates",
+            "properties": {
+              "x": {
+                "type": "number",
+                "description": "X position (% of slide width)"
+              },
+              "y": {
+                "type": "number",
+                "description": "Y position (% of slide height)"
+              }
+            }
+          },
+          "size": {
+            "type": "object",
+            "description": "Size dimensions",
+            "properties": {
+              "width": {
+                "type": "number",
+                "description": "Width (% of slide width)"
+              },
+              "height": {
+                "type": "number",
+                "description": "Height (% of slide height)"
+              }
+            }
+          },
+          "autoplay": {
+            "type": "boolean",
+            "description": "Auto-play media"
+          }
+        },
+        "required": ["file_path"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_slide_background",
+      "description": "Set slide background color or image",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based). If not provided, applies to all slides"
+          },
+          "color": {
+            "type": "string",
+            "description": "Background color"
+          },
+          "image_path": {
+            "type": "string",
+            "description": "Background image path (overrides color)"
+          }
+        },
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.save_as",
+      "description": "Save document to specified location",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "file_path": {
+            "type": "string",
+            "description": "File save path with filename and extension"
+          },
+          "overwrite": {
+            "type": "boolean",
+            "description": "Overwrite existing file (default: false)"
+          }
+        },
+        "required": ["file_path"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.insert_image",
+      "description": "Insert image into slide",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "image_path": {
+            "type": "string",
+            "description": "Image file path"
+          },
+          "width": {
+            "type": "number",
+            "description": "Image width in cm"
+          },
+          "height": {
+            "type": "number",
+            "description": "Image height in cm"
+          },
+          "position": {
+            "type": "object",
+            "description": "Position coordinates",
+            "properties": {
+              "x": {
+                "type": "number",
+                "description": "X position (% of slide width)"
+              },
+              "y": {
+                "type": "number",
+                "description": "Y position (% of slide height)"
+              }
+            }
+          }
+        },
+        "required": ["slide_index", "image_path"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.configure_display_settings",
+      "description": "Configure presentation display settings",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "use_presenter_view": {
+            "type": "boolean",
+            "description": "Use presenter view"
+          },
+          "primary_monitor_only": {
+            "type": "boolean",
+            "description": "Use primary monitor only"
+          },
+          "monitor_for_presentation": {
+            "type": "integer",
+            "description": "Monitor number for presentation"
+          }
+        },
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_slide_number_color",
+      "description": "Set slide number color",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "color": {
+            "type": "string",
+            "description": "Color name or hex code"
+          }
+        },
+        "required": ["color"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_text_strikethrough",
+      "description": "Apply strikethrough formatting to text",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Textbox index (0-based)"
+          },
+          "line_numbers": {
+            "type": "array",
+            "items": {
+              "type": "integer"
+            },
+            "description": "Line numbers for strikethrough (1-based)"
+          },
+          "apply": {
+            "type": "boolean",
+            "description": "Apply or remove strikethrough"
+          }
+        },
+        "required": ["slide_index", "box_index", "line_numbers", "apply"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.set_textbox_alignment",
+      "description": "Set text alignment for textbox",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "slide_index": {
+            "type": "integer",
+            "description": "Slide index (1-based)"
+          },
+          "box_index": {
+            "type": "integer",
+            "description": "Textbox index (0-based)"
+          },
+          "alignment": {
+            "type": "string",
+            "description": "Text alignment",
+            "enum": ["left", "center", "right", "justify"]
+          }
+        },
+        "required": ["slide_index", "box_index", "alignment"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "ImpressTools.export_to_image",
+      "description": "Export presentation or slide to image",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "file_path": {
+            "type": "string",
+            "description": "Image save path with filename and extension"
+          },
+          "format": {
+            "type": "string",
+            "description": "Image format",
+            "enum": ["png", "jpeg", "jpg", "gif", "bmp", "tiff"]
+          },
+          "slide_index": {
+            "type": "integer",
+            "description": "Specific slide index (1-based). If not provided, exports all slides"
+          }
+        },
+        "required": ["file_path", "format"]
+      }
+    }
+  }
+]
--- a/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json
+++ b/mm_agents/autoglm_v/tools/apis/libreoffice_writer.json
@ -0,0 +1,412 @@
+[
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.save",
+      "description": "Save document to current location",
+      "parameters": {
+        "type": "object",
+        "properties": {},
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.write_text",
+      "description": "Write text at cursor position",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "text": {
+            "type": "string",
+            "description": "Text to write"
+          },
+          "bold": {
+            "type": "boolean",
+            "description": "Apply bold formatting"
+          },
+          "italic": {
+            "type": "boolean",
+            "description": "Apply italic formatting"
+          },
+          "size": {
+            "type": "number",
+            "description": "Font size"
+          }
+        },
+        "required": ["text"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.set_color",
+      "description": "Change text color using regex pattern",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "pattern": {
+            "type": "string",
+            "description": "Regex pattern to match"
+          },
+          "color": {
+            "type": "number",
+            "description": "Hex color code (e.g., 0x000000)"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["pattern", "color"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.find_and_replace",
+      "description": "Find and replace text using regex",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "pattern": {
+            "type": "string",
+            "description": "Regex pattern to find"
+          },
+          "replacement": {
+            "type": "string",
+            "description": "Replacement text"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["pattern", "replacement"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.set_font",
+      "description": "Change font family",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "font_name": {
+            "type": "string",
+            "description": "Font name (e.g., 'Arial', 'Times New Roman')"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["font_name"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.set_line_spacing",
+      "description": "Set line spacing",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "spacing_value": {
+            "type": "number",
+            "description": "Spacing value (1.0=single, 2.0=double)"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["spacing_value"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.remove_highlighting",
+      "description": "Remove text highlighting",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.find_highlighted_text",
+      "description": "Find text with specific highlight color",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "highlight_color": {
+            "type": "string",
+            "description": "Color name (e.g., 'yellow') or hex code"
+          }
+        },
+        "required": ["highlight_color"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.insert_formula_at_cursor",
+      "description": "Insert formula at cursor",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "formula": {
+            "type": "string",
+            "description": "Formula to insert"
+          }
+        },
+        "required": ["formula"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.insert_image_at_cursor",
+      "description": "Insert image at cursor",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "image_path": {
+            "type": "string",
+            "description": "Full path to image file"
+          },
+          "width": {
+            "type": "integer",
+            "description": "Display width in pixels"
+          },
+          "height": {
+            "type": "integer",
+            "description": "Display height in pixels"
+          }
+        },
+        "required": ["image_path"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.set_strikethrough",
+      "description": "Apply strikethrough formatting",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "pattern": {
+            "type": "string",
+            "description": "Regex pattern to match"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["pattern"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.set_font_size",
+      "description": "Change font size",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "font_size": {
+            "type": "number",
+            "description": "Font size in points"
+          },
+          "pattern": {
+            "type": "string",
+            "description": "Regex pattern to match"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["font_size", "pattern"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.export_to_pdf",
+      "description": "Export document to PDF",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "output_path": {
+            "type": "string",
+            "description": "PDF save path"
+          },
+          "output_filename": {
+            "type": "string",
+            "description": "PDF filename"
+          },
+          "include_comments": {
+            "type": "boolean",
+            "description": "Include comments in PDF"
+          },
+          "quality": {
+            "type": "string",
+            "description": "Export quality ('standard', 'high', 'print')"
+          }
+        },
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.set_paragraph_alignment",
+      "description": "Set paragraph alignment",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "alignment": {
+            "type": "string",
+            "description": "Alignment type ('left', 'center', 'right', 'justify')"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["alignment"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.capitalize_words",
+      "description": "Capitalize first letter of each word",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.set_default_font",
+      "description": "Set default font for new text",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "font_name": {
+            "type": "string",
+            "description": "Default font name"
+          },
+          "font_size": {
+            "type": "number",
+            "description": "Default font size in points"
+          }
+        },
+        "required": ["font_name"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.add_page_numbers",
+      "description": "Add page numbers",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "position": {
+            "type": "string",
+            "description": "Position ('bottom_left', 'bottom_center', 'bottom_right', 'top_left', 'top_center', 'top_right')"
+          },
+          "start_number": {
+            "type": "integer",
+            "description": "Starting page number"
+          },
+          "format": {
+            "type": "string",
+            "description": "Number format (e.g., '1', 'Page 1', '1 of N')"
+          }
+        },
+        "required": ["position"]
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.insert_page_break",
+      "description": "Insert page break",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "position": {
+            "type": "string",
+            "description": "Insert location ('at_cursor', 'end_of_document')"
+          }
+        },
+        "required": []
+      }
+    }
+  },
+  {
+    "type": "function",
+    "function": {
+      "name": "WriterTools.change_text_case",
+      "description": "Change text case",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "case_type": {
+            "type": "string",
+            "description": "Case type ('lowercase', 'uppercase')"
+          },
+          "pattern": {
+            "type": "string",
+            "description": "Regex pattern to match"
+          },
+          "paragraph_indices": {
+            "type": "array",
+            "description": "Target paragraph indices (0-based). Applies to all if omitted"
+          }
+        },
+        "required": ["case_type", "pattern"]
+      }
+    }
+  }
+]
--- a/mm_agents/autoglm_v/tools/apis/vlc.json
+++ b/mm_agents/autoglm_v/tools/apis/vlc.json
@ -0,0 +1,166 @@
+[
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.get_playlist",
+            "description": "Get current playlist with track info",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function", 
+        "function": {
+            "name": "VLCTools.play",
+            "description": "Start playing current media",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.pause",
+            "description": "Pause current media",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.next",
+            "description": "Switch to next track",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.previous",
+            "description": "Switch to previous track",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.add_to_playlist",
+            "description": "Add media file to playlist",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "uri": {
+                        "type": "string",
+                        "description": "Media file URI (file:// or https://)"
+                    }
+                },
+                "required": ["uri"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.get_current_time",
+            "description": "Get current playback position in seconds",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.get_media_duration",
+            "description": "Get media duration in seconds",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.toggle_fullscreen",
+            "description": "Toggle or set fullscreen mode",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "enable": {
+                        "type": "boolean",
+                        "description": "Force fullscreen on/off, omit to toggle"
+                    }
+                },
+                "required": []
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.get_settings",
+            "description": "Get VLC settings",
+            "parameters": {
+                "type": "object",
+                "properties": {}
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.set_settings",
+            "description": "Set VLC settings",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "field": {
+                        "type": "string",
+                        "description": "Setting name (e.g. qt-max-volume, qt-minimal-view)"
+                    },
+                    "value": {
+                        "type": "string",
+                        "description": "Setting value (use 0/1 for booleans)"
+                    }
+                },
+                "required": ["field", "value"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "VLCTools.get_media_files",
+            "description": "Get media files from path",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Directory path"
+                    },
+                    "suffix": {
+                        "type": "array",
+                        "description": "File extensions, default: ['mp4','avi','mkv','mov','mp3','m4a','wav']"
+                    }
+                },
+                "required": ["path"]
+            }
+        }
+    }
+]
--- a/mm_agents/autoglm_v/tools/package/code.py
+++ b/mm_agents/autoglm_v/tools/package/code.py
@ -0,0 +1,260 @@
+import json
+import os
+import subprocess
+from pathlib import Path
+
+
+class CodeTools:
+    """Thin wrappers around the VS Code ``code`` command-line launcher.
+
+    Every command method runs ``code`` with a fixed set of flags, stores a
+    status message (or, for ``list_extensions``, the captured output) in the
+    class attribute ``ret`` and returns that same value.  Failures never
+    propagate: they are reported through the stored message instead.
+    """
+
+    # Result of the most recent call; stored on the class, not per instance.
+    ret = ""
+
+    @classmethod
+    def _run_code(cls, args, success, failure, capture=False):
+        """Run ``code`` with *args* and record the outcome in ``cls.ret``.
+
+        Args:
+            args (list): Arguments appended after the ``code`` executable.
+            success (str): Message stored when the command exits with status 0
+                (ignored when *capture* is true).
+            failure (str): Prefix of the message stored when the command exits
+                with a non-zero status.
+            capture (bool): When true, capture stdout as text and store it
+                instead of *success*.
+
+        Returns:
+            str: The value that was stored in ``cls.ret``.
+        """
+        # Only request captured text output when asked, so the other commands
+        # keep calling subprocess.run() with its default stream handling.
+        options = {"capture_output": True, "text": True} if capture else {}
+        try:
+            completed = subprocess.run(["code", *args], check=True, **options)
+            cls.ret = completed.stdout if capture else success
+        except subprocess.CalledProcessError as e:
+            cls.ret = f"{failure}: {e}"
+        except Exception as e:
+            # Best effort: anything else (e.g. the executable cannot be
+            # started) is reported rather than raised.
+            cls.ret = f"Unexpected error: {e}"
+        return cls.ret
+
+    @classmethod
+    def print_result(cls):
+        """Print the result of the most recent call."""
+        print(cls.ret)
+
+    @classmethod
+    def launch_vscode(cls, path):
+        """
+        Launches Visual Studio Code with the specified file path or directory.
+        The file or directory is opened in an already existing window (``-r``).
+
+        Args:
+            path (str): File path or directory.
+        """
+        return cls._run_code(
+            ["-r", path],
+            "Successfully launched VS Code",
+            "Error launching VS Code",
+        )
+
+    @classmethod
+    def env_info(cls):
+        """Reset the stored result to the string ``"None"``; returns nothing."""
+        cls.ret = "None"
+
+    @classmethod
+    def compare_files(cls, file1, file2):
+        """
+        Compares two files in VSCode (``-d``).
+
+        Args:
+            file1 (str): Path of the first file.
+            file2 (str): Path of the second file.
+        """
+        return cls._run_code(
+            ["-d", file1, file2],
+            "The compared files are opened in VSCode",
+            "Error comparing files",
+        )
+
+    @classmethod
+    def add_folder(cls, folder):
+        """
+        Adds a folder to the last active window in VSCode (``-a``).
+
+        Args:
+            folder (str): Folder path.
+        """
+        return cls._run_code(
+            ["-a", folder],
+            "Successfully added folder",
+            "Error adding folder",
+        )
+
+    @classmethod
+    def goto_file(cls, file_path, line=1, character=1):
+        """
+        Opens a file at a specific line and character position (``-g``).
+
+        Args:
+            file_path (str): File path.
+            line (int): Line number.
+            character (int): Character position.
+        """
+        # ``code -g`` takes the target as a single "path:line:character" token.
+        return cls._run_code(
+            ["-g", f"{file_path}:{line}:{character}"],
+            f"Successfully opened file, line: {line}, character: {character}",
+            "Error going to file",
+        )
+
+    @classmethod
+    def perform_merge(cls, path1, path2, base, result):
+        """
+        Perform a three-way merge (``-m``).
+
+        Args:
+            path1 (str): Path of the first version of the file.
+            path2 (str): Path of the second version of the file.
+            base (str): Path of the base version of the file.
+            result (str): Path where the merge result is saved.
+        """
+        return cls._run_code(
+            ["-m", path1, path2, base, result],
+            "Successfully performed merge",
+            "Error performing merge",
+        )
+
+    @classmethod
+    def remove_folder(cls, folder):
+        """
+        Removes a folder from the last active window in VSCode (``--remove``).
+
+        Args:
+            folder (str): Folder path.
+        """
+        return cls._run_code(
+            ["--remove", folder],
+            "Successfully removed folder",
+            "Error removing folder",
+        )
+
+    @classmethod
+    def install_extension(cls, extension_id, pre_release=False):
+        """
+        Installs an extension or updates it in VSCode.
+
+        Args:
+            extension_id (str): Identifier of the extension.
+            pre_release (bool): Whether to install the pre-release version.
+        """
+        args = ["--install-extension", extension_id]
+        if pre_release:
+            args.append("--pre-release")
+        return cls._run_code(
+            args,
+            "Successfully installed extension",
+            "Error installing extension",
+        )
+
+    @classmethod
+    def uninstall_extension(cls, extension_id):
+        """
+        Uninstalls an extension from VSCode.
+
+        Args:
+            extension_id (str): Identifier of the extension.
+        """
+        return cls._run_code(
+            ["--uninstall-extension", extension_id],
+            "Successfully uninstalled extension",
+            "Error uninstalling extension",
+        )
+
+    @classmethod
+    def list_extensions(cls, show_versions=False, category=None):
+        """
+        Lists installed extensions in VSCode.
+
+        On success the stored/returned value is the command's captured stdout.
+
+        Args:
+            show_versions (bool): Whether to show the extension versions.
+            category (str): Filter the extensions by category.
+        """
+        args = ["--list-extensions"]
+        if show_versions:
+            args.append("--show-versions")
+        if category:
+            args.extend(["--category", category])
+        return cls._run_code(args, "", "Error listing extensions", capture=True)
+
+    @classmethod
+    def update_extensions(cls):
+        """
+        Updates all installed extensions in VSCode to the latest version.
+        """
+        return cls._run_code(
+            ["--update-extensions"],
+            "Successfully updated extensions",
+            "Error updating extensions",
+        )
+
+    @classmethod
+    def disable_extension(cls, extension_id):
+        """
+        Disables a specific extension for the next instance of VSCode.
+
+        Args:
+            extension_id (str): Identifier of the extension.
+        """
+        return cls._run_code(
+            ["--disable-extension", extension_id],
+            "Successfully disabled extension",
+            "Error disabling extension",
+        )
+
+    @classmethod
+    def toggle_sync(cls, state):
+        """
+        Toggles synchronization on or off in VSCode.
+
+        Args:
+            state (str): 'on' or 'off' to turn sync on or off.
+        """
+        return cls._run_code(
+            ["--sync", state],
+            "Successfully toggled sync",
+            "Error toggling sync",
+        )
--- a/mm_agents/autoglm_v/tools/package/google_chrome.py
+++ b/mm_agents/autoglm_v/tools/package/google_chrome.py
@ -0,0 +1,107 @@
+class BrowserTools:
+    """Chrome helpers that describe an action instead of performing it.
+
+    Page-opening methods return an ``OPEN_CHROME_TAB`` action dict; hotkey
+    methods return a one-line pyautogui script as a string.
+    """
+
+    # Last textual result, echoed by print_result().
+    ret = ""
+
+    @staticmethod
+    def _open_tab(url):
+        """Build the action dict that opens *url* in a Chrome tab."""
+        return {"action_type": "OPEN_CHROME_TAB", "parameters": {"urls_to_open": [url]}}
+
+    @classmethod
+    def print_result(cls):
+        """Print the stored result string."""
+        print(cls.ret)
+
+    @classmethod
+    def env_info(cls):
+        """Set the stored result to the string ``"None"``; returns nothing."""
+        cls.ret = "None"
+
+    # @classmethod
+    # def show_all_tabs(cls):
+    #     cls.ret = "Browser not found"
+    #     for attempt in range(3):
+    #         with sync_playwright() as p:
+    #             try:
+    #                 browser = p.chromium.connect_over_cdp(cls.remote_debugging_url)
+    #                 if not browser:
+    #                     continue
+    #                 context = browser.contexts[0]
+    #                 # Collect the titles of all tabs
+    #                 cls.ret = 'Browser Tabs: '
+    #                 for idx, page in enumerate(context.pages):
+    #                     cls.ret += f"{idx}. {page.title()} ({page.url})" + '\n'
+    #                 return cls.ret
+    #             except TimeoutError:
+    #                 cls.ret = 'Failed to get browser tabs'
+    #                 return None
+    #     return None
+
+    @classmethod
+    def open_profile_settings(cls):
+        """
+        Return the action that opens the profile settings page.
+        """
+        return cls._open_tab("chrome://settings/people")
+
+    @classmethod
+    def open_password_settings(cls):
+        """
+        Return the action that opens the password (autofill) settings page.
+        """
+        return cls._open_tab("chrome://settings/autofill")
+
+    @classmethod
+    def open_privacy_settings(cls):
+        """
+        Return the action that opens the privacy settings page.
+        """
+        return cls._open_tab("chrome://settings/privacy")
+
+    @classmethod
+    def open_appearance_settings(cls):
+        """
+        Return the action that opens the appearance settings page.
+        """
+        return cls._open_tab("chrome://settings/appearance")
+
+    @classmethod
+    def open_search_engine_settings(cls):
+        """
+        Return the action that opens the search engine settings page.
+        """
+        return cls._open_tab("chrome://settings/search")
+
+    @classmethod
+    def bring_back_last_tab(cls):
+        """
+        Return a pyautogui script that presses Ctrl+Shift+T.
+        """
+        script = "import pyautogui; pyautogui.hotkey('ctrl', 'shift', 't'); print('Brought back last tab')"
+        return script
+
+    @classmethod
+    def print(cls):
+        """
+        Return a pyautogui script that presses Ctrl+P on the current page.
+        """
+        script = "import pyautogui; pyautogui.hotkey('ctrl', 'p'); print('Opened print option')"
+        return script
+
+    @classmethod
+    def delete_browsing_data(cls):
+        """
+        Return a pyautogui script that presses Ctrl+Shift+Del.
+        """
+        script = "import pyautogui; pyautogui.hotkey('ctrl', 'shift', 'del'); print('Deleted browsing data')"
+        return script
+
+    @classmethod
+    def open_extensions(cls):
+        """
+        Return the action that opens the extensions page.
+        """
+        return cls._open_tab("chrome://extensions")
+
+    @classmethod
+    def bookmark_page(cls):
+        """
+        Return a pyautogui script that presses Ctrl+D on the current page.
+        """
+        script = "import pyautogui; pyautogui.hotkey('ctrl', 'd'); print('Bookmarked page')"
+        return script
+
+    @classmethod
+    def open_bookmarks(cls):
+        """
+        Return the action that opens the bookmarks page.
+        """
+        return cls._open_tab("chrome://bookmarks")
--- a/mm_agents/autoglm_v/tools/package/libreoffice_calc.py
+++ b/mm_agents/autoglm_v/tools/package/libreoffice_calc.py
--- a/mm_agents/autoglm_v/tools/package/libreoffice_impress.py
+++ b/mm_agents/autoglm_v/tools/package/libreoffice_impress.py
--- a/mm_agents/autoglm_v/tools/package/libreoffice_writer.py
+++ b/mm_agents/autoglm_v/tools/package/libreoffice_writer.py
@ -0,0 +1,753 @@
+import os
+import re
+
+import uno
+from com.sun.star.awt.FontSlant import ITALIC, NONE, OBLIQUE
+from com.sun.star.awt.FontWeight import BOLD, NORMAL
+from com.sun.star.beans import PropertyValue
+from com.sun.star.style.ParagraphAdjust import CENTER, LEFT, RIGHT
+from com.sun.star.text.ControlCharacter import PARAGRAPH_BREAK
+from com.sun.star.text.TextContentAnchorType import AS_CHARACTER
+
+
+class WriterTools:
+    """
+    Classmethod helpers that edit the current LibreOffice document over UNO.
+
+    All connection state below is resolved once, when the class body runs, by
+    connecting to an office instance listening on localhost:2002. Most methods
+    leave a result or status message in ``ret``; ``print_result`` prints it.
+    """
+
+    # Local pyuno component context, used here only to create the URL resolver.
+    localContext = uno.getComponentContext()
+    resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+    # Component context of the office process reached through the socket connection.
+    ctx = resolver.resolve("uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext")
+    desktop = ctx.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", ctx)
+    # Document that is current at class-definition time; every method works on it.
+    doc = desktop.getCurrentComponent()
+    text = doc.Text
+    # Text cursor created once here and shared by the insert/write helpers.
+    cursor = text.createTextCursor()
+    # Result or status of the most recent tool call.
+    ret = ""
+
+    @classmethod
+    def close_other_window(cls):
+        """关闭除当前文档外的所有文档"""
+        components = cls.desktop.getComponents().createEnumeration()
+        current_url = cls.doc.getURL()
+        while components.hasMoreElements():
+            doc = components.nextElement()
+            if doc.getURL() != current_url:
+                doc.close(True)
+
+    @classmethod
+    def save(cls):
+        """保存文档到当前位置"""
+        try:
+            if cls.doc.hasLocation():
+                cls.doc.store()
+            else:
+                raise Exception("文档没有保存位置，请使用另存为功能")
+            return True
+        except Exception as e:
+            return False
+
+    @classmethod
+    def maximize_window(cls):
+        """
+        将窗口设置为工作区最大尺寸
+        使用工作区域大小（考虑任务栏等）
+        """
+        window = cls.doc.getCurrentController().getFrame().getContainerWindow()
+        toolkit = window.getToolkit()
+        device = toolkit.createScreenCompatibleDevice(0, 0)
+        workarea = toolkit.getWorkArea()
+        window.setPosSize(workarea.X, workarea.Y, workarea.Width, workarea.Height, 15)
+
+    @classmethod
+    def print_result(cls):
+        print(cls.ret)
+
+    @classmethod
+    def write_text(cls, text, bold=False, italic=False, size=None):
+        """写入文本"""
+        cls.cursor.CharWeight = 150 if bold else 100
+        cls.cursor.CharPosture = ITALIC if italic else NONE
+        if size:
+            cls.cursor.CharHeight = size
+        cls.text.insertString(cls.cursor, text, False)
+        cls.ret = "Success"
+
+    @classmethod
+    def get_paragraphs(cls, start_index=0, count=None):
+        """Retrieves paragraphs from the document as a list."""
+        text = cls.doc.getText()
+        paragraphs = text.createEnumeration()
+        paragraph_list = []
+        while paragraphs.hasMoreElements():
+            paragraph = paragraphs.nextElement()
+            if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                paragraph_list.append(paragraph.getString())
+        if start_index < 0:
+            start_index = 0
+        elif start_index >= len(paragraph_list):
+            cls.ret = []
+        if count is not None:
+            end_index = min(start_index + count, len(paragraph_list))
+            cls.ret = paragraph_list[start_index:end_index]
+        else:
+            cls.ret = paragraph_list[start_index:]
+        return cls.ret
+
+    @classmethod
+    def env_info(cls):
+        paras = cls.get_paragraphs()
+        para_str = ""
+        for i, para in enumerate(paras):
+            para = para[:500] + "..." if len(para) > 500 else para
+            para_str += "Paragraph " + str(i) + ": " + para.strip() + "\n"
+        cls.ret = para_str
+        return cls.ret
+
+    @classmethod
+    def set_color(cls, pattern, color, paragraph_indices=None):
+        """
+        Changes the color of matched text in the document for specified paragraphs.
+
+        Args:
+            pattern (str): Regular expression pattern to match text
+            color (int): Hex color code (e.g., 0x000000 for black)
+            paragraph_indices (list, optional): List of paragraph indices to modify (0-based).
+                If None, applies to all paragraphs.
+        """
+        try:
+            enum = cls.doc.Text.createEnumeration()
+            paragraphs = []
+            while enum.hasMoreElements():
+                paragraphs.append(enum.nextElement())
+            if not paragraph_indices:
+                paragraphs_to_process = range(len(paragraphs))
+            else:
+                paragraphs_to_process = paragraph_indices
+            regex = re.compile(pattern)
+            for idx in paragraphs_to_process:
+                if idx < 0 or idx >= len(paragraphs):
+                    continue
+                paragraph = paragraphs[idx]
+                if not paragraph.supportsService("com.sun.star.text.Paragraph"):
+                    continue
+                para_text = paragraph.getString()
+                matches = regex.finditer(para_text)
+                for match in matches:
+                    para_cursor = cls.text.createTextCursorByRange(paragraph.getStart())
+                    para_cursor.goRight(match.start(), False)
+                    para_cursor.goRight(match.end() - match.start(), True)
+                    para_cursor.CharColor = color
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+    @classmethod
+    def find_and_replace(cls, pattern, replacement, paragraph_indices=None):
+        """
+        Finds all occurrences of a specified text pattern and replaces them with another text in the document.
+
+        Args:
+            pattern (str): The pattern to match in the document, should be a regular expression
+            replacement (str): The text to replace the found text with
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing)
+
+        Returns:
+            str: Success message with number of replacements made
+        """
+        try:
+            enum = cls.doc.Text.createEnumeration()
+            paragraphs = []
+            while enum.hasMoreElements():
+                paragraphs.append(enum.nextElement())
+            total_replacements = 0
+            if not paragraph_indices:
+                paragraphs_to_process = list(range(len(paragraphs)))
+            else:
+                paragraphs_to_process = [i for i in paragraph_indices if 0 <= i < len(paragraphs)]
+            regex = re.compile(pattern)
+            for idx in paragraphs_to_process:
+                if idx >= len(paragraphs):
+                    continue
+                paragraph = paragraphs[idx]
+                if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                    text_content = paragraph.getString()
+                    new_text, count = regex.subn(replacement, text_content)
+                    if count > 0:
+                        paragraph.setString(new_text)
+                        total_replacements += count
+            cls.ret = f"Successfully made {total_replacements} replacements"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error during find and replace: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_font(cls, font_name, paragraph_indices=None):
+        """
+        Changes the font of text in the document or specified paragraphs.
+
+        Args:
+            font_name (str): The name of the font to apply (e.g., 'Times New Roman', 'Arial', 'Calibri')
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+                                             If not provided, applies to all paragraphs.
+        """
+        try:
+            text = cls.doc.getText()
+            enum = text.createEnumeration()
+            paragraphs = []
+            while enum.hasMoreElements():
+                paragraphs.append(enum.nextElement())
+            if not paragraph_indices:
+                paragraph_indices = range(len(paragraphs))
+            for idx in paragraph_indices:
+                if 0 <= idx < len(paragraphs):
+                    paragraph = paragraphs[idx]
+                    cursor = text.createTextCursorByRange(paragraph)
+                    cursor.CharFontName = font_name
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+    @classmethod
+    def set_line_spacing(cls, spacing_value, paragraph_indices=None):
+        """
+        Sets the line spacing for specified paragraphs in the document.
+
+        Args:
+            spacing_value (float): The line spacing value to apply (1.0 for single spacing, 2.0 for double spacing, etc.)
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+                                            If not provided, applies to all paragraphs.
+        """
+        try:
+            text = cls.doc.getText()
+            paragraph_enum = text.createEnumeration()
+            line_spacing_value = int(spacing_value * 100)
+            current_index = 0
+
+            while paragraph_enum.hasMoreElements():
+                paragraph = paragraph_enum.nextElement()
+
+                if not paragraph_indices or current_index in paragraph_indices:
+                    line_spacing = uno.createUnoStruct("com.sun.star.style.LineSpacing")
+                    line_spacing.Mode = 0
+                    line_spacing.Height = line_spacing_value
+                    paragraph.ParaLineSpacing = line_spacing
+
+                if paragraph.String.strip():
+                    current_index += 1
+
+            cls.ret = "Success"
+            return True
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return False
+
+    @classmethod
+    def remove_highlighting(cls, paragraph_indices=None):
+        """
+        Removes ALL highlighting from text in the document for specified paragraphs.
+
+        Args:
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+                If not provided, applies to all paragraphs.
+
+        Returns:
+            str: Success message or error message
+        """
+        try:
+            text = cls.doc.getText()
+            paragraphs = text.createEnumeration()
+            target_indices = set(paragraph_indices) if paragraph_indices else None
+            current_index = 0
+
+            while paragraphs.hasMoreElements():
+                paragraph = paragraphs.nextElement()
+                if target_indices is None or current_index in target_indices:
+                    if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                        para_cursor = text.createTextCursorByRange(paragraph)
+                        # Remove all highlighting by setting back color to -1
+                        para_cursor.CharBackColor = -1
+
+                        # Additional cleanup for individual text portions (optional)
+                        text_portions = paragraph.createEnumeration()
+                        while text_portions.hasMoreElements():
+                            text_portion = text_portions.nextElement()
+                            if hasattr(text_portion, "CharBackColor"):
+                                portion_cursor = text.createTextCursorByRange(text_portion)
+                                portion_cursor.CharBackColor = -1
+                current_index += 1
+
+            cls.ret = "Successfully removed all highlighting"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error removing highlighting: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def find_highlighted_text(cls, highlight_color):
+        """
+        Finds all text in the document that has a specific highlight color applied to it.
+
+        Args:
+            highlight_color (str): The highlight color to search for. Can be a color name (e.g., 'yellow', 'green') or hex code.
+
+        Returns:
+            list: A list of strings containing all text segments with the specified highlight color.
+        """
+        color_map = {
+            "yellow": 16776960,
+            "green": 65280,
+            "blue": 255,
+            "red": 16711680,
+            "cyan": 65535,
+            "magenta": 16711935,
+            "black": 0,
+            "white": 16777215,
+            "gray": 8421504,
+            "lightgray": 12632256,
+        }
+        target_color = None
+        if highlight_color.lower() in color_map:
+            target_color = color_map[highlight_color.lower()]
+        elif highlight_color.startswith("#") and len(highlight_color) == 7:
+            try:
+                hex_color = highlight_color[1:]
+                r = int(hex_color[0:2], 16)
+                g = int(hex_color[2:4], 16)
+                b = int(hex_color[4:6], 16)
+                target_color = (r << 16) + (g << 8) + b
+            except ValueError:
+                cls.ret = f"Invalid hex color format: {highlight_color}"
+                return []
+        else:
+            cls.ret = f"Unsupported color format: {highlight_color}"
+            return []
+        highlighted_text = []
+        text = cls.doc.getText()
+        enum_paragraphs = text.createEnumeration()
+        while enum_paragraphs.hasMoreElements():
+            paragraph = enum_paragraphs.nextElement()
+            if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                enum_portions = paragraph.createEnumeration()
+                while enum_portions.hasMoreElements():
+                    text_portion = enum_portions.nextElement()
+                    if hasattr(text_portion, "CharBackColor") and text_portion.CharBackColor == target_color:
+                        if text_portion.getString().strip():
+                            highlighted_text.append(text_portion.getString())
+        cls.ret = f"Found {len(highlighted_text)} text segments with highlight color {highlight_color}"
+        return highlighted_text
+
+    @classmethod
+    def insert_formula_at_cursor(cls, formula):
+        """
+        Inserts a formula at the current cursor position in the document.
+
+        Args:
+            formula (str): The formula to insert at the current cursor position.
+
+        Returns:
+            bool: True if successful, False otherwise
+        """
+        try:
+            embedded_obj = cls.doc.createInstance("com.sun.star.text.TextEmbeddedObject")
+            embedded_obj.setPropertyValue("CLSID", "078B7ABA-54FC-457F-8551-6147e776a997")
+            embedded_obj.setPropertyValue("AnchorType", AS_CHARACTER)
+            cls.text.insertTextContent(cls.cursor, embedded_obj, False)
+            math_obj = embedded_obj.getEmbeddedObject()
+            math_obj.Formula = formula
+            cls.ret = "Formula inserted successfully"
+            return True
+        except Exception as e:
+            cls.ret = f"Error inserting formula: {str(e)}"
+            return False
+
+    @classmethod
+    def insert_image_at_cursor(cls, image_path, width=None, height=None):
+        """
+        Inserts an image at the current cursor position in the document.
+
+        Args:
+            image_path (str): Full path to the image file to insert
+            width (int, optional): Width to display the image in pixels
+            height (int, optional): Height to display the image in pixels
+
+        Returns:
+            str: Success message or error message
+        """
+        try:
+            if image_path.startswith("~"):
+                image_path = os.path.expanduser(image_path)
+            if not os.path.exists(image_path):
+                cls.ret = f"Error: Image file not found at {image_path}"
+                return cls.ret
+            image_path = os.path.abspath(image_path)
+            if os.name == "nt":
+                file_url = "file:///" + image_path.replace("\\", "/")
+            else:
+                file_url = "file://" + image_path
+            graphic = cls.doc.createInstance("com.sun.star.text.GraphicObject")
+            graphic.GraphicURL = file_url
+            graphic.AnchorType = AS_CHARACTER
+            if width is not None:
+                graphic.Width = width * 100
+            if height is not None:
+                graphic.Height = height * 100
+            cls.text.insertTextContent(cls.cursor, graphic, False)
+            cls.ret = "Success: Image inserted"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_strikethrough(cls, pattern, paragraph_indices=None):
+        """
+        Sets the strikethrough formatting for text matching the specified pattern in the document.
+
+        Args:
+            pattern (str): The regular expression pattern to match in the document
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+                                               If not provided, applies to all paragraphs.
+
+        Returns:
+            str: Success message or error information
+        """
+        try:
+            paragraphs = cls.doc.getText().createEnumeration()
+            para_index = 0
+            found_matches = 0
+            while paragraphs.hasMoreElements():
+                paragraph = paragraphs.nextElement()
+                if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                    if paragraph_indices and para_index not in paragraph_indices:
+                        para_index += 1
+                        continue
+                    para_text = paragraph.getString()
+                    matches = list(re.finditer(pattern, para_text))
+                    for match in matches:
+                        text_range = paragraph.getStart()
+                        cursor = cls.doc.getText().createTextCursorByRange(text_range)
+                        cursor.goRight(match.start(), False)
+                        cursor.goRight(match.end() - match.start(), True)
+                        cursor.CharStrikeout = 1
+                        found_matches += 1
+                    para_index += 1
+            cls.ret = f"Successfully applied strikethrough to {found_matches} matches of pattern: {pattern}"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error applying strikethrough: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_font_size(cls, font_size, pattern, paragraph_indices=None):
+        """
+        Changes the font size of specified text in the document.
+
+        Args:
+            font_size (float): The font size to apply (in points).
+            pattern (str): The pattern to match in the document, should be a regular expression.
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+                                               If not provided, applies to all paragraphs.
+
+        Returns:
+            str: Result message indicating success or failure.
+        """
+        try:
+            regex = re.compile(pattern)
+            paragraphs = cls.doc.getText().createEnumeration()
+            current_index = 0
+            while paragraphs.hasMoreElements():
+                paragraph = paragraphs.nextElement()
+                if paragraph_indices and current_index not in paragraph_indices:
+                    current_index += 1
+                    continue
+                if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                    para_cursor = cls.text.createTextCursorByRange(paragraph)
+                    para_text = paragraph.getString()
+                    matches = list(regex.finditer(para_text))
+                    for match in reversed(matches):
+                        start_pos = match.start()
+                        end_pos = match.end()
+                        para_cursor.gotoStart(False)
+                        para_cursor.goRight(start_pos, False)
+                        para_cursor.goRight(end_pos - start_pos, True)
+                        para_cursor.CharHeight = font_size
+                current_index += 1
+            cls.ret = f"Successfully changed font size to {font_size} for text matching '{pattern}'"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error changing font size: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def export_to_pdf(cls, output_path=None, output_filename=None, include_comments=False, quality="standard"):
+        """
+        Exports the current document to PDF format.
+
+        Args:
+            output_path (str, optional): The full path where the PDF should be saved.
+                If not provided, uses the same location as the original document.
+            output_filename (str, optional): The filename to use for the PDF.
+                If not provided, uses the original document's filename with .pdf extension.
+            include_comments (bool, optional): Whether to include comments in the exported PDF.
+                Defaults to False.
+            quality (str, optional): The quality of the PDF export ('standard', 'high', 'print').
+                Defaults to 'standard'.
+
+        Returns:
+            str: Path to the exported PDF file or error message
+        """
+        try:
+            doc_url = cls.doc.getURL()
+            if not doc_url and not output_path:
+                return "Error: Document has not been saved and no output path provided"
+            if doc_url:
+                doc_path = uno.fileUrlToSystemPath(os.path.dirname(doc_url))
+                doc_filename = os.path.basename(doc_url)
+                doc_name = os.path.splitext(doc_filename)[0]
+            else:
+                doc_path = ""
+                doc_name = "export"
+            final_path = output_path if output_path else doc_path
+            final_filename = output_filename if output_filename else f"{doc_name}.pdf"
+            if not final_filename.lower().endswith(".pdf"):
+                final_filename += ".pdf"
+            full_output_path = os.path.join(final_path, final_filename)
+            output_url = uno.systemPathToFileUrl(full_output_path)
+            export_props = []
+            if quality == "high":
+                export_props.append(PropertyValue(Name="SelectPdfVersion", Value=1))
+            elif quality == "print":
+                export_props.append(PropertyValue(Name="SelectPdfVersion", Value=2))
+            else:
+                export_props.append(PropertyValue(Name="SelectPdfVersion", Value=0))
+            export_props.append(PropertyValue(Name="ExportNotes", Value=include_comments))
+            export_props.extend(
+                [
+                    PropertyValue(Name="FilterName", Value="writer_pdf_Export"),
+                    PropertyValue(Name="Overwrite", Value=True),
+                ]
+            )
+            cls.doc.storeToURL(output_url, tuple(export_props))
+            cls.ret = f"PDF exported to: {full_output_path}"
+            return full_output_path
+        except Exception as e:
+            cls.ret = f"Error exporting to PDF: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_paragraph_alignment(cls, alignment, paragraph_indices=None):
+        """
+        Sets the text alignment for specified paragraphs in the document.
+
+        Args:
+            alignment (str): The alignment to apply ('left', 'center', 'right', 'justify').
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+                                               If not provided, applies to all paragraphs.
+
+        Returns:
+            str: Success message or error message
+        """
+        try:
+            alignment_map = {"left": LEFT, "center": CENTER, "right": RIGHT, "justify": 3}
+            if alignment.lower() not in alignment_map:
+                cls.ret = f"Error: Invalid alignment '{alignment}'. Use 'left', 'center', 'right', or 'justify'."
+                return cls.ret
+            alignment_value = alignment_map[alignment.lower()]
+            text = cls.doc.getText()
+            paragraph_enum = text.createEnumeration()
+            paragraphs = []
+            while paragraph_enum.hasMoreElements():
+                paragraph = paragraph_enum.nextElement()
+                if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                    paragraphs.append(paragraph)
+            if paragraph_indices:
+                valid_indices = [i for i in paragraph_indices if 0 <= i < len(paragraphs)]
+                if len(valid_indices) != len(paragraph_indices):
+                    cls.ret = f"Warning: Some paragraph indices were out of range (0-{len(paragraphs) - 1})"
+                for idx in valid_indices:
+                    paragraphs[idx].ParaAdjust = alignment_value
+            else:
+                for paragraph in paragraphs:
+                    paragraph.ParaAdjust = alignment_value
+            cls.ret = f"Successfully applied '{alignment}' alignment to paragraphs"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error setting paragraph alignment: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def capitalize_words(cls, paragraph_indices=None):
+        """
+        Capitalizes the first letter of each word for specified paragraphs in the document.
+
+        Args:
+            paragraph_indices (list, optional): Indices of paragraphs to modify (0-based indexing).
+                                               If not provided, applies to all paragraphs.
+
+        Returns:
+            str: Success message or error message
+        """
+        try:
+            text = cls.doc.getText()
+            enum = text.createEnumeration()
+            paragraphs = []
+            while enum.hasMoreElements():
+                paragraph = enum.nextElement()
+                if paragraph.supportsService("com.sun.star.text.Paragraph"):
+                    paragraphs.append(paragraph)
+            if not paragraph_indices:
+                target_paragraphs = list(range(len(paragraphs)))
+            else:
+                target_paragraphs = paragraph_indices
+            valid_indices = [idx for idx in target_paragraphs if 0 <= idx < len(paragraphs)]
+            for idx in valid_indices:
+                paragraph = paragraphs[idx]
+                text_content = paragraph.getString()
+                if not text_content.strip():
+                    continue
+                capitalized_text = " ".join(word.capitalize() if word else "" for word in text_content.split(" "))
+                para_cursor = text.createTextCursorByRange(paragraph.getStart())
+                para_cursor.gotoRange(paragraph.getEnd(), True)
+                para_cursor.setString(capitalized_text)
+            cls.ret = f"Successfully capitalized words in {len(valid_indices)} paragraphs"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error capitalizing words: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def set_default_font(cls, font_name, font_size=None):
+        """
+        Sets the default font for new text in the document without changing existing text.
+
+        Args:
+            font_name (str): The name of the font to set as default (e.g., 'Times New Roman', 'Arial', 'Calibri')
+            font_size (float, optional): The default font size in points.
+
+        Returns:
+            str: Success message or error message
+        """
+        try:
+            style_families = cls.doc.getStyleFamilies()
+            paragraph_styles = style_families.getByName("ParagraphStyles")
+            default_style_names = ["Default", "Standard", "Normal"]
+            standard_style = None
+            for style_name in default_style_names:
+                if paragraph_styles.hasByName(style_name):
+                    standard_style = paragraph_styles.getByName(style_name)
+                    break
+            if standard_style is None:
+                style_names = paragraph_styles.getElementNames()
+                if style_names:
+                    standard_style = paragraph_styles.getByName(style_names[0])
+                else:
+                    raise Exception("Could not find default paragraph style")
+            standard_style.setPropertyValue("CharFontName", font_name)
+            standard_style.setPropertyValue("CharFontNameAsian", font_name)
+            standard_style.setPropertyValue("CharFontNameComplex", font_name)
+            if font_size is not None:
+                standard_style.setPropertyValue("CharHeight", float(font_size))
+                standard_style.setPropertyValue("CharHeightAsian", float(font_size))
+                standard_style.setPropertyValue("CharHeightComplex", float(font_size))
+            cls.cursor.setPropertyValue("CharFontName", font_name)
+            cls.cursor.setPropertyValue("CharFontNameAsian", font_name)
+            cls.cursor.setPropertyValue("CharFontNameComplex", font_name)
+            if font_size is not None:
+                cls.cursor.setPropertyValue("CharHeight", float(font_size))
+                cls.cursor.setPropertyValue("CharHeightAsian", float(font_size))
+                cls.cursor.setPropertyValue("CharHeightComplex", float(font_size))
+            cls.ret = f"Default font set to '{font_name}'" + (f" with size {font_size}pt" if font_size else "")
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error setting default font: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def add_page_numbers(cls, position, start_number=1, format=None):
+        """
+        Adds page numbers to the document at the specified position.
+
+        Args:
+            position (str): Position of the page numbers ('bottom_left', 'bottom_center', 'bottom_right',
+                            'top_left', 'top_center', 'top_right')
+            start_number (int, optional): The starting page number. Defaults to 1.
+            format (str, optional): Format of the page numbers (e.g., '1', 'Page 1', '1 of N').
+                                   Defaults to simple number format.
+
+        Returns:
+            str: Success message or error message
+        """
+        try:
+            page_styles = cls.doc.StyleFamilies.getByName("PageStyles")
+            default_style = page_styles.getByName("Standard")
+            try:
+                default_style.setPropertyValue("PageNumberOffset", start_number)
+            except:
+                pass
+            if position.startswith("top"):
+                default_style.HeaderIsOn = True
+                target = default_style.HeaderText
+            else:
+                default_style.FooterIsOn = True
+                target = default_style.FooterText
+            cursor = target.createTextCursor()
+            cursor.gotoStart(False)
+            cursor.gotoEnd(True)
+            cursor.setString("")
+            cursor.gotoStart(False)
+            if position.endswith("_left"):
+                cursor.ParaAdjust = LEFT
+            elif position.endswith("_center"):
+                cursor.ParaAdjust = CENTER
+            elif position.endswith("_right"):
+                cursor.ParaAdjust = RIGHT
+            if not format or format == "1":
+                page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+                page_number.NumberingType = 4
+                target.insertTextContent(cursor, page_number, False)
+            elif format == "Page 1" or "Page" in format and "of" not in format:
+                target.insertString(cursor, "Page ", False)
+                page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+                page_number.NumberingType = 4
+                target.insertTextContent(cursor, page_number, False)
+            elif format == "1 of N" or format == "Page {page} of {total}" or "of" in format:
+                if "Page" in format:
+                    target.insertString(cursor, "Page ", False)
+                page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+                page_number.NumberingType = 4
+                target.insertTextContent(cursor, page_number, False)
+                target.insertString(cursor, " of ", False)
+                page_count = cls.doc.createInstance("com.sun.star.text.TextField.PageCount")
+                page_count.NumberingType = 4
+                target.insertTextContent(cursor, page_count, False)
+            else:
+                page_number = cls.doc.createInstance("com.sun.star.text.TextField.PageNumber")
+                page_number.NumberingType = 4
+                target.insertTextContent(cursor, page_number, False)
+            cls.ret = "Successfully added page numbers"
+            return cls.ret
+        except Exception as e:
+            cls.ret = f"Error adding page numbers: {str(e)}"
+            return cls.ret
+
+    @classmethod
+    def insert_page_break(cls, position="at_cursor"):
+        """
+        Inserts a page break at the specified position.
+
+        Args:
+            position (str): Where to insert the page break: 'at_cursor' for current cursor position,
+                           'end_of_document' for end of document. Defaults to 'at_cursor'.
+        """
+        try:
+            if position == "end_of_document":
+                cls.cursor.gotoEnd(False)
+            cls.text.insertControlCharacter(cls.cursor, PARAGRAPH_BREAK, False)
+            cls.cursor.gotoStartOfParagraph(True)
+            cls.cursor.BreakType = uno.Enum("com.sun.star.style.BreakType", "PAGE_BEFORE")
+            cls.ret = "Page break inserted successfully"
+            return True
+        except Exception as e:
+            cls.ret = f"Error inserting page break: {str(e)}"
+            return False
--- a/mm_agents/autoglm_v/tools/package/vlc.py
+++ b/mm_agents/autoglm_v/tools/package/vlc.py
@ -0,0 +1,233 @@
+import json
+import os
+import re
+import xml.etree.ElementTree as ET
+from pathlib import Path
+from urllib.parse import quote
+
+import requests
+from requests.auth import HTTPBasicAuth
+
+
+class VLCTools:
+    """
+    Helpers that drive VLC through its HTTP interface and edit its ``vlcrc`` file.
+
+    All methods are classmethods. Each one stores its outcome (a message or a
+    value) in ``cls.ret`` so that ``print_result`` can emit it afterwards; most
+    also return that value on success and ``None`` on failure.
+    """
+
+    host = "localhost"
+    port = 8080
+    base_url = f"http://{host}:{port}/requests"
+    password = "password"
+    auth = HTTPBasicAuth("", password)
+    ret = ""
+    # Seconds to wait for the HTTP interface; without it a stuck VLC would
+    # block the caller forever.
+    timeout = 10
+
+    @classmethod
+    def print_result(cls):
+        """Print the outcome stored by the most recent tool call."""
+        print(cls.ret)
+
+    @classmethod
+    def _vlcrc_path(cls):
+        """Return the path of the VLC settings file inside the home directory."""
+        return Path.home() / ".config/vlc/vlcrc"
+
+    @classmethod
+    def _make_request(cls, endpoint, params=None):
+        """
+        GET ``{base_url}/{endpoint}`` with basic auth.
+
+        Returns:
+            The ``requests`` response, or ``None`` if the request failed or
+            came back with an HTTP error status.
+        """
+        url = f"{cls.base_url}/{endpoint}"
+        try:
+            response = requests.get(url, params=params, auth=cls.auth, timeout=cls.timeout)
+            response.raise_for_status()
+        except requests.exceptions.RequestException:
+            return None
+        return response
+
+    @classmethod
+    def _get_status(cls):
+        """Fetch ``status.xml`` and return its parsed root element, or ``None`` on failure."""
+        response = cls._make_request("status.xml")
+        if response:
+            return ET.fromstring(response.content)
+        return None
+
+    @classmethod
+    def env_info(cls):
+        """Store the literal string "None" in ``cls.ret`` (no environment info is collected)."""
+        cls.ret = "None"
+
+    @classmethod
+    def get_playlist(cls):
+        """
+        Read ``playlist.xml`` and list the leaves under the node named "Playlist".
+
+        Returns:
+            str | None: "Playlist: [...]" with one dict (name, uri, duration) per
+            item, or ``None`` if the request failed. A missing "Playlist" node
+            yields an empty list instead of raising.
+        """
+        response = cls._make_request("playlist.xml")
+        if response:
+            info = ET.fromstring(response.content)
+            playlist_node = info.find('.//node[@name="Playlist"]')
+            # Initialised up front so a missing node reports an empty playlist.
+            playlist_items = []
+            if playlist_node is not None:
+                for leaf in playlist_node.findall("leaf"):
+                    duration = leaf.get("duration")
+                    item = {
+                        "name": leaf.get("name"),
+                        "uri": leaf.get("uri"),
+                        # A leaf without a duration attribute must not raise on "+".
+                        "duration": duration + "s" if duration is not None else None,
+                    }
+                    playlist_items.append(item)
+            cls.ret = f"Playlist: {playlist_items}"
+            return cls.ret
+        cls.ret = "Error getting playlist"
+        return None
+
+    @classmethod
+    def play(cls):
+        """Send the ``pl_play`` command; return the status message or ``None`` on failure."""
+        response = cls._make_request("status.xml", {"command": "pl_play"})
+        if response:
+            cls.ret = "Start playing the media"
+            return cls.ret
+        cls.ret = "Error playing the media"
+        return None
+
+    @classmethod
+    def pause(cls):
+        """Send the ``pl_pause`` command; return the status message or ``None`` on failure."""
+        response = cls._make_request("status.xml", {"command": "pl_pause"})
+        if response:
+            cls.ret = "Pause the media"
+            return cls.ret
+        cls.ret = "Error pausing the media"
+        return None
+
+    @classmethod
+    def next(cls):
+        """Send the ``pl_next`` command; return the status message or ``None`` on failure."""
+        response = cls._make_request("status.xml", {"command": "pl_next"})
+        if response:
+            cls.ret = "Switch to next media"
+            return cls.ret
+        cls.ret = "Error switching to next media"
+        return None
+
+    @classmethod
+    def previous(cls):
+        """Send the ``pl_previous`` command; return the status message or ``None`` on failure."""
+        response = cls._make_request("status.xml", {"command": "pl_previous"})
+        if response:
+            cls.ret = "Switch to previous media"
+            return cls.ret
+        cls.ret = "Error switching to previous media"
+        return None
+
+    @classmethod
+    def add_to_playlist(cls, uri):
+        """
+        Send the ``in_play`` command for ``uri``.
+
+        Args:
+            uri (str): Anything starting with "http" is passed through unchanged;
+                everything else is treated as a local path (an optional
+                "file://" prefix is stripped, the rest is percent-quoted).
+
+        Returns:
+            str | None: The status message, or ``None`` if the request failed.
+        """
+        if uri.startswith("http"):
+            encoded_uri = uri
+        else:
+            encoded_uri = "file://" + quote(uri.replace("file://", ""))
+
+        response = cls._make_request("status.xml", {"command": "in_play", "input": encoded_uri})
+        if response:
+            cls.ret = f"Add {uri} to playlist"
+            return cls.ret
+        cls.ret = f"Error adding {uri} to playlist"
+        return None
+
+    @classmethod
+    def get_current_time(cls):
+        """
+        Return the integer text of the ``time`` element of the status document.
+
+        Returns:
+            int | None: The parsed value; ``None`` if the element is missing or
+            the status could not be fetched (``cls.ret`` then holds an error
+            message rather than a stale value from an earlier call).
+        """
+        status = cls._get_status()
+        if status is not None:
+            time_node = status.find("time")
+            cls.ret = int(time_node.text) if time_node is not None else None
+            return cls.ret
+        cls.ret = "Error getting current time"
+        return None
+
+    @classmethod
+    def get_media_duration(cls):
+        """Report the ``length`` element of the status document, or ``None`` on failure."""
+        status = cls._get_status()
+        if status is not None:
+            length = status.find("length")
+            if length is not None:
+                cls.ret = f"Media duration: {length.text} seconds"
+                return cls.ret
+        cls.ret = "Error getting media duration"
+        return None
+
+    @classmethod
+    def get_settings(cls):
+        """
+        Read the active ``key=value`` entries of the vlcrc file.
+
+        Lines without "=" and lines whose key starts with "#" are skipped. The
+        line is split on the first "=" only, so values that themselves contain
+        "=" are kept.
+
+        Returns:
+            str: The settings as an indented JSON object.
+        """
+        settings = {}
+        with open(cls._vlcrc_path(), "r", encoding="utf-8") as f:
+            for line in f:
+                key, sep, value = line.partition("=")
+                key = key.strip()
+                if not sep or key.startswith("#"):
+                    continue
+                settings[key] = value.strip()
+        cls.ret = json.dumps(settings, indent=4, ensure_ascii=False)
+        return cls.ret
+
+    @classmethod
+    def set_settings(cls, field, value):
+        """
+        Set ``field`` to ``value`` in the vlcrc file.
+
+        Every line that holds ``field`` (active or commented out with "#") is
+        replaced by ``field=value``; if there is no such line, one is appended.
+
+        Args:
+            field (str): The exact setting name.
+            value: The new value; it is written via string formatting.
+
+        Returns:
+            str: The status message.
+        """
+        path = cls._vlcrc_path()
+        with open(path, "r", encoding="utf-8") as rf:
+            settings = rf.read()
+
+        new_line = f"{field}={value}"
+        # Regex matching the field entry in settings so it can be replaced.
+        # Anchored at line start so that e.g. "volume" does not also rewrite
+        # the tail of a longer key such as "xyz-volume".
+        pattern = re.compile(r"^[ \t]*#? *" + re.escape(field) + r"=.*", re.MULTILINE)
+        # Check whether the field entry exists.
+        if pattern.search(settings):
+            # A callable replacement keeps backslashes in the value literal.
+            settings = pattern.sub(lambda _match: new_line, settings)
+        else:
+            # Never glue the new entry onto an unterminated last line.
+            if settings and not settings.endswith("\n"):
+                settings += "\n"
+            settings += new_line + "\n"
+
+        with open(path, "w", encoding="utf-8") as wf:
+            wf.write(settings)
+
+        cls.ret = f"Set {field} to {value}"
+        return cls.ret
+
+    @classmethod
+    def toggle_fullscreen(cls, enable=None):
+        """
+        Toggle fullscreen mode or set it explicitly based on the enable parameter.
+
+        Args:
+            enable (bool, optional): If provided, explicitly set fullscreen mode (True for fullscreen, False for windowed)
+
+        Returns:
+            str: Success message, or None if the request failed
+        """
+        # NOTE(review): "fullscreen off" is sent verbatim when enable is falsy;
+        # confirm that the HTTP interface accepts this command.
+        if enable is not None:
+            command = "fullscreen" if enable else "fullscreen off"
+        else:
+            command = "fullscreen"
+        response = cls._make_request("status.xml", {"command": command})
+        if response:
+            action = "enabled" if enable is True else "disabled" if enable is False else "toggled"
+            cls.ret = f"Fullscreen mode {action}"
+            return cls.ret
+        cls.ret = "Error changing fullscreen mode"
+        return None
+
+    @classmethod
+    def get_media_files(cls, path, suffix=None):
+        """
+        Gets the media files for the specified path.
+
+        The directory is walked recursively and file extensions are compared
+        case-insensitively.
+
+        Args:
+            path (str): The path to the media files
+            suffix (List[str], optional): The suffix of the media files.
+                Defaults to ['mp4', 'avi', 'mkv', 'mov', 'mp3', 'm4a', 'wav']
+
+        Returns:
+            list | None: Full paths of the matching files, or None if the path is
+            empty, does not exist, or scanning raised (``cls.ret`` holds the reason).
+        """
+        # Set default suffix if not provided
+        if suffix is None:
+            suffix = ["mp4", "avi", "mkv", "mov", "mp3", "m4a", "wav"]
+
+        # Validate path
+        if not path:
+            cls.ret = "Path cannot be empty"
+            return None
+
+        if not os.path.exists(path):
+            cls.ret = f"Path not found: {path}"
+            return None
+
+        # Initialize result list
+        media_files = []
+
+        # Convert suffix list to lowercase for case-insensitive comparison
+        suffix = [s.lower() for s in suffix]
+
+        # Walk through directory
+        try:
+            for root, _, files in os.walk(path):
+                for file in files:
+                    # Check if file extension matches any of the specified suffixes
+                    if any(file.lower().endswith(f".{s}") for s in suffix):
+                        # Add full path of the file to results
+                        full_path = os.path.join(root, file)
+                        media_files.append(full_path)
+
+        except Exception as e:
+            cls.ret = f"Error while scanning directory: {str(e)}"
+            return None
+
+        cls.ret = media_files
+        return cls.ret
--- a/run_autoglm_v.py
+++ b/run_autoglm_v.py
@ -0,0 +1,608 @@
+"""Script to run end-to-end evaluation on the benchmark.
+Utils and basic architecture credit to https://github.com/web-arena-x/webarena/blob/main/run.py.
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+import math
+import ast
+import time
+import backoff
+import httpx
+import requests
+from openai import APIConnectionError, APIError, RateLimitError
+from requests.exceptions import SSLError
+from tqdm import tqdm
+
+import lib_run_single
+from desktop_env.desktop_env import MAX_RETRIES, DesktopEnv as DesktopEnvBase
+from mm_agents.autoglm_v import AutoGLMAgent
+from typing import Optional, Dict, Any
+from openai import OpenAI
+
+# Almost deprecated since it's not multi-env, use run_multienv_*.py instead
+
+#  Logger Configs {{{ #
+# The root logger accepts everything; each handler below applies its own level.
+logger = logging.getLogger()
+logger.setLevel(logging.DEBUG)
+
+datetime_str: str = datetime.datetime.now().strftime("%Y%m%d@%H%M%S")
+
+# FileHandler opens its file as soon as it is constructed, so the target
+# directory has to exist before the handlers are created.
+os.makedirs("logs", exist_ok=True)
+
+file_handler = logging.FileHandler(os.path.join("logs", "normal-{:}.log".format(datetime_str)), encoding="utf-8")
+debug_handler = logging.FileHandler(os.path.join("logs", "debug-{:}.log".format(datetime_str)), encoding="utf-8")
+stdout_handler = logging.StreamHandler(sys.stdout)
+sdebug_handler = logging.FileHandler(os.path.join("logs", "sdebug-{:}.log".format(datetime_str)), encoding="utf-8")
+
+file_handler.setLevel(logging.INFO)
+debug_handler.setLevel(logging.DEBUG)
+stdout_handler.setLevel(logging.INFO)
+sdebug_handler.setLevel(logging.DEBUG)
+
+# ANSI colour escapes are embedded in the format and are written to every handler.
+formatter = logging.Formatter(
+    fmt="\x1b[1;33m[%(asctime)s \x1b[31m%(levelname)s \x1b[32m%(module)s/%(lineno)d-%(processName)s\x1b[1;33m] \x1b[0m%(message)s"
+)
+file_handler.setFormatter(formatter)
+debug_handler.setFormatter(formatter)
+stdout_handler.setFormatter(formatter)
+sdebug_handler.setFormatter(formatter)
+
+# stdout and the "sdebug" file only receive records from the "desktopenv" logger hierarchy.
+stdout_handler.addFilter(logging.Filter("desktopenv"))
+sdebug_handler.addFilter(logging.Filter("desktopenv"))
+
+logger.addHandler(file_handler)
+logger.addHandler(debug_handler)
+logger.addHandler(stdout_handler)
+logger.addHandler(sdebug_handler)
+#  }}} Logger Configs #
+
+# From here on the module logs through the experiment logger.
+logger = logging.getLogger("desktopenv.experiment")
+
+
+def config() -> argparse.Namespace:
+    """
+    Build the command-line parser for the evaluation run and parse ``sys.argv``.
+
+    Note that ``--headless`` is a ``store_true`` flag whose default is already
+    True, so the parsed value is True whether or not the flag is given.
+
+    Returns:
+        argparse.Namespace: The parsed options.
+    """
+    parser = argparse.ArgumentParser(description="Run end-to-end evaluation on the benchmark")
+    add = parser.add_argument
+
+    # environment config
+    add("--path_to_vm", type=str)
+    add(
+        "--provider_name",
+        type=str,
+        default="docker",
+        help="Virtualization provider (vmware, docker, aws, azure, gcp, virtualbox)",
+    )
+    add("--headless", action="store_true", default=True, help="Run in headless machine")
+    add("--action_space", type=str, default="autoglm_computer_use", help="Action type")
+    add(
+        "--observation_type",
+        choices=["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"],
+        default="a11y_tree",
+        help="Observation type",
+    )
+    add("--screen_width", type=int, default=1920)
+    add("--screen_height", type=int, default=1080)
+    add("--sleep_after_execution", type=float, default=1.0)
+    add("--max_steps", type=int, default=50)
+
+    # agent config
+    add("--max_trajectory_length", type=int, default=3)
+    add("--test_config_base_dir", type=str, default="evaluation_examples/examples")
+
+    # lm config
+    add("--model", type=str, default="autoglm-os")
+    add("--temperature", type=float, default=0.4)
+    add("--top_p", type=float, default=0.5)
+    add("--max_tokens", type=int, default=4096)
+    add("--stop_token", type=str, default=None)
+    add("--image_width", type=int, default=1280)
+    add("--image_height", type=int, default=720)
+
+    # example config
+    add("--domain", type=str, default="all")
+    add("--test_all_meta_path", type=str, default="evaluation_examples/test_nogdrive.json")
+
+    # aws config
+    add("--region", type=str, default="us-east-1", help="AWS region for the VM")
+    add("--client_password", type=str, default="", help="Client password")
+
+    # logging related
+    add("--result_dir", type=str, default="./results")
+
+    return parser.parse_args()
+
+
+class DesktopEnv(DesktopEnvBase):
+    """
+    Desktop environment specialised for the AutoGLM agent.
+
+    It overrides ``step``, ``reset`` and ``_get_obs`` so that observations also
+    carry the open-window list, the active window/app, tool-specific app info
+    and the textual result of the last executed action.
+    """
+
+    def step(self, action, pause=2):
+        """
+        Execute one action and return the resulting observation.
+
+        Args:
+            action: One of the strings 'WAIT', 'FAIL', 'DONE'; a dict with an
+                'action_type' of 'OPEN_APP' or 'OPEN_CHROME_TAB' and matching
+                'parameters'; or anything else, which is handed to the
+                controller as a Python command.
+            pause: Seconds to sleep after the action (and, for 'WAIT', also as
+                the wait itself).
+
+        Returns:
+            tuple: ``(observation, reward, done, info)``. ``reward`` is always 0;
+            ``done`` is True only for 'FAIL' and 'DONE'. The observation gets an
+            extra 'exe_result' entry describing what happened.
+        """
+        self._step_no += 1
+        self.action_history.append(action)
+
+        # Mark environment as used when step is called
+        self.is_environment_used = True
+
+        reward = 0  # todo: Define reward calculation for each example
+        done = False  # todo: Define episode termination condition for each example
+        info = {}
+        logger.info(f"Step {self._step_no} in trajectory {self._traj_no} with action: {action}")
+
+        # handle the special actions
+        if action in ['WAIT', 'FAIL', 'DONE']:
+            if action == 'WAIT':
+                time.sleep(pause)
+                exe_result = 'Wait ' + str(pause) + ' seconds'
+            elif action == 'FAIL':
+                done = True
+                info = {"fail": True}
+                exe_result = 'Finish: fail'
+            elif action == 'DONE':
+                done = True
+                info = {"done": True}
+                exe_result = 'Finish: success'
+        elif isinstance(action, dict):
+            action_type = action['action_type']
+            if action_type == 'OPEN_APP':
+                self.setup_controller._launch_setup(action['parameters']['launch_app_command'], shell=True)
+                exe_result = 'Open ' + action['parameters']['app_name']
+            elif action_type == 'OPEN_CHROME_TAB':
+                self.setup_controller._chrome_open_tabs_setup(action['parameters']['urls_to_open'])
+                exe_result = 'Open ' + str(action['parameters']['urls_to_open']) + ' in Chrome successfully'
+            else:
+                # Without this branch exe_result would be unbound further down.
+                exe_result = f'Error Action: unsupported action_type {action_type!r}'
+                logger.error(f"Unsupported action_type: {action_type!r}")
+        else:
+            # the set of all possible python commands insides `pyautogui`
+            result = self.controller.execute_python_command(action)
+            try:
+                if result['error']:
+                    exe_result = result['error'].strip()
+                else:
+                    exe_result = result['output'].strip()
+            except Exception as e:
+                # Formatting instead of "+" so a non-string action cannot raise here.
+                exe_result = f'Error Action: {action}'
+                logger.error(f"Error executing action: {e}")
+
+        time.sleep(pause)
+        observation = self._get_obs()
+        observation['exe_result'] = exe_result
+
+        return observation, reward, done, info
+
+    def reset(self, task_config: Optional[Dict[str, Any]] = None, seed=None, options=None) -> Dict[str, Any]:
+        """
+        Reset the environment for a task and return the first observation.
+
+        The snapshot is reverted only if the environment was used since the
+        last revert. With a ``task_config`` the task setup is attempted up to
+        ``MAX_RETRIES`` times. Afterwards every regular file of the agent's
+        ``tools/package`` directory is uploaded to ``~`` on the VM and a
+        listening ``soffice`` instance is launched.
+
+        Args:
+            task_config: Task description; its "proxy" entry is honoured only
+                when ``self.enable_proxy`` is set.
+            seed: Unused here.
+            options: Unused here.
+
+        Returns:
+            dict: The observation produced by ``_get_obs``.
+        """
+        # Reset to certain task in OSWorld
+        logger.info("Resetting environment...")
+        logger.info("Switching task...")
+        logger.info("Setting counters...")
+        self._traj_no += 1
+        self._step_no = 0
+        self.action_history.clear()
+
+        for attempt in range(MAX_RETRIES):
+            # Only revert to snapshot if environment has been used (step/setup)
+            # This optimization is especially important for cloud providers like AWS
+            # where unnecessary snapshot operations are costly and time-consuming
+
+            if task_config is not None:
+                # Only consider task proxy requirement if proxy is enabled at system level
+                task_use_proxy = task_config.get("proxy", False) and self.enable_proxy
+                if not self.enable_proxy and task_config.get("proxy", False):
+                    logger.info("Task requires proxy but proxy is disabled at system level, ignoring proxy requirement.")
+
+                if task_use_proxy != self.current_use_proxy:
+                    # keep because get_info_from_website depend on this
+                    self.current_use_proxy = task_use_proxy
+
+            if self.is_environment_used:
+                logger.info("Environment has been used, reverting to snapshot {}...".format(self.snapshot_name))
+                self._revert_to_snapshot()
+                logger.info("Starting emulator...")
+                self._start_emulator()
+                logger.info("Emulator started.")
+                # Reset the usage flag after reverting
+                self.is_environment_used = False
+            else:
+                logger.info("Environment is clean, skipping snapshot revert (provider: {}).".format(self.provider_name))
+
+            if task_config is not None:
+                if task_config.get("proxy", False) and self.enable_proxy:
+                    # If using proxy and proxy is enabled, set up the proxy configuration
+                    self.setup_controller._proxy_setup(self.client_password)
+                self._set_task_info(task_config)
+                self.setup_controller.reset_cache_dir(self.cache_dir)
+                logger.info("Setting up environment...")
+                success = self.setup_controller.setup(self.config, task_config.get("proxy", False) and self.enable_proxy)
+                if success:
+                    # Mark environment as used when setup is successfully executed
+                    if self.config:  # Only mark as used if there were actual setup operations
+                        self.is_environment_used = True
+                    break
+                else:
+                    logger.error(
+                        "Environment setup failed, retrying (%d/%d)...",
+                        attempt + 1,
+                        MAX_RETRIES,
+                    )
+                    time.sleep(5)
+            else:
+                break
+
+        logger.info("Environment setup complete.")
+
+        # Upload tools from autoglm package
+        import mm_agents.autoglm_v
+        tool_dir = os.path.join(os.path.dirname(mm_agents.autoglm_v.__file__), 'tools', 'package')
+        for file in os.listdir(tool_dir):
+            if os.path.isdir(os.path.join(tool_dir, file)):
+                continue
+            self.setup_controller._upload_file_setup([{
+                "local_path": os.path.join(tool_dir, file),
+                "path": os.path.join('~', file)
+            }])
+
+        # start soffice service for office tools
+        self.setup_controller._launch_setup('soffice --accept="socket,host=localhost,port=2002;urp;" --norestore --nologo --nodefault', shell=True)
+        time.sleep(5)
+
+        observation = self._get_obs()
+        return observation
+
+    def get_current_apps(self):
+        """
+        Query the VM for its open windows and the active window id.
+
+        Runs ``wmctrl -xl`` remotely and keeps the windows whose second column
+        is '0'.
+
+        Returns:
+            tuple: ``(app_list, cur_id)`` where ``app_list`` maps a window id to
+            ``{'app_name': ..., 'title': ...}`` and ``cur_id`` is the id printed
+            by the active-window command (possibly an empty string).
+        """
+        apps_code = r"""import subprocess;
+command = "wmctrl -xl";
+apps = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip().split('\n');
+print(apps);"""
+        window_code = r"""import subprocess;
+command = "wmctrl -a :ACTIVE: -v 2>&1 | grep 'Using window' | awk '{print $3}'";
+window_id = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
+print(window_id);"""
+
+        apps = self.controller.execute_python_command(apps_code)['output'].strip()
+        # The remote script prints a Python list literal; parse it back safely.
+        apps = ast.literal_eval(apps)
+        app_list = {}
+
+        for app in apps:
+            parts = app.split(maxsplit=4)
+            if len(parts) < 4:
+                continue
+            if parts[1] != '0':
+                continue
+            window_id = parts[0]
+            # Keep the trailing half (rounded up) of the dot-separated class
+            # name; a name without dots is kept whole.
+            app_name = '.'.join(parts[2].split('.')[-(math.ceil(parts[2].count('.') / 2)):])
+            # NOTE(review): with `wmctrl -xl` the fourth column looks like the
+            # client machine name and the window title would be parts[4] --
+            # confirm before changing, the agent may rely on the current value.
+            title = parts[3]
+            app_list[window_id] = {
+                'app_name': app_name,
+                'title': title
+            }
+
+        cur_id = self.controller.execute_python_command(window_code)['output'].strip()
+
+        return app_list, cur_id
+
+    def maximize_window(self):
+        """
+        Try up to five times to maximize the active window.
+
+        Returns as soon as the window state shows that nothing more can be
+        done; failures are logged and retried after a short sleep.
+        """
+        window_state = r"""import subprocess;
+command = "xprop -id $(xprop -root _NET_ACTIVE_WINDOW | awk -F' ' '{print $5}') _NET_WM_STATE"
+output = subprocess.run(command, shell=True, capture_output=True, text=True).stdout.strip();
+print(output);"""
+        for _ in range(5):
+            try:
+                self.setup_controller._launch_setup('wmctrl -r :ACTIVE: -b add,maximized_vert,maximized_horz', shell=True)
+                time.sleep(2)
+                output = self.controller.execute_python_command(window_state)['output'].strip()
+                if '_NET_WM_STATE_FOCUSED' not in output or '_NET_WM_STATE_SKIP_TASKBAR' in output or '_NET_WM_STATE_MODAL' in output or '_NET_WM_STATE_MAXIMIZED' in output: # no window, or a popup, or a modal window, or the window is already maximized
+                    return
+            except Exception as e:
+                logger.error(f"Failed to maximize window: {e}")
+                time.sleep(1)
+
+    def _get_obs(self):
+        """
+        Collect the observation handed to the agent.
+
+        Maximizes the active window, lists the open windows (up to three
+        attempts), asks the matching tool class on the VM for app-specific
+        info, and grabs the accessibility tree and a screenshot.
+
+        Returns:
+            dict: screenshot (``b''`` if none could be taken), accessibility_tree,
+            instruction, apps, cur_window_id, cur_app and app_info.
+
+        Raises:
+            Exception: Whatever ``get_current_apps`` raised on its third failed attempt.
+        """
+        tool_list = {
+            "libreoffice_calc": "CalcTools",
+            "libreoffice_impress": "ImpressTools",
+            "libreoffice_writer": "WriterTools",
+            "code": "CodeTools",
+            "vlc": "VLCTools",
+            "google_chrome": "BrowserTools"
+        }
+
+        self.maximize_window()
+
+        for i in range(3):
+            try:
+                app_list, cur_id = self.get_current_apps()
+                # Stop retrying once the query succeeded.
+                break
+            except Exception as e:
+                if i == 2:
+                    raise
+                logger.error(f"Failed to get current apps: {e}")
+                time.sleep(1)
+
+        if cur_id in app_list:
+            cur_app = app_list[cur_id]['app_name']
+
+            # Tool modules are named after the app, lower-cased with "_" for "-".
+            tool_name = cur_app.strip().lower().replace('-', '_')
+            if tool_name in tool_list:
+                class_name = tool_list[tool_name]
+                command = f"from {tool_name} import *; "
+                command += f"{class_name}.env_info(); "
+                command += f"{class_name}.print_result();"
+                app_info = self.controller.execute_python_command(command)['output'].strip()
+            else:
+                app_info = None
+        else:
+            cur_app = None
+            app_info = None
+
+        tree = self.controller.get_accessibility_tree()
+        screenshot = self.controller.get_screenshot()
+        if screenshot is None:
+            logger.error("Failed to get screenshot.")
+            screenshot = b''
+
+        return {
+            "screenshot": screenshot,
+            "accessibility_tree": tree,
+            "instruction": self.instruction,
+            "apps": app_list,
+            "cur_window_id": cur_id,
+            "cur_app": cur_app,
+            "app_info": app_info,
+        }
+
+
+def test(args: argparse.Namespace, test_all_meta: dict) -> None:
+    """
+    Run every example listed in ``test_all_meta`` and log the average score.
+
+    One environment and one agent are created and shared by all examples.
+    An exception inside a single example is logged and recorded in that
+    example's ``traj.jsonl``; the run then continues with the next example.
+    The environment is closed even if the run is aborted by an error outside
+    that per-example handling.
+
+    Args:
+        args: Parsed command-line options (see ``config``).
+        test_all_meta: Mapping of domain name to the example ids to run.
+    """
+    scores = []
+    max_steps = args.max_steps
+
+    # log args
+    logger.info("Args: %s", args)
+    # set wandb project
+    cfg_args = {
+        "path_to_vm": args.path_to_vm,
+        "provider_name": args.provider_name,
+        "headless": args.headless,
+        "action_space": args.action_space,
+        "observation_type": args.observation_type,
+        "screen_width": args.screen_width,
+        "screen_height": args.screen_height,
+        "sleep_after_execution": args.sleep_after_execution,
+        "max_steps": args.max_steps,
+        "max_trajectory_length": args.max_trajectory_length,
+        "model": args.model,
+        "temperature": args.temperature,
+        "top_p": args.top_p,
+        "max_tokens": args.max_tokens,
+        "stop_token": args.stop_token,
+        "result_dir": args.result_dir,
+    }
+
+    # NOTE(review): the request below goes through `requests`, which raises its
+    # own exception types -- confirm whether these openai exceptions can occur here.
+    @backoff.on_exception(
+        backoff.constant,
+        (RateLimitError, APIConnectionError),
+        interval=0.1,
+    )
+    def call_llm(messages):
+        """POST ``messages`` to the chat-completions endpoint and return the reply text."""
+        logger.info("Calling LLM...")
+
+        # Prepare the request data
+        data = {
+            "model": args.model,
+            "messages": messages,
+            "max_tokens": args.max_tokens,
+            "temperature": args.temperature,
+            "top_p": args.top_p,
+            "skip_special_tokens": False,
+            "stream": False,
+            "include_stop_str_in_output": True,
+            "stop": ["<|user|>", "<|observation|>", "</answer>"]
+        }
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY', '')}"
+        }
+
+        # Get API base URL from environment or use default
+        base_url = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')
+        url = f"{base_url}/chat/completions"
+
+        response = requests.post(
+            url,
+            json=data,
+            headers=headers,
+            timeout=60.0
+        )
+        response.raise_for_status()
+
+        result = response.json()
+        logger.info("LLM called successfully.")
+        return result['choices'][0]['message']['content']
+
+    env = DesktopEnv(
+        provider_name=args.provider_name,
+        region=args.region,
+        client_password=args.client_password,
+        path_to_vm=args.path_to_vm,
+        action_space=args.action_space,
+        screen_size=(args.screen_width, args.screen_height),
+        headless=args.headless,
+        os_type="Ubuntu",
+        require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
+    )
+
+    # Everything after the environment exists runs under try/finally so the
+    # environment is released even when agent construction or a config file fails.
+    try:
+        agent = AutoGLMAgent(
+            action_space=args.action_space,
+            observation_type=args.observation_type,
+            screen_size=(args.screen_width, args.screen_height),
+            image_size=(args.image_width, args.image_height),
+            max_trajectory_length=args.max_trajectory_length,
+            client_password=args.client_password,
+            gen_func=call_llm,
+        )
+
+        for domain in tqdm(test_all_meta, desc="Domain"):
+            for example_id in tqdm(test_all_meta[domain], desc="Example", leave=False):
+                config_file = os.path.join(args.test_config_base_dir, f"{domain}/{example_id}.json")
+                with open(config_file, "r", encoding="utf-8") as f:
+                    example = json.load(f)
+
+                logger.info(f"[Domain]: {domain}")
+                logger.info(f"[Example ID]: {example_id}")
+
+                instruction = example["instruction"]
+
+                logger.info(f"[Instruction]: {instruction}")
+                # wandb each example config settings
+                cfg_args["instruction"] = instruction
+                cfg_args["start_time"] = datetime.datetime.now().strftime("%Y:%m:%d-%H:%M:%S")
+
+                example_result_dir = os.path.join(
+                    args.result_dir,
+                    args.action_space,
+                    args.observation_type,
+                    args.model,
+                    domain,
+                    example_id,
+                )
+                os.makedirs(example_result_dir, exist_ok=True)
+                # example start running
+                try:
+                    lib_run_single.run_single_example_autoglm(
+                        agent,
+                        env,
+                        example,
+                        max_steps,
+                        instruction,
+                        args,
+                        example_result_dir,
+                        scores,
+                    )
+                except Exception as e:
+                    logger.error(f"Exception in {domain}/{example_id}: {e}")
+                    # Only attempt to end recording if controller exists (not Docker provider)
+                    if hasattr(env, "controller") and env.controller is not None:
+                        env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+                    # The recorded text is the same for every kind of failure;
+                    # the actual exception is in the log line above.
+                    with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+                        f.write(json.dumps({"Error": f"Time limit exceeded in {domain}/{example_id}"}))
+                        f.write("\n")
+    finally:
+        env.close()
+
+    # No example may have produced a score (nothing left to run, or every
+    # example failed); avoid dividing by zero in that case.
+    if scores:
+        logger.info(f"Average score: {sum(scores) / len(scores)}")
+    else:
+        logger.info("Average score: n/a (no example produced a score)")
+
+
+def get_unfinished(action_space, use_model, observation_type, result_dir, total_file_json):
+    """
+    Drop already-finished examples from ``total_file_json``.
+
+    An example counts as finished when its result directory contains
+    ``result.txt``. Example directories without it are emptied so the example
+    starts from scratch. Entries named "onboard" are ignored.
+
+    Args:
+        action_space, observation_type, use_model: Path components below
+            ``result_dir`` that identify the experiment.
+        result_dir: Root directory of all results.
+        total_file_json: Mapping of domain to example ids; updated in place.
+
+    Returns:
+        The same ``total_file_json`` object with finished ids filtered out.
+    """
+    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(target_dir):
+        return total_file_json
+
+    finished = {}
+    for domain in os.listdir(target_dir):
+        # Every entry gets a (possibly empty) list, even if it is not a directory.
+        done_ids = []
+        finished[domain] = done_ids
+        domain_path = os.path.join(target_dir, domain)
+        if not os.path.isdir(domain_path):
+            continue
+        for example_id in os.listdir(domain_path):
+            if example_id == "onboard":
+                continue
+            example_path = os.path.join(domain_path, example_id)
+            if not os.path.isdir(example_path):
+                continue
+            if "result.txt" in os.listdir(example_path):
+                done_ids.append(example_id)
+                continue
+            # empty all files under example_id
+            for leftover in os.listdir(example_path):
+                os.remove(os.path.join(example_path, leftover))
+
+    if not finished:
+        return total_file_json
+
+    for domain in finished:
+        if domain not in total_file_json:
+            continue
+        done_ids = finished[domain]
+        total_file_json[domain] = [x for x in total_file_json[domain] if x not in done_ids]
+
+    return total_file_json
+
+
+def get_result(action_space, use_model, observation_type, result_dir, total_file_json):
+    """
+    Collect the scores recorded so far and print the current success rate.
+
+    Each ``result.txt`` found two levels below the experiment directory
+    contributes one score: "true" (any case) counts as 1.0, any other content
+    is parsed as a float, and unreadable or unparsable files count as 0.0.
+
+    Args:
+        action_space, observation_type, use_model: Path components below
+            ``result_dir`` that identify the experiment.
+        result_dir: Root directory of all results.
+        total_file_json: Not used by this function.
+
+    Returns:
+        list[float] | None: The scores, or None when there are none yet.
+    """
+    target_dir = os.path.join(result_dir, action_space, observation_type, use_model)
+    if not os.path.exists(target_dir):
+        print("New experiment, no result yet.")
+        return None
+
+    all_result = []
+    for domain in os.listdir(target_dir):
+        domain_path = os.path.join(target_dir, domain)
+        if not os.path.isdir(domain_path):
+            continue
+        for example_id in os.listdir(domain_path):
+            example_path = os.path.join(domain_path, example_id)
+            if not os.path.isdir(example_path):
+                continue
+            if "result.txt" not in os.listdir(example_path):
+                continue
+            try:
+                with open(os.path.join(example_path, "result.txt"), "r") as rf:
+                    res = rf.read().strip()
+                score = 1.0 if res.lower() == "true" else float(res)
+            except Exception:
+                score = 0.0
+            all_result.append(score)
+
+    if all_result:
+        print("Current Success Rate:", sum(all_result) / len(all_result) * 100, "%")
+        return all_result
+
+    print("New experiment, no result yet.")
+    return None
+
+
+if __name__ == "__main__":
+    ####### The complete version of the list of examples #######
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    args = config()
+
+    # An empty password falls back to a provider-specific default; a password
+    # given on the command line is used as is.
+    if args.client_password == "":
+        args.client_password = "osworld-public-evaluation" if args.provider_name == "aws" else "password"
+
+    # save args to json in result_dir/action_space/observation_type/model/args.json
+    experiment_dir = os.path.join(
+        args.result_dir,
+        args.action_space,
+        args.observation_type,
+        args.model,
+    )
+    path_to_args = os.path.join(experiment_dir, "args.json")
+    os.makedirs(os.path.dirname(path_to_args), exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as f:
+        json.dump(vars(args), f, indent=4)
+
+    with open(args.test_all_meta_path, "r", encoding="utf-8") as f:
+        test_all_meta = json.load(f)
+
+    # Restrict the run to a single domain when one was requested.
+    if args.domain != "all":
+        test_all_meta = {args.domain: test_all_meta[args.domain]}
+
+    experiment_key = (args.action_space, args.model, args.observation_type, args.result_dir)
+
+    test_file_list = get_unfinished(*experiment_key, test_all_meta)
+    left_info = "".join(f"{domain}: {len(test_file_list[domain])}\n" for domain in test_file_list)
+    logger.info(f"Left tasks:\n{left_info}")
+
+    get_result(*experiment_key, test_all_meta)
+    test(args, test_file_list)
--- a/run_multienv_autoglm_v.py
+++ b/run_multienv_autoglm_v.py
@ -0,0 +1,294 @@
+"""Script to run end-to-end evaluation on the benchmark.
+Utils and basic architecture credit to https://github.com/web-arena-x/webarena/blob/main/run.py.
+"""
+
+import argparse
+import datetime
+import json
+import logging
+import os
+import sys
+import math
+import ast
+import time
+import backoff
+import httpx
+import requests
+from tqdm import tqdm
+from typing import Optional, Dict, Any
+from multiprocessing import Pool
+from openai import APIConnectionError, APIError, RateLimitError
+from types import SimpleNamespace
+
+import lib_run_single
+from run_autoglm_v import DesktopEnv, get_unfinished, get_result
+from desktop_env.desktop_env import MAX_RETRIES, DesktopEnv as DesktopEnvBase
+from mm_agents.autoglm_v import AutoGLMAgent
+from openai import OpenAI
+
+logger = logging.getLogger("desktopenv.experiment")
+
+def config() -> argparse.Namespace:
+    """Parse the command-line arguments for the parallel evaluation run.
+
+    Returns:
+        The parsed ``argparse.Namespace``. ``headless`` defaults to True and
+        can be turned off with ``--no-headless``.
+    """
+    parser = argparse.ArgumentParser(description="Run end-to-end evaluation on the benchmark")
+
+    # environment config
+    parser.add_argument("--path_to_vm", type=str)
+    parser.add_argument(
+        "--provider_name",
+        type=str,
+        default="docker",
+        help="Virtualization provider (vmware, docker, aws, azure, gcp, virtualbox)",
+    )
+    # "--headless" is a store_true flag whose default is already True, so on its
+    # own it can never yield False; "--no-headless" writes to the same
+    # destination to make the option actually switchable.
+    parser.add_argument("--headless", action="store_true", default=True, help="Run in headless machine")
+    parser.add_argument(
+        "--no-headless",
+        dest="headless",
+        action="store_false",
+        help="Disable headless mode",
+    )
+    parser.add_argument("--action_space", type=str, default="autoglm_computer_use", help="Action type")
+    parser.add_argument(
+        "--observation_type",
+        choices=["screenshot", "a11y_tree", "screenshot_a11y_tree", "som"],
+        default="a11y_tree",
+        help="Observation type",
+    )
+    parser.add_argument("--screen_width", type=int, default=1920)
+    parser.add_argument("--screen_height", type=int, default=1080)
+    parser.add_argument("--sleep_after_execution", type=float, default=1.0)
+    parser.add_argument("--max_steps", type=int, default=30)
+
+    # agent config
+    parser.add_argument("--max_trajectory_length", type=int, default=3)
+    parser.add_argument("--test_config_base_dir", type=str, default="evaluation_examples/examples")
+
+    # lm config
+    parser.add_argument("--model", type=str, default="autoglm-os")
+    parser.add_argument("--temperature", type=float, default=0.4)
+    parser.add_argument("--top_p", type=float, default=0.5)
+    parser.add_argument("--max_tokens", type=int, default=2048)
+    parser.add_argument("--stop_token", type=str, default=None)
+    parser.add_argument("--image_width", type=int, default=1280)
+    parser.add_argument("--image_height", type=int, default=720)
+
+    # example config
+    parser.add_argument("--domain", type=str, default="all")
+    parser.add_argument("--test_all_meta_path", type=str, default="evaluation_examples/test_nogdrive.json")
+
+    # aws config
+    parser.add_argument(
+        "--region", type=str, default="us-east-1", help="AWS region for the VM"
+    )
+    parser.add_argument("--client_password", type=str, default="", help="Client password")
+
+    # logging related
+    parser.add_argument("--result_dir", type=str, default="./results")
+
+    # parallel number
+    parser.add_argument("--num_workers", type=int, default=20, help="Number of parallel workers")
+
+    return parser.parse_args()
+
+def _worker_run(task):
+    """Run a single benchmark example and return its score.
+
+    This is the function mapped over the task list by ``test_parallel``.
+
+    Args:
+        task: A ``(domain, example_id, args)`` tuple, where ``args`` is the
+            parsed ``argparse.Namespace``.
+
+    Returns:
+        A ``(domain, example_id, score)`` tuple. ``score`` is read from the
+        example's ``result.txt`` (``"true"`` counts as 1.0, anything else is
+        parsed as a float). It is 0.0 when that file is missing or cannot be
+        parsed, and also when anything fails before the example can run.
+    """
+    domain, example_id, args = task  # args is an argparse.Namespace
+    logger = logging.getLogger("desktopenv.experiment")
+    try:
+        config_file = os.path.join(args.test_config_base_dir, f"{domain}/{example_id}.json")
+        with open(config_file, "r", encoding="utf-8") as f:
+            example = json.load(f)
+        instruction = example["instruction"]
+
+        # NOTE(review): the body below talks to the API through `requests`,
+        # while the retried exception types come from the `openai` package, so
+        # this retry presumably never triggers — confirm, and widen the tuple
+        # (with a retry bound) if retries are actually wanted.
+        @backoff.on_exception(backoff.constant, (RateLimitError, APIConnectionError), interval=0.1)
+        def call_llm(messages):
+            """POST `messages` to the chat-completions endpoint and return the reply text."""
+            logger.info("Calling LLM...")
+
+            # Prepare the request data
+            data = {
+                "model": args.model,
+                "messages": messages,
+                "max_tokens": args.max_tokens,
+                "temperature": args.temperature,
+                "top_p": args.top_p,
+                "skip_special_tokens": False,
+                "stream": False,
+                "include_stop_str_in_output": True,
+                "stop": ["<|user|>", "<|observation|>", "</answer>"]
+            }
+
+            headers = {
+                "Content-Type": "application/json",
+                "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY', '')}"
+            }
+
+            # Get API base URL from environment or use default
+            base_url = os.environ.get('OPENAI_BASE_URL', 'https://api.openai.com/v1')
+            url = f"{base_url}/chat/completions"
+
+            response = requests.post(
+                url,
+                json=data,
+                headers=headers,
+                timeout=60.0
+            )
+            response.raise_for_status()
+
+            result = response.json()
+            logger.info("LLM called successfully.")
+            return result['choices'][0]['message']['content']
+
+        env = DesktopEnv(
+            provider_name=args.provider_name,
+            region=args.region,
+            client_password=args.client_password,
+            path_to_vm=args.path_to_vm,
+            action_space=args.action_space,
+            screen_size=(args.screen_width, args.screen_height),
+            headless=args.headless,
+            os_type="Ubuntu",
+            require_a11y_tree=args.observation_type in ["a11y_tree", "screenshot_a11y_tree", "som"],
+        )
+        # From here on the environment exists, so everything (including agent
+        # construction and result-dir creation) runs under a finally that
+        # closes it; otherwise a failure in those steps would leak the env.
+        try:
+            agent = AutoGLMAgent(
+                action_space=args.action_space,
+                observation_type=args.observation_type,
+                screen_size=(args.screen_width, args.screen_height),
+                image_size=(args.image_width, args.image_height),
+                max_trajectory_length=args.max_trajectory_length,
+                client_password=args.client_password,
+                gen_func=call_llm,
+            )
+
+            example_result_dir = os.path.join(
+                args.result_dir,
+                args.action_space,
+                args.observation_type,
+                args.model,
+                domain,
+                example_id,
+            )
+            os.makedirs(example_result_dir, exist_ok=True)
+
+            local_scores = []
+            try:
+                lib_run_single.run_single_example_autoglm(
+                    agent,
+                    env,
+                    example,
+                    args.max_steps,
+                    instruction,
+                    args,
+                    example_result_dir,
+                    local_scores,
+                )
+            except Exception as e:
+                logger.error(f"[并发任务异常] {domain}/{example_id}: {e}")
+                # Best effort: try to finalize the recording, then note the
+                # failure in the trajectory file.
+                if hasattr(env, "controller") and env.controller is not None:
+                    try:
+                        env.controller.end_recording(os.path.join(example_result_dir, "recording.mp4"))
+                    except Exception:
+                        pass
+                with open(os.path.join(example_result_dir, "traj.jsonl"), "a") as f:
+                    f.write(json.dumps({"Error": f"Exception in {domain}/{example_id}: {str(e)}"}) + "\n")
+        finally:
+            try:
+                env.close()
+            except Exception as close_err:
+                # Closing stays best-effort, but the failure is no longer silent.
+                logger.warning(f"[Close Fail] {domain}/{example_id}: {close_err}")
+
+        # The score is whatever was written to result.txt; default to 0.0.
+        score = 0.0
+        result_path = os.path.join(example_result_dir, "result.txt")
+        if os.path.exists(result_path):
+            try:
+                with open(result_path, "r") as rf:
+                    res = rf.read().strip()
+                score = 1.0 if res.lower() == "true" else float(res)
+            except (OSError, ValueError):
+                score = 0.0
+        logger.info(f"[Finish] {domain}/{example_id} score={score}")
+        return (domain, example_id, score)
+    except Exception as e:
+        logger.error(f"[Initializing Fail] {domain}/{example_id}: {e}")
+        return (domain, example_id, 0.0)
+
+def test_parallel(args: argparse.Namespace, test_all_meta: dict):
+    """Run every listed example through a process pool and log the mean score.
+
+    Args:
+        args: Parsed command-line arguments; ``num_workers`` sets the pool size
+            and the whole namespace is forwarded to each worker.
+        test_all_meta: Mapping of domain name to an iterable of example ids.
+    """
+    tasks = [
+        (domain, example_id, args)
+        for domain in test_all_meta
+        for example_id in test_all_meta[domain]
+    ]
+    if not tasks:
+        logger.info("No pending tasks")
+        return
+    logger.info(f"Starting parallel execution: {args.num_workers} processes, {len(tasks)} tasks total")
+
+    # Results arrive in completion order; tqdm shows progress as they come in.
+    with Pool(processes=args.num_workers) as pool:
+        finished = pool.imap_unordered(_worker_run, tasks)
+        results = list(tqdm(finished, total=len(tasks), desc="Parallel execution"))
+
+    scores = [score for _domain, _example_id, score in results if score is not None]
+    if not scores:
+        logger.info("No scores obtained.")
+        return
+    avg = sum(scores) / len(scores)
+    logger.info(f"Parallel execution completed. Average score: {avg}")
+
+if __name__ == "__main__":
+    # Script entry point: parse the arguments, record them next to the results,
+    # log how many examples get_unfinished() returns per domain, print the
+    # success rate of existing results, then run the remaining work in parallel.
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
+    args = config()
+
+    # An empty password selects a default that depends on the provider.
+    if args.client_password == "":
+        uses_aws = args.provider_name == "aws"
+        args.client_password = "osworld-public-evaluation" if uses_aws else "password"
+
+    # Save args to result_dir/action_space/observation_type/model/args.json.
+    path_to_args = os.path.join(
+        args.result_dir, args.action_space, args.observation_type, args.model, "args.json"
+    )
+    args_dir = os.path.dirname(path_to_args)
+    os.makedirs(args_dir, exist_ok=True)
+    with open(path_to_args, "w", encoding="utf-8") as args_file:
+        json.dump(vars(args), args_file, indent=4)
+
+    with open(args.test_all_meta_path, "r", encoding="utf-8") as meta_file:
+        test_all_meta = json.load(meta_file)
+
+    # Keep a single domain unless "all" was requested.
+    if args.domain != "all":
+        test_all_meta = {args.domain: test_all_meta[args.domain]}
+
+    test_file_list = get_unfinished(
+        args.action_space, args.model, args.observation_type, args.result_dir, test_all_meta
+    )
+    left_info = "".join(
+        f"{domain}: {len(test_file_list[domain])}\n" for domain in test_file_list
+    )
+    logger.info(f"Left tasks:\n{left_info}")
+
+    # Report the success rate of the results that already exist.
+    get_result(
+        args.action_space, args.model, args.observation_type, args.result_dir, test_all_meta
+    )
+    test_parallel(args, test_file_list)