Created
May 5, 2026 02:05
-
-
Save limcheekin/cf0f3b66814c9e769f0768801e0ce39e to your computer and use it in GitHub Desktop.
Repository GenericAgent Wiki — generated by GitNexus
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>GenericAgent — Wiki</title> | |
| <script src="https://cdn.jsdelivr.net/npm/marked@11.0.0/marked.min.js"></script> | |
| <script src="https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js"></script> | |
| <style> | |
| *{margin:0;padding:0;box-sizing:border-box} | |
| :root{ | |
| --bg:#ffffff;--sidebar-bg:#f8f9fb;--border:#e5e7eb; | |
| --text:#1e293b;--text-muted:#64748b;--primary:#2563eb; | |
| --primary-soft:#eff6ff;--hover:#f1f5f9;--code-bg:#f1f5f9; | |
| --radius:8px;--shadow:0 1px 3px rgba(0,0,0,.08); | |
| } | |
| body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif; | |
| line-height:1.65;color:var(--text);background:var(--bg)} | |
| .layout{display:flex;min-height:100vh} | |
| .sidebar{width:280px;background:var(--sidebar-bg);border-right:1px solid var(--border); | |
| position:fixed;top:0;left:0;bottom:0;overflow-y:auto;padding:24px 16px; | |
| display:flex;flex-direction:column;z-index:10} | |
| .content{margin-left:280px;flex:1;padding:48px 64px;max-width:960px} | |
| .sidebar-header{margin-bottom:20px;padding-bottom:16px;border-bottom:1px solid var(--border)} | |
| .sidebar-title{font-size:16px;font-weight:700;color:var(--text);display:flex;align-items:center;gap:8px} | |
| .sidebar-title svg{flex-shrink:0} | |
| .sidebar-meta{font-size:11px;color:var(--text-muted);margin-top:6px} | |
| .nav-section{margin-bottom:2px} | |
| .nav-item{display:block;padding:7px 12px;border-radius:var(--radius);cursor:pointer; | |
| font-size:13px;color:var(--text);text-decoration:none;transition:all .15s; | |
| white-space:nowrap;overflow:hidden;text-overflow:ellipsis} | |
| .nav-item:hover{background:var(--hover)} | |
| .nav-item.active{background:var(--primary-soft);color:var(--primary);font-weight:600} | |
| .nav-item.overview{font-weight:600;margin-bottom:4px} | |
| .nav-children{padding-left:14px;border-left:1px solid var(--border);margin-left:12px} | |
| .nav-group-label{font-size:11px;font-weight:600;color:var(--text-muted); | |
| text-transform:uppercase;letter-spacing:.5px;padding:12px 12px 4px;user-select:none} | |
| .sidebar-footer{margin-top:auto;padding-top:16px;border-top:1px solid var(--border); | |
| font-size:11px;color:var(--text-muted);text-align:center} | |
| .content h1{font-size:28px;font-weight:700;margin-bottom:8px;line-height:1.3} | |
| .content h2{font-size:22px;font-weight:600;margin:32px 0 12px;padding-bottom:6px;border-bottom:1px solid var(--border)} | |
| .content h3{font-size:17px;font-weight:600;margin:24px 0 8px} | |
| .content h4{font-size:15px;font-weight:600;margin:20px 0 6px} | |
| .content p{margin:12px 0} | |
| .content ul,.content ol{margin:12px 0 12px 24px} | |
| .content li{margin:4px 0} | |
| .content a{color:var(--primary);text-decoration:none} | |
| .content a:hover{text-decoration:underline} | |
| .content blockquote{border-left:3px solid var(--primary);padding:8px 16px;margin:16px 0; | |
| background:var(--primary-soft);border-radius:0 var(--radius) var(--radius) 0; | |
| color:var(--text-muted);font-size:14px} | |
| .content code{font-family:'SF Mono',Consolas,'Courier New',monospace;font-size:13px; | |
| background:var(--code-bg);padding:2px 6px;border-radius:4px} | |
| .content pre{background:#1e293b;color:#e2e8f0;border-radius:var(--radius);padding:16px; | |
| overflow-x:auto;margin:16px 0} | |
| .content pre code{background:none;padding:0;font-size:13px;line-height:1.6;color:inherit} | |
| .content table{border-collapse:collapse;width:100%;margin:16px 0} | |
| .content th,.content td{border:1px solid var(--border);padding:8px 12px;text-align:left;font-size:14px} | |
| .content th{background:var(--sidebar-bg);font-weight:600} | |
| .content img{max-width:100%;border-radius:var(--radius)} | |
| .content hr{border:none;border-top:1px solid var(--border);margin:32px 0} | |
| .content .mermaid{margin:20px 0;text-align:center} | |
| .menu-toggle{display:none;position:fixed;top:12px;left:12px;z-index:20; | |
| background:var(--bg);border:1px solid var(--border);border-radius:var(--radius); | |
| padding:8px 12px;cursor:pointer;font-size:18px;box-shadow:var(--shadow)} | |
| @media(max-width:768px){ | |
| .sidebar{transform:translateX(-100%);transition:transform .2s} | |
| .sidebar.open{transform:translateX(0);box-shadow:2px 0 12px rgba(0,0,0,.1)} | |
| .content{margin-left:0;padding:24px 20px;padding-top:56px} | |
| .menu-toggle{display:block} | |
| } | |
| .empty-state{text-align:center;padding:80px 20px;color:var(--text-muted)} | |
| .empty-state h2{font-size:20px;margin-bottom:8px;border:none} | |
| </style> | |
| </head> | |
| <body> | |
| <button class="menu-toggle" id="menu-toggle" aria-label="Toggle menu">☰</button> | |
| <div class="layout"> | |
| <nav class="sidebar" id="sidebar"> | |
| <div class="sidebar-header"> | |
| <div class="sidebar-title"> | |
| <svg width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M2 3h6a4 4 0 014 4v14a3 3 0 00-3-3H2z"/><path d="M22 3h-6a4 4 0 00-4 4v14a3 3 0 013-3h7z"/></svg> | |
| GenericAgent | |
| </div> | |
| <div class="sidebar-meta" id="meta-info"></div> | |
| </div> | |
| <div id="nav-tree"></div> | |
| <div class="sidebar-footer">Generated by GitNexus</div> | |
| </nav> | |
| <main class="content" id="content"> | |
| <div class="empty-state"><h2>Loading…</h2></div> | |
| </main> | |
| </div> | |
| <script> | |
| var PAGES = {"frontends-frontends":"# frontends — frontends\n\n# Frontends Module\n\nThe **frontends** module provides a multi-channel interface layer for the `GeneraticAgent`. It abstracts platform-specific communication (Discord, Feishu, DingTalk, QQ) and provides a unified mechanism for message processing, command handling, and session management. Additionally, it includes a standalone \"Desktop Pet\" system for visual notifications.\n\n## Architecture Overview\n\nAll chat-based frontends follow a common pattern: they receive platform-specific events, normalize the input, and delegate the core logic to the `GeneraticAgent` via a shared mixin.\n\n```mermaid\ngraph TD\n User((User)) --> Platform[Discord/Feishu/DingTalk/QQ]\n Platform --> Frontend[Frontend App Class]\n Frontend --> Mixin[AgentChatMixin]\n Mixin --> Agent[GeneraticAgent]\n Agent -- Task Queue --> Mixin\n Mixin -- Send Text/Media --> Platform\n Platform --> User\n```\n\n## Shared Infrastructure (`chatapp_common.py`)\n\nThis file contains the core utilities and base classes used by all chat integrations.\n\n### `AgentChatMixin`\nThe primary base class for chat frontends. It implements the standard lifecycle of an agent interaction:\n- **`run_agent(chat_id, text, **ctx)`**: The main entry point. 
It puts a task into the agent's queue, monitors the response queue, and handles \"thinking\" pings to keep the connection alive.\n- **`handle_command(chat_id, cmd, **ctx)`**: Processes slash commands like `/help`, `/stop`, `/status`, `/llm`, `/restore`, and `/new`.\n- **`send_done(chat_id, raw_text, **ctx)`**: A high-level wrapper that cleans the agent's final response, extracts generated file paths, and sends them to the user.\n\n### Utility Functions\n- **`clean_reply(text)`**: Removes internal tags like `<thinking>`, `<summary>`, and `<tool_use>` from the agent's output before displaying it to the user.\n- **`split_text(text, limit)`**: Intelligently splits long messages at newline boundaries to respect platform character limits.\n- **`extract_files(text)`**: Parses the `[FILE:filepath]` pattern to identify files generated by the agent that need to be uploaded.\n- **`ensure_single_instance(port, label)`**: Uses a socket lock to prevent multiple instances of the same frontend from running simultaneously.\n\n## Session Management (`continue_cmd.py`)\n\nThe `/continue` command system allows users to browse and restore previous conversation states from local logs.\n\n- **`list_sessions()`**: Scans the `temp/model_responses/` directory for log files and extracts metadata (last question, last summary, round count).\n- **`restore(agent, path)`**: Reconstructs the agent's history from a log file. It attempts to perform a \"native\" restore (full JSON state) but can fall back to a \"summary\" restore if the log format is legacy.\n- **`reset_conversation(agent)`**: Aborts current tasks, snapshots the current log, and clears the agent's history and backend state.\n\n## Platform Implementations\n\n### Feishu / Lark (`fsapp.py`)\nUses `lark-oapi` with a WebSocket (Long Polling) connection.\n- **`_TaskCard`**: Implements a dynamic UI using Feishu's `interactive` cards. 
It uses `collapsible_panel` elements to show a step-by-step history of the agent's \"turns\" (thinking and tool use) without cluttering the chat.\n- **Media Handling**: Automatically downloads images, audio, and files sent by users to `temp/feishu_media` before passing the local paths to the agent.\n\n### Discord (`dcapp.py`)\nUses `discord.py`.\n- **Intent Requirements**: Requires `message_content` and `guilds` intents.\n- **Interaction**: Responds to DMs directly and to Guild messages only when the bot is @mentioned.\n- **File Uploads**: Converts `[FILE:...]` tags into actual Discord file attachments using `discord.File`.\n\n### DingTalk (`dingtalkapp.py`)\nUses `dingtalk-stream`.\n- **Authentication**: Manages OAuth2 access tokens with automatic refresh logic.\n- **Message Format**: Sends replies as `sampleMarkdown` to support rich text formatting.\n\n### QQ (`qqapp.py`)\nUses `qq-botpy`.\n- **Message Sequencing**: Implements `_next_msg_seq()` to handle QQ's requirement for unique message sequence numbers.\n- **C2C & Group**: Supports both private (C2C) and Group @mention interactions.\n\n## Desktop Pet System\n\nThe Desktop Pet (`desktop_pet_v2.pyw`) is a cross-platform GUI (Tkinter/Cocoa/PySide6) that acts as a visual notification agent.\n\n### HTTP Control Interface\nThe pet runs a local HTTP server on port **41983**. 
It can be controlled via simple GET/POST requests:\n- `GET /?msg=Hello`: Displays a speech bubble (toast) with the message.\n- `GET /?state=run`: Changes the pet's animation state (idle, walk, run, sprint).\n\n### Skin System\nSkins are defined in `skins/` directories containing a `skin.json`.\n- **Sprite Sheet Support**: The `AnimationLoader` crops frames from a single image based on `frameWidth`, `frameHeight`, and `columns` defined in the config.\n- **Transparency**: \n - **macOS**: Uses pure Cocoa (`NSWindow`) for true alpha-channel transparency.\n - **Windows**: Uses Tkinter's `-transparentcolor` attribute.\n - **Linux**: Uses PySide6 with `Qt.WA_TranslucentBackground`.\n\n### Integration with Agent\nThe `GeneraticAgent` can trigger the pet's `show_toast` method at the end of a task turn to notify the user of completion or status updates.","frontends-skins":"# frontends — skins\n\n# Frontends — Skins\n\nThe `frontends/skins` module defines the visual assets and animation metadata for characters within the application. It uses a standardized JSON-based configuration system to map sprite sheets to specific character states, allowing the rendering engine to handle different character models interchangeably.\n\n## Module Structure\n\nEach skin is contained within its own directory under `frontends/skins/`. A skin package typically consists of:\n\n- `skin.json`: The primary configuration file defining metadata and animation logic.\n- Image assets: One or more `.png` files (e.g., `skin.png`, `idle.png`) containing the sprite frames.\n- `License.txt`: (Optional) Licensing information for the assets.\n\n## Skin Configuration (`skin.json`)\n\nThe `skin.json` file defines how the frontend should interpret and render the character.\n\n### Root Properties\n\n| Property | Type | Description |\n| :--- | :--- | :--- |\n| `name` | string | Display name of the skin. |\n| `style` | string | Visual style (e.g., `\"pixel\"`). 
|\n| `format` | string | Asset format (e.g., `\"sprite\"`). |\n| `size` | object | The logical display dimensions (`width`, `height`) of the character in the game world. |\n| `animations` | object | A map of animation states to their sprite configurations. |\n\n### Animation Object\n\nEach key in the `animations` object represents a character state (e.g., `idle`, `walk`, `run`, `sprint`).\n\n```json\n\"idle\": {\n \"file\": \"skin.png\",\n \"loop\": true,\n \"sprite\": {\n \"frameWidth\": 64,\n \"frameHeight\": 98,\n \"frameCount\": 10,\n \"columns\": 40,\n \"fps\": 6,\n \"startFrame\": 0\n }\n}\n```\n\n#### Sprite Properties\n\n| Property | Description |\n| :--- | :--- |\n| `file` | The path to the image file relative to the skin directory. |\n| `loop` | Boolean indicating if the animation should restart after the last frame. |\n| `frameWidth` | The width of a single frame in the source image. |\n| `frameHeight` | The height of a single frame in the source image. |\n| `frameCount` | Total number of frames in this specific animation sequence. |\n| `columns` | The number of frames per row in the source sprite sheet. |\n| `fps` | Frames per second; controls the playback speed. |\n| `startFrame` | The 0-based index of the first frame for this animation in the sprite sheet. |\n\n## Animation Logic\n\nThe rendering engine calculates the source rectangle for a frame using the following logic:\n\n1. **Frame Index**: The current frame relative to the animation start (`current_index = startFrame + current_tick`).\n2. 
**Coordinates**:\n - `row = floor(current_index / columns)`\n - `col = current_index % columns`\n - `x = col * frameWidth`\n - `y = row * frameHeight`\n\n```mermaid\ngraph TD\n JSON[skin.json] -->|Defines| States[idle, walk, run, sprint]\n States -->|References| SpriteSheet[skin.png]\n SpriteSheet -->|Sliced by| FrameData[frameWidth / frameHeight]\n FrameData -->|Timed by| FPS[fps]\n FPS -->|Rendered| Screen[Frontend Canvas]\n```\n\n## Standardized States\n\nTo ensure compatibility across all skins, the following animation keys are implemented:\n\n- **`idle`**: Played when the character is stationary.\n- **`walk`**: Played during low-velocity movement.\n- **`run`**: Played during standard movement.\n- **`sprint`**: Played during high-velocity movement (often a faster version of the `run` animation).\n\n## Adding a New Skin\n\n1. Create a new directory in `frontends/skins/`.\n2. Add your sprite sheet(s) as `.png` files.\n3. Create a `skin.json` following the schema of existing skins (e.g., `doux` or `boy`).\n4. Ensure the `size` property matches the intended hit-box/display size, and `sprite` dimensions match the actual pixel measurements of your source image.","frontends":"# frontends\n\n# Frontends\n\nThe **frontends** module serves as the multi-channel communication layer for the `GeneraticAgent`. It acts as a bridge between external messaging platforms and the internal agent logic, providing a unified interface for message normalization, command execution, and visual representation.\n\n## Architecture and Integration\n\nThe module is structured to separate platform-specific API handling from core agent interaction logic. 
This is achieved through a mixin-based architecture where individual platform apps inherit shared behavior.\n\n* **Platform Apps**: Specific implementations for [Discord](frontends.md), [Feishu (fsapp)](frontends.md), [QQ](frontends.md), and [WeCom](frontends.md) handle the nuances of each platform's API, such as message sequencing in `qqapp.py` or card-based rendering in `fsapp.py`.\n* **Common Logic**: The `chatapp_common.py` and `AgentChatMixin` components provide standardized workflows for session management and agent execution, ensuring consistent behavior across all channels.\n* **Visual Assets**: The [Skins](frontends-skins.md) sub-module provides the graphical definitions used by visual frontends (like the Desktop Pet) to represent the agent's state through animations and sprite sheets.\n\n## Key Workflows\n\n### Message Processing & Agent Execution\nWhen a message or media file is received from a platform (e.g., `on_message` in Discord or `on_file` in WeCom), the frontend performs the following sequence:\n1. **Normalization**: Platform-specific events are converted into a standard format.\n2. **Agent Invocation**: The `run_agent` flow triggers the `GeneraticAgent`.\n3. **Hook Management**: Hooks are registered/unregistered (via `_register_hook`) to capture agent turns and tool usage.\n4. **Response Dispatch**: Results are formatted (e.g., `_fmt_tool`, `build_done_text`) and sent back to the user.\n\n### Media and File Handling\nThe module includes specialized logic for handling non-textual data across different environments:\n* **Ingestion**: Feishu utilizes `_download_and_save_media` to sync assets locally, while WeCom uses `_on_media` to process incoming files.\n* **Extraction**: The `extract_files` utility in `chatapp_common.py` allows the agent to output files that the frontends then upload to the respective platforms.\n\n### Command & Session Control\nFrontend-specific commands (e.g., resetting a conversation) are routed through `continue_cmd.py`. 
This allows users to manage the agent's state directly from the chat interface without modifying the underlying agent configuration.\n\n## Module Relationship\n\n```mermaid\ngraph LR\n subgraph Platforms\n DC[Discord]\n FS[Feishu]\n WC[WeCom]\n end\n\n subgraph frontends\n Common[chatapp_common.py]\n Mixin[AgentChatMixin]\n Apps[Platform Apps]\n Skins[skins sub-module]\n end\n\n Platforms --> Apps\n Apps --> Mixin\n Mixin --> Common\n Common --> Agent[GeneraticAgent]\n Skins -.-> Apps\n```\n\n## Sub-modules\n\n| Module | Description |\n| :--- | :--- |\n| [frontends](frontends.md) | Core platform implementations (Discord, Feishu, QQ, WeCom) and shared mixins. |\n| [skins](frontends-skins.md) | JSON-based configuration and sprite assets for character animations. |","graphify-out":"# graphify-out\n\n# graphify-out\n\nThe `graphify-out` module serves as the automated documentation and structural analysis layer for the **GenericAgent** project. It provides a high-level mapping of the system's architecture, identifying core abstractions, functional communities, and cross-module dependencies through a combination of AST (Abstract Syntax Tree) parsing and semantic inference.\n\n## System Overview\n\nThe module analyzes a corpus of approximately 100 files to generate a relational graph. The current state of the project consists of **921 nodes** and **1659 edges**, organized into **57 distinct communities**.\n\n### Core Abstractions (God Nodes)\nThe following components represent the project's \"God Nodes\"—the most highly connected entities that facilitate communication across different layers:\n\n1. **`GeneraticAgent`**: The central orchestrator. It bridges frontends (Streamlit, Qt, Telegram) with the core agent logic and tool execution.\n2. **`ChatPanel`**: The primary UI controller for desktop-based interactions.\n3. **`TMWebDriver`**: The bridge for web-based automation and CDP (Chrome DevTools Protocol) interactions.\n4. 
**`StepOutcome`**: The standard data structure used to communicate the results of individual agent actions back to the runner loop.\n5. **`AgentChatMixin`**: A shared utility class providing common messaging patterns across various chat-based frontends.\n\n## Functional Communities\n\nThe codebase is partitioned into several specialized communities that define the system's capabilities:\n\n### 1. Core Agent Architecture\nThis community (Community 2) contains the engine of the system.\n* **Key Components**: `agent_runner_loop()`, `BaseHandler`, `StepOutcome`.\n* **Responsibilities**: Managing the execution flow, handling tool outputs, and maintaining the agent's internal state during a task.\n\n### 2. Multi-Frontend Layer\nGenericAgent supports a wide array of interfaces, each isolated into its own community:\n* **Qt Desktop**: `ChatPanel`, `FloatingButton`, and `_Badge` components for local GUI interaction.\n* **Streamlit Web**: `ClaudeSession` and `fold_turns()` for web-based agent monitoring.\n* **Messaging Bots**: Dedicated handlers for **Telegram** (`tgapp.py`), **WeChat** (`wechatapp.py`), **Feishu** (`fsapp.py`), and **DingTalk**.\n\n### 3. Layered Memory System (L0-L4)\nThe system implements a hierarchical memory structure defined in `memory_management_sop.md`. This allows the agent to persist insights across sessions and manage long-term context without exceeding token limits.\n\n### 4. 
Atomic Toolset\nThe agent interacts with the environment through a standardized set of tools:\n* **File Operations**: `ga_file_read`, `ga_file_write`, `ga_file_patch`.\n* **Execution**: `ga_code_run` (Python/Bash/PowerShell).\n* **Web/UI**: `ga_web_scan`, `ga_web_execute_js`, and `adb_ui.py` for mobile automation.\n\n## Relationship Mapping\n\nThe module distinguishes between **Extracted** edges (direct code references) and **Inferred** edges (semantic connections identified via LLM analysis).\n\n```mermaid\ngraph TD\n subgraph Frontends\n Qt[Qt Desktop]\n ST[Streamlit Web]\n TG[Telegram/Messaging]\n end\n\n subgraph Core\n GA[GeneraticAgent]\n Loop[agent_runner_loop]\n Mem[Layered Memory L0-L4]\n end\n\n subgraph Tools\n Code[ga_code_run]\n Web[TMWebDriver]\n File[File System Tools]\n end\n\n Qt --> GA\n ST --> GA\n TG --> GA\n GA <--> Loop\n Loop --> Mem\n Loop --> Tools\n Tools --> Web\n```\n\n## Key Inferred Connections\nThe analysis identifies critical \"surprising\" connections where logic in one layer implicitly depends on the core agent:\n* **Media Decryption**: `wechatapp.py` uses `GeneraticAgent` to handle local file paths after decrypting WeChat media.\n* **Rich-Text Filtering**: Markdown rendering for WeChat is processed through the agent's core formatting logic.\n* **Task Patching**: Feishu (Lark) task cards are dynamically updated via hooks in the `agent_runner_loop`.\n\n## Manifest and Integrity\nThe `manifest.json` file tracks the state of the source corpus. It maps every file (e.g., `agentmain.py`, `llmcore.py`, `TMWebDriver.py`) to its last modified time and MD5 hash, ensuring that the generated graph report stays synchronized with the actual implementation.","memory":"# memory\n\n# Memory Module\n\nThe `memory` module is a multi-layered system designed for environmental perception, persistent state management, and autonomous task coordination. 
It serves as the agent's long-term storage and sensory toolkit, bridging the gap between raw system interactions and high-level reasoning.\n\n## Memory Hierarchy (L1–L4)\n\nThe system follows a tiered architecture to balance retrieval speed with information density, as defined in `memory_management_sop.md`:\n\n* **L1 (Global Insight):** `global_mem_insight.txt`. A strictly limited (<30 lines) index of high-frequency scene triggers and \"Red Line\" rules.\n* **L2 (Global Facts):** `global_mem.txt`. Stores environment-specific constants like paths, non-sensitive IDs, and verified configurations.\n* **L3 (Task Records):** Located in the `memory/` root. Contains specialized SOPs (`.md`) and utility scripts (`.py`) for complex, repeatable tasks.\n* **L4 (Raw Sessions):** `memory/L4_raw_sessions/`. Archive of model interaction logs, compressed and indexed for historical context.\n\n## Core Components\n\n### 1. Perception & Vision\nThe module provides tools to \"see\" and interpret the environment across different platforms.\n\n* **OCR Utilities (`ocr_utils.py`):** Uses `rapidocr-onnxruntime` for local text recognition.\n * `ocr_screen(bbox)`: Captures and parses a screen region.\n * `ocr_window(hwnd)`: Uses `PrintWindow` API to capture window content even when a Remote Desktop (RDP) session is disconnected.\n* **Vision API (`vision_api.template.py`):** A unified wrapper for multimodal models (Claude, OpenAI, ModelScope).\n * `ask_vision(image_input, prompt, backend)`: Handles image resizing (`_prepare_image`) and base64 encoding before querying the selected LLM backend.\n* **UI Detection (`ui_detect.py`):** Employs YOLO-based models to identify UI elements (icons, buttons) and combines them with OCR for structured UI understanding.\n\n### 2. 
Interaction & Control\nTools for executing physical actions on Windows and Android systems.\n\n* **Windows Control (`ljqCtrl.py`):** A DPI-aware wrapper for `win32api`.\n * **Critical Pattern:** All coordinates must be converted to physical pixels using `ljqCtrl.dpi_scale` before calling `Click(x, y)` or `SetCursorPos(z)`.\n * `FindBlock(fn, wrect)`: Performs template matching to locate images on screen.\n* **Android UI (`adb_ui.py`):** Manages mobile interaction.\n * `ui()`: Attempts to dump the UI hierarchy using `uiautomator2` (preferred for speed/animation) with a fallback to native `adb shell uiautomator dump`.\n\n### 3. Autonomous Operation SOP\nThe `autonomous_operation_sop/` sub-module manages the lifecycle of self-directed agent actions.\n\n* **Task Management (`helper.py`):**\n * `get_todo()`: Reads the current `TODO.txt`.\n * `complete_task(taskname, historyline, report_path)`: Atomically moves a generated report to `autonomous_reports/`, assigns a unique ID (e.g., `R102`), and updates the central `history.txt`.\n* **Planning (`task_planning.md`):** Defines a \"Planning Mode\" where the agent must use a sub-agent to validate TODOs against a value formula: `「AI Training Data Gap」×「Future Utility」`.\n\n### 4. Skill Search\nA client-side implementation for searching a massive (105K+) library of skill cards.\n\n* **Engine (`skill_search/skill_search/engine.py`):**\n * `detect_environment()`: Automatically gathers OS, shell, and installed runtimes (Python, Node, etc.) to contextualize searches.\n * `search(query, env)`: Queries a remote API to find the most relevant SOPs or scripts based on the current system state.\n\n### 5. Security & Session Management\n* **Keychain (`keychain.py`):** Provides XOR-encrypted storage for sensitive keys. 
Secrets are wrapped in a `SecretStr` class to prevent accidental logging of raw values.\n* **Session Compression (`compress_session.py`):**\n * `batch_process(src)`: Automatically triggered by the system scheduler to clean up raw logs.\n * `_compress_raw(text)`: Strips redundant system prompts and assistant echoes to reduce storage footprint while preserving the `<history>` blocks.\n\n## Execution Flow: Autonomous Task Completion\n\nThe following diagram illustrates the interaction between the autonomous helper and the file system during task finalization:\n\n```mermaid\ngraph TD\n A[Agent Finishes Task] --> B[complete_task]\n B --> C{Validate History Line}\n C -->|Single Line| D[Generate R-Number]\n D --> E[Move Report to /autonomous_reports/]\n E --> F[Prepend to history.txt]\n F --> G[Return Mark-as-Done Instruction]\n G --> H[Agent Updates TODO.txt]\n```\n\n## Integration Points\n\n* **Reflect/Scheduler:** Periodically calls `compress_session.batch_process` to maintain the L4 archive and checks `scheduled_task_sop.md` for recurring jobs.\n* **Plan Mode:** When a task requires more than 3 steps, the agent triggers `plan_sop.md`, creating a dedicated `plan_XXX/` directory and utilizing `subagent.md` for environment exploration.\n* **Web Tools:** `tmwebdriver_sop.md` provides the interface for browser-based memory retrieval, utilizing a CDP bridge for cookie and tab management.","overview":"# GenericAgent — Wiki\n\n# GenericAgent: Self-Evolving Autonomous Framework\n\nWelcome to the **GenericAgent** repository. This project is a minimalist, self-evolving autonomous agent framework designed to bootstrap from a small set of \"seed\" tools into a fully-featured assistant. 
Unlike static agents, GenericAgent is built to manage its own dependencies and \"crystallize\" successful task executions into reusable skills, effectively growing its capabilities over time.\n\n## System Architecture\n\nGenericAgent is organized into a layered architecture that separates communication, reasoning, and persistent state. The system transitions from raw user input to structured execution through a central orchestration loop.\n\n```mermaid\ngraph TD\n User((User)) --> Frontends\n Frontends --> AgentMain[Root: AgentMain]\n Reflect[Reflect Module] -- Idle Triggers --> AgentMain\n AgentMain --> AgentLoop[Root: AgentLoop]\n AgentLoop --> Memory[Memory Module L1-L4]\n AgentLoop --> Tools[Capabilities & Tools]\n AgentLoop -.-> Plugins[Plugins: Tracing]\n```\n\n### Core Components\n\nThe lifecycle of an interaction typically begins in the [**frontends**](frontends.md) module, which provides a multi-channel communication layer. Whether receiving messages via WebSockets or enterprise platforms like WeCom, the frontend normalizes external data and passes it to the [**Root**](Root.md) orchestration layer.\n\nAt the heart of the framework is `agentmain.py`, which initializes the environment and hands off execution to the `agent_loop.py`. This loop coordinates between the agent's reasoning engine and its [**memory**](memory.md) system. The memory module utilizes a tiered hierarchy (L1–L4) to balance immediate context (Global Insights) with long-term persistent state, ensuring the agent maintains environmental awareness across sessions.\n\nTo maintain autonomy even when a user is not actively providing input, the [**reflect**](reflect.md) module monitors idle time and triggers background tasks or scheduled maintenance. 
For developers looking to monitor performance, the [**plugins**](plugins.md) module offers opt-in observability through Langfuse tracing, using monkey-patching to intercept execution points without cluttering the core logic.\n\n## Key Execution Flows\n\n1. **Message Processing**: When a user sends an image or file, the [**frontends**](frontends.md) logic (specifically `wecomapp.py`) triggers a media handling flow. This flow utilizes `chatapp_common.py` to split text and format responses before invoking the `run_agent` command in the [**Root**](Root.md) module.\n2. **Autonomous Activation**: If the system remains idle, the [**reflect**](reflect.md) module's `autonomous.py` component checks the `INTERVAL` settings. If conditions are met, it injects a standardized prompt into the [**Root**](Root.md) logic to perform background \"crystallization\" of new skills or system cleanup.\n3. **Structural Analysis**: For a deeper look at how these modules interact, the [**graphify-out**](graphify-out.md) module automatically parses the codebase's AST to generate dependency maps and functional community reports.\n\n## Getting Started\n\nGenericAgent is designed with a \"minimal core\" philosophy. To avoid dependency bloat, you should install only the packages required for your specific environment or UI.\n\n### Installation\n\n1. **Clone the repository**:\n ```bash\n git clone https://github.com/lsdefine/GenericAgent.git\n cd GenericAgent\n ```\n\n2. **Install Minimal Core**:\n ```bash\n pip install requests beautifulsoup4 bottle simple-websocket-server\n ```\n\n3. 
**Optional Dependencies**:\n Depending on your use case (e.g., using the [**plugins**](plugins.md) for tracing or specific [**frontends**](frontends.md)), install additional requirements as prompted by the system or as defined in `pyproject.toml`.\n\nFor a detailed breakdown of the agent's underlying theory, refer to the [Technical Report](https://arxiv.org/abs/2604.17091).","plugins":"# plugins\n\n# Langfuse Tracing Plugin\n\nThe `plugins/langfuse_tracing.py` module provides automated, opt-in observability for the agent's execution. It uses monkey-patching to intercept core execution points without requiring modifications to the primary logic in `llmcore` or `agent_loop`.\n\n## Activation\n\nThe plugin self-activates upon import if a `langfuse_config` key is present in the configuration returned by `llmcore._load_mykeys()`. If the configuration is missing or the `langfuse` library is not installed, the module remains dormant.\n\n## Architecture and State Management\n\nThe module utilizes `threading.local()` (stored in `_tls`) to maintain trace state across the asynchronous-style generator loops. This ensures that LLM generations and tool calls are correctly parented to the specific agent task, even in multi-threaded environments.\n\n### Tracing Hierarchy\n\n```mermaid\ngraph TD\n A[agent_runner_loop] --> B[llm.chat Generation]\n A --> C[Tool Observation]\n B --> D[SSE Usage Extraction]\n C --> E[Tool Result]\n```\n\n## Monkey-Patched Hooks\n\nThe plugin intercepts three primary areas of the system:\n\n### 1. Agent Lifecycle (`agent_loop.agent_runner_loop`)\nThe `_patched_loop` function wraps the main agent execution. \n- **Start**: Creates a root observation named `agent.task`.\n- **End**: Updates the observation with the final return value and calls `_lf.flush()` to ensure delivery.\n- **Scope**: It also attempts to update any other modules that have already imported `agent_runner_loop` by iterating through `sys.modules`.\n\n### 2. 
LLM Generations (`llmcore._write_llm_log`)\nInstead of hooking the LLM client directly, the plugin patches the logging utility to capture prompts and responses.\n- **Prompt**: When `_write_llm_log` is called with the \"Prompt\" label, it starts a `llm.chat` generation span.\n- **Response**: When called with the \"Response\" label, it closes the span and attaches usage metadata.\n\n### 3. Tool Execution (`agent_loop.BaseHandler`)\nThe plugin patches the callback system used by tool handlers:\n- **`tool_before_callback`**: Starts a `tool` type observation, capturing the arguments (filtering out internal keys like `_index`).\n- **`tool_after_callback`**: Closes the observation, capturing the tool's return data and control flow flags (`next_prompt`, `should_exit`).\n\n## Token Usage Extraction\n\nTo provide accurate cost and usage tracking, the plugin wraps the SSE (Server-Sent Events) parsers for different providers:\n\n- **Wrapped Functions**: `llmcore._parse_claude_sse` and `llmcore._parse_openai_sse`.\n- **Mechanism**: It uses a `tee()` generator to buffer the raw response lines while the original parser consumes them.\n- **`_extract_usage`**: After the parser finishes, this helper scans the buffered lines for provider-specific usage metadata:\n - **Anthropic**: Looks for `message_start` and `message_delta` events to find `input_tokens` and `output_tokens`.\n - **OpenAI/Generic**: Looks for `usage` fields or `response.completed` events, including support for `cached_tokens` details.\n\nThe extracted usage is stored in `_tls.usage` and subsequently attached to the Langfuse generation span when the LLM log \"Response\" event triggers.\n\n## Error Handling\n\nThe plugin is designed to be non-intrusive. All patching logic and Langfuse calls are wrapped in broad `try...except` blocks. 
If the tracing service is unavailable or a trace fails to initialize, the core agent logic continues to function normally without interruption.","reflect":"# reflect\n\n# reflect Module\n\nThe `reflect` module provides the autonomous activation logic for the agent. It is responsible for two primary functions: triggering \"idle\" responses when the user is away and managing a persistent task scheduler for recurring background work.\n\n## Core Components\n\n### 1. Autonomous Activation (`autonomous.py`)\nThis submodule handles simple time-based triggers when the agent is idle.\n\n* **`INTERVAL`**: 1800 seconds (30 minutes).\n* **`check()`**: Returns a standardized prompt string instructing the agent to read the automation SOP and perform background tasks if the user has been inactive.\n\n### 2. Task Scheduler (`scheduler.py`)\nA robust, JSON-driven scheduling system that manages recurring tasks and internal system maintenance.\n\n#### Concurrency Control\nThe scheduler uses a TCP port lock (`127.0.0.1:45762`) to prevent multiple instances from running simultaneously. If the port is already bound, the module assumes another instance is active. During a code reload, the `_lock` is preserved in the module dictionary to avoid binding conflicts.\n\n#### The `check()` Loop\nThe `check()` function is the entry point called by the main agent loop (every 120 seconds). It performs two distinct roles:\n\n**A. System Maintenance (L4 Archiving)**\nEvery 12 hours, the scheduler triggers a silent background process to archive raw session data:\n* Calls `batch_process` from `memory.L4_raw_sessions.compress_session`.\n* Processes files in `../temp/model_responses`.\n* This ensures long-term memory (L4) is updated without manual intervention.\n\n**B. Task Execution**\nThe scheduler scans the `../sche_tasks` directory for `.json` files. For each enabled task, it evaluates:\n1. **Schedule**: Matches the current time against the `HH:MM` format.\n2. 
**Repeat Logic**: Supports `daily`, `weekday` (skips weekends), `weekly`, `monthly`, and custom intervals (e.g., `every_2h`, `every_30m`).\n3. **Execution Window**: Uses `max_delay_hours` (default 6h) to prevent \"stale\" tasks from triggering if the system starts long after the scheduled time.\n4. **Cooldown**: Prevents duplicate execution within the same period using `_last_run()` to check the `../sche_tasks/done` directory.\n\n```mermaid\ngraph TD\n A[check] --> B{12h passed?}\n B -- Yes --> C[L4 batch_process]\n B -- No --> D[Scan sche_tasks/*.json]\n D --> E{Task Valid?}\n E -- Yes --> F{Cooldown/Time Check}\n F -- Pass --> G[Return Task Prompt]\n F -- Fail --> D\n G --> H[Agent Executes Task]\n```\n\n## Task Configuration Schema\n\nTasks are defined as JSON files in the `sche_tasks/` directory.\n\n| Field | Type | Description |\n| :--- | :--- | :--- |\n| `enabled` | boolean | Whether the task is active. |\n| `repeat` | string | `daily`, `weekday`, `weekly`, `monthly`, or `every_Nh/m/d`. |\n| `schedule` | string | Time in `HH:MM` format. |\n| `prompt` | string | The actual instruction sent to the LLM. |\n| `max_delay_hours`| int | (Optional) Max window to trigger if late. Default: 6. |\n\n**Example Task (`sche_tasks/daily_brief.json`):**\n```json\n{\n \"enabled\": true,\n \"repeat\": \"daily\",\n \"schedule\": \"09:00\",\n \"prompt\": \"Summarize the key events from yesterday's logs.\",\n \"max_delay_hours\": 2\n}\n```\n\n## Execution Flow & Reporting\n\nWhen a task is triggered:\n1. The scheduler generates a unique report path: `../sche_tasks/done/YYYY-MM-DD_HHMM_{task_id}.md`.\n2. It returns a prompt to the agent containing the task instructions and the requirement to write the final report to the generated path.\n3. The agent is instructed to read the `scheduled_task_sop` before proceeding to ensure consistent output formatting.\n\n## Internal Helper Functions\n\n* **`_parse_cooldown(repeat)`**: Converts the repeat string into a `timedelta`. 
It applies a slight buffer (e.g., 20 hours for a daily task) to account for minor timing drifts in the main loop.\n* **`_last_run(tid, done_files)`**: Scans the `done/` directory for the most recent timestamp associated with a specific Task ID (`tid`) to enforce cooldowns.","root-agent-loop-py":"# Root — agent_loop.py\n\n# Root — agent_loop.py\n\nThe `agent_loop.py` module implements the core execution engine for the AI agent. It manages the iterative \"Thought-Action-Observation\" cycle, handling communication between the LLM client, tool execution logic, and state management.\n\n## Core Components\n\n### agent_runner_loop\nThe primary entry point for executing an agent task. It is implemented as a generator that yields status updates and logs while managing the conversation state.\n\n**Key Responsibilities:**\n- **Turn Management:** Iterates up to `max_turns` (default 40).\n- **Context Optimization:** Resets `client.last_tools` every 10 turns to prevent context window bloat from repetitive tool definitions.\n- **LLM Interaction:** Calls `client.chat` with the current message history and tool schemas.\n- **Tool Orchestration:** Parses tool calls from the LLM and dispatches them via the `handler`.\n- **State Transition:** Updates the message history with tool results and determines if the loop should continue or exit based on `StepOutcome`.\n\n### BaseHandler\nA base class that defines how tools are dispatched and how callbacks are handled during the agent's lifecycle.\n\n- **Dynamic Dispatch:** Uses `dispatch()` to look up methods named `do_<tool_name>`.\n- **Generator Support:** Uses `try_call_generator` to support both standard functions and generator-based tools (allowing tools to stream their own progress).\n- **Lifecycle Hooks:**\n - `tool_before_callback`: Executed before a tool runs.\n - `tool_after_callback`: Executed after a tool completes.\n - `turn_end_callback`: Executed at the end of every turn to finalize the next prompt or handle exit logic.\n\n### 
StepOutcome\nA dataclass used by tools to communicate their result back to the loop.\n```python\n@dataclass\nclass StepOutcome:\n data: Any # The actual result/output of the tool\n next_prompt: Optional[str] # Guidance for the LLM's next turn\n should_exit: bool = False # Whether to terminate the agent loop immediately\n```\n\n## Execution Flow\n\nThe following diagram illustrates the logic within a single turn of the `agent_runner_loop`:\n\n```mermaid\ngraph TD\n Start[Start Turn] --> Chat[Call LLM Chat]\n Chat --> Parse{Tool Calls?}\n Parse -- Yes --> Dispatch[Handler Dispatch Tool]\n Parse -- No --> NoTool[Default: no_tool]\n Dispatch --> Execute[Execute do_tool_name]\n Execute --> Outcome[Return StepOutcome]\n Outcome --> ExitCheck{should_exit?}\n ExitCheck -- Yes --> End[Return Exit Reason]\n ExitCheck -- No --> TurnEnd[turn_end_callback]\n TurnEnd --> Next[Update Messages]\n Next --> Start\n```\n\n## Tool Dispatching Mechanism\n\nThe `dispatch` method in `BaseHandler` is responsible for the safe execution of tools. It injects an `_index` into the tool arguments to track multi-tool calls in a single turn.\n\n1. **Lookup:** It searches for a method prefixed with `do_` (e.g., `do_read_file`).\n2. **Execution:** It wraps the call in `try_call_generator`. If the tool is a generator, it yields from it, allowing real-time feedback to be bubbled up to the UI/CLI.\n3. **Error Handling:** If a tool is not found, it returns a `StepOutcome` with an \"未知工具\" (Unknown Tool) message, prompting the LLM to correct its behavior.\n\n## Utility Functions\n\n### Output Formatting\n- **`get_pretty_json(data)`**: Formats JSON for logging. It specifically handles \"script\" keys by adding newlines to semicolons for better readability of injected code.\n- **`_clean_content(text)`**: Sanitizes LLM output for display. 
It truncates long code blocks (keeping only the first 5 lines) and strips internal XML-like tags (e.g., `<file_content>`).\n- **`_compact_tool_args(name, args)`**: Creates a condensed string representation of tool arguments for non-verbose logging (e.g., truncating long strings or showing only the filename of a path).\n\n### Execution Helpers\n- **`try_call_generator(func, *args, **kwargs)`**: A wrapper that detects if a function return is an iterator (but not a string/list/dict). If it is, it treats it as a generator and yields from it.\n- **`exhaust(g)`**: A helper that runs a generator to completion and returns the value provided in the `StopIteration` exception. This is used when `verbose=False` to run tools silently.","root-agentmain-py":"# Root — agentmain.py\n\n# Root — agentmain.py\n\nThe `agentmain.py` module serves as the central orchestration layer for the Generatic Agent system. It manages LLM sessions, handles task queuing, constructs system prompts with global memory, and provides the primary execution loop that bridges user input with the `agent_loop` logic.\n\n## Core Architecture: GeneraticAgent Class\n\nThe `GeneraticAgent` class is the stateful controller of the application. It maintains the lifecycle of LLM clients, the task queue, and the interaction history.\n\n### LLM Session Management\nThe module supports multiple backends (OpenAI, Claude, Mixin) and \"Native\" variants. Sessions are initialized via `load_llm_sessions()`, which reads configurations from `mykeys.py` (via `llmcore`).\n\n* **Session Switching**: `next_llm(n)` allows cycling through available LLM clients.\n* **Schema Adaptation**: The module dynamically loads tool schemas based on the active model. 
If the model name contains `glm`, `minimax`, or `kimi`, it loads a Chinese schema (`_cn`); otherwise, it defaults to the standard schema.\n* **Mixin Support**: It can wrap multiple sessions into a `MixinSession` for fallback or ensemble logic.\n\n### Task Execution Flow\nTasks are processed asynchronously using a producer-consumer pattern with `queue.Queue`.\n\n```mermaid\ngraph TD\n A[put_task] --> B{task_queue}\n B --> C[run loop]\n C --> D[Slash Cmd Handler]\n D --> E[agent_runner_loop]\n E --> F[GenericAgentHandler]\n F --> G[Display Queue]\n```\n\n1. **`put_task(query, source, images)`**: Adds a task to the internal queue and returns a `display_queue` for the caller to monitor progress.\n2. **`run()`**: The main consumer loop. It retrieves tasks, processes slash commands, initializes a `GenericAgentHandler`, and invokes the `agent_runner_loop`.\n3. **`agent_runner_loop`**: An external generator (from `agent_loop.py`) that yields response chunks.\n4. **`abort()`**: Sets a `stop_sig` and signals the `handler` to terminate current code execution or LLM generation.\n\n## System Prompt & Memory Integration\n\nThe system prompt is constructed dynamically via `get_system_prompt()`:\n1. **Base Prompt**: Loaded from `assets/sys_prompt[_en].txt`.\n2. **Temporal Context**: Current date and day of the week.\n3. **Global Memory**: Injected via `ga.get_global_memory()`, providing long-term persistence across sessions.\n4. **Peer Hinting**: If `peer_hint` is enabled, the agent is instructed to look in `temp/model_responses/` to gain awareness of other concurrent or recent sessions.\n\n## Slash Commands\n\nThe `_handle_slash_cmd` method intercepts raw queries starting with `/` to perform administrative actions:\n* `/session.key=value`: Directly modifies the backend configuration (e.g., changing temperature or model parameters). 
If the value is a filename in `temp/`, it reads the file content as the value.\n* `/resume`: Injects a specific prompt to help the agent find and summarize recent sessions from the `model_responses/` directory, facilitating context recovery.\n\n## Operational Modes\n\nThe module supports three primary execution modes via CLI arguments:\n\n### 1. Interactive Mode (Default)\nA standard REPL (Read-Eval-Print Loop) where the user types queries directly into the terminal. It supports incremental output (`inc_out = True`).\n\n### 2. Task Mode (`--task IODIR`)\nA file-based interface for automation:\n* Reads input from `temp/<IODIR>/input.txt`.\n* Writes results to `temp/<IODIR>/output.txt`.\n* Monitors for a `reply.txt` to continue multi-turn conversations.\n* Supports background execution via `--bg`.\n\n### 3. Reflect Mode (`--reflect SCRIPT`)\nA monitoring mode that loads a Python script and periodically calls its `check()` function.\n* If `check()` returns a string, it is treated as a new task.\n* Results are logged to `temp/reflect_logs/`.\n* Supports an `on_done(result)` callback within the reflection script for automated post-processing.\n\n## Environment and Assets\n* **Language Detection**: Uses `locale` and the `GA_LANG` environment variable to toggle between English and Chinese assets.\n* **CDP Bridge**: Automatically initializes a `config.js` for `tmwebdriver` in `assets/tmwd_cdp_bridge/` to enable advanced web automation features.\n* **Memory Files**: Manages `global_mem.txt` and `global_mem_insight.txt` within the `memory/` directory.","root-ga-py":"# Root — ga.py\n\n# Root — ga.py\n\nThe `ga.py` module serves as the primary tool implementation layer for the Generic Agent. 
It defines the `GenericAgentHandler` class, which inherits from `BaseHandler`, and provides a suite of tools for code execution, file manipulation, web browsing, and memory management.\n\n## Core Architecture\n\nThe module acts as the bridge between the LLM's intent (tool calls) and the underlying system. Most functions follow a pattern where a standalone utility function (e.g., `code_run`) is wrapped by a handler method (e.g., `do_code_run`) that manages the agent's state and returns a `StepOutcome`.\n\n```mermaid\ngraph TD\n LLM[LLM Response] -->|Tool Call| GAH[GenericAgentHandler]\n GAH -->|do_code_run| CR[code_run]\n GAH -->|do_web_scan| WS[web_scan]\n GAH -->|do_file_patch| FP[file_patch]\n GAH -->|do_no_tool| NT[Logic: Final Response / Validation]\n CR --> Subprocess[Subprocess Execution]\n WS --> TM[TMWebDriver]\n```\n\n## GenericAgentHandler Class\n\nThis class manages the execution context for a single agent session.\n\n### State Management\n- `self.cwd`: The current working directory for all file and code operations.\n- `self.history_info`: A list of summarized past actions used to build the prompt context.\n- `self.working`: A dictionary for \"Working Memory,\" storing `key_info`, `related_sop`, and plan-mode state.\n- `self.current_turn`: Tracks the number of iterations in the current task.\n\n### Context Construction\nThe handler uses `_get_anchor_prompt` to build the system prompt for each turn. 
It implements a \"folding\" mechanism via `_fold_earlier` to compress older history into summaries, preventing token overflow while maintaining context.\n\n## Tool Implementations\n\n### Code Execution (`do_code_run`)\nExecutes Python, PowerShell, or Bash scripts.\n- **Python Mode**: Writes code to a temporary `.ai.py` file, prepends an optional `code_run_header.py`, and executes it using the current Python interpreter.\n- **Shell Mode**: Executes commands directly via `powershell` (Windows) or `bash` (Unix).\n- **Safety**: Includes a `timeout` (default 60s) and a `stop_signal` mechanism to kill runaway processes.\n\n### File System Operations\n- `do_file_read`: Reads files with support for line numbering, keyword searching, and automatic truncation of large files. It logs access to `file_access_stats.json`.\n- `do_file_write`: Supports `overwrite`, `append`, and `prepend` modes. It expects content within `<file_content>` tags or markdown code blocks.\n- `do_file_patch`: A precision editing tool. It searches for a unique `old_content` block and replaces it with `new_content`. This is preferred over full overwrites for large files.\n- `expand_file_refs`: A utility that allows tools to reference other files using the `{{file:path:start:end}}` syntax, which is expanded before execution.\n\n### Web Automation\nThe module integrates with `TMWebDriver` for browser control.\n- `do_web_scan`: Retrieves a list of open tabs and the simplified HTML content of the active page. It uses `simphtml` to filter out non-essential elements (sidebars, ads) to save tokens.\n- `do_web_execute_js`: The primary tool for complex web interaction. It executes arbitrary JavaScript and can save the return value to a local file. 
It includes a \"monitor\" feature to track DOM changes during execution.\n\n### Memory & Planning\n- `do_update_working_checkpoint`: Updates the agent's short-term \"Working Memory\" with key facts or SOP references.\n- `do_start_long_term_update`: Triggers a specialized workflow to distill successful actions into long-term memory files (L1/L2/L3) based on the `memory_management_sop.md`.\n- **Plan Mode**: When a plan file (e.g., `plan.md`) is detected, the handler enters a state that enforces verification steps and tracks task completion via `[ ]` checkbox detection.\n\n## Internal Logic & Validation\n\n### `do_no_tool`\nThis is a pseudo-tool triggered when the LLM provides a natural language response without calling a specific function. It performs several safety checks:\n1. **Empty Response Detection**: Retries if the LLM returns nothing.\n2. **Incomplete Response Detection**: Checks for truncation markers like `max_tokens !!!]`.\n3. **Validation Interception**: In Plan Mode, it blocks \"Task Complete\" claims if a `[VERIFY]` step hasn't been performed.\n4. 
**Code Block Detection**: If the LLM provides a large code block but forgets to call `file_write` or `code_run`, this method intercepts and asks for clarification.\n\n### `turn_end_callback`\nExecuted after every turn to:\n- Append a summary of the action to `history_info`.\n- Inject \"Danger\" prompts if the turn count is high (e.g., turn 7, 10, or 65), forcing the agent to reconsider its strategy or ask the user for help.\n- Handle external interventions via `_keyinfo` or `_intervene` files.\n\n## Utility Functions\n\n- `smart_format(data, max_str_len)`: Truncates long strings by keeping the head and tail, inserting an `[omitted]` marker in the middle.\n- `format_error(e)`: Provides a detailed traceback including the filename, line number, and the specific line of code that failed.\n- `get_global_memory()`: Loads structural context from `global_mem_insight.txt` and `insight_fixed_structure.txt` to provide the agent with its environment's \"world view.\"","root-getting-started-md":"# Root — GETTING_STARTED.md\n\n# GenericAgent Onboarding and System Initialization\n\nThe `GETTING_STARTED.md` module serves as the primary entry point for both users and developers to initialize the GenericAgent environment. It defines the bootstrapping process where the agent transitions from a minimal script to a fully-featured autonomous assistant capable of managing its own dependencies and expanding its skill set.\n\n## System Entry Points\n\nThe project provides two primary execution paths:\n\n1. **`agentmain.py` (CLI Mode):** The core entry point. It initializes the command-line interface and is used for initial setup, dependency installation, and headless operation.\n2. **`launch.pyw` (GUI Mode):** A windowed wrapper (typically using `pywebview`) that provides a floating desktop interface. This is intended for daily use after the initial environment is stabilized.\n\n## Configuration Schema (`mykey.py`)\n\nThe system uses a dynamic configuration loading mechanism. 
Users must create `mykey.py` based on `mykey_template.py`. The system determines the communication protocol (Session type) based on the **variable names** defined in this file rather than the model name itself.\n\n### Protocol Mapping Logic\n\n| Variable Name Pattern | Protocol / Session Class | Target API |\n| :--- | :--- | :--- |\n| Contains `oai` | OpenAI Chat Completions | OpenAI, DeepSeek, MiniMax, etc. |\n| Contains `claude` (no `native`) | Claude Messages API | Anthropic (via third-party proxies) |\n| Contains `native` + `claude` | Claude Native Tool Use | Anthropic Official API |\n| Contains `native` + `oai` | OpenAI Native Tool Use | OpenAI Official API |\n\n### API Base Resolution\nThe system automatically normalizes `apibase` URLs:\n* `http://host:port` -> Appends `/v1/chat/completions`\n* `http://host:port/v1` -> Appends `/chat/completions`\n* Full paths are used as-is.\n\n## Bootstrapping Workflow\n\nGenericAgent follows a \"Self-Installing\" pattern. Instead of a traditional `requirements.txt` installation, the developer is encouraged to let the agent configure its own environment.\n\n```mermaid\ngraph TD\n A[Install Python] --> B[Configure mykey.py]\n B --> C[Run agentmain.py]\n C --> D{Agent Self-Setup}\n D --> E[Install Dependencies]\n D --> F[Configure Git/Path]\n E --> G[Launch GUI launch.pyw]\n F --> G\n```\n\n### Dependency Management\nThe agent is capable of introspecting its own source code to identify missing libraries. By issuing a command like `\"Install all necessary python dependencies\"`, the agent triggers internal shell execution to run `pip install` for required modules (e.g., `requests`, `pywebview`, `rapidocr-onnxruntime`).\n\n## Capability Unlocking (Skill Acquisition)\n\nThe system architecture relies on \"Skills\" and \"Memory\" rather than hardcoded features. Capabilities are added to the agent's runtime context through specific SOPs (Standard Operating Procedures):\n\n* **Web Automation:** Triggered by `web setup sop`. 
This injects browser extensions and configures the driver to interact with the user's active browser session.\n* **OCR/Vision:** Configured by instructing the agent to implement `rapidocr` or vision-based `llmcore` wrappers.\n* **Persistence:** The agent can \"solidify\" successful execution paths into its long-term memory, effectively creating new tools without manual code changes.\n\n## The \"Code as Documentation\" Philosophy\n\nA unique aspect of this module is the instruction for developers to treat the source code as the primary manual. The agent is designed to:\n1. Read its own implementation files.\n2. Explain internal modes (e.g., `Reflect`, `Plan`, `SubAgent`).\n3. Self-update via `git` and interpret commit logs to understand new features.\n\nThis reduces the maintenance burden of external documentation, as the agent's understanding of its capabilities evolves synchronously with the codebase.","root-hub-pyw":"# Root — hub.pyw\n\n# Root — hub.pyw\n\n`hub.pyw` is a lightweight, cross-platform service launcher and manager for the GenericAgent ecosystem. Built using Python's standard library and `tkinter`, it provides a graphical interface to discover, start, stop, and monitor various agent services and frontends without requiring third-party dependencies.\n\n## Core Functionality\n\n### Singleton Instance Control\nThe module ensures only one instance of the launcher runs at a time using `acquire_singleton()`. It attempts to bind a TCP socket to `LOCK_PORT` (19735). If the bind fails, the application assumes another instance is active, displays a message box, and exits.\n\n### Service Discovery\nThe `discover_services()` function scans the local directory structure to identify runnable components:\n\n1. **Reflect Services**: Scans the `reflect/` directory for `.py` files (excluding those starting with `_`). These are launched via `agentmain.py` using the `--reflect` flag.\n2. 
**Frontend Applications**: Scans the `frontends/` directory for files containing `app` in the name (excluding `chatapp_common.py`).\n * **Streamlit Apps**: If the filename contains `stapp`, it is executed via `streamlit run`.\n * **Standard Apps**: Other matches are executed as standard Python scripts.\n\n## Process Management\n\nThe `ServiceManager` class encapsulates the lifecycle of background processes.\n\n### Execution Logic\nWhen a service is started via `ServiceManager.start(name, cmd)`:\n- It sets the `PYTHONUNBUFFERED` environment variable to ensure real-time log capturing.\n- On Windows, it uses the `CREATE_NO_WINDOW` flag to prevent console popups.\n- It redirects `stderr` to `stdout` and pipes the output.\n\n### Log Buffering\nFor every started process, a dedicated daemon thread runs `_reader()`. This thread populates a `collections.deque` (limited to the last 500 lines) associated with the service name. This allows the UI to display logs without blocking the main event loop or consuming excessive memory.\n\n```mermaid\ngraph TD\n A[LauncherApp] -->|Start/Stop| B[ServiceManager]\n B -->|Popen| C[Subprocess]\n C -->|Stdout/Stderr| D[Reader Thread]\n D -->|Append| E[Circular Buffer - Deque]\n A -->|Poll| E\n A -->|Update UI| F[Tkinter Text Widget]\n```\n\n## UI Architecture\n\nThe `LauncherApp` class manages the `tkinter` interface, organized into two primary sections:\n\n### Service List\n- **Dynamic Rows**: Each discovered service is rendered as a row with a checkbox (start/stop) and a status indicator.\n- **Selection**: Clicking a service row selects it, highlighting the row and switching the \"Output\" console to display that service's specific buffer.\n- **Rescan**: The `_rescan()` method allows users to refresh the service list without restarting the launcher. 
It preserves the state of currently running services.\n\n### Output Console\nThe log viewer uses a `tk.Text` widget with a custom refresh logic in `_refresh_output()`:\n- **Smart Scrolling**: If the user is scrolled to the bottom, the view automatically follows new output. If the user has scrolled up to inspect previous logs, the scroll position is maintained even as new data arrives.\n- **Performance**: It compares the new buffer content with the current display to avoid unnecessary UI flickers.\n\n### The Polling Loop\nThe `_poll()` method executes every 1000ms. It performs two critical tasks:\n1. Synchronizes the UI status labels and checkboxes with the actual state of the subprocesses (e.g., detecting if a process crashed or closed externally).\n2. Triggers the log refresh for the currently selected service.\n\n## Key Constants and Configuration\n\n| Constant | Value | Description |\n| :--- | :--- | :--- |\n| `LOCK_PORT` | 19735 | TCP port used to prevent multiple launcher instances. |\n| `BASE_DIR` | `os.path.dirname` | The root directory of the project. |\n| `maxlen` | 500 | The maximum number of log lines kept in memory per service. |\n| `PYTHONUNBUFFERED` | `'1'` | Env var used to force immediate output from child processes. |\n\n## Usage and Contribution\nTo run the launcher, execute `pythonw hub.pyw`. \n\nWhen adding new services:\n- Place reflection logic in `reflect/` as a `.py` file.\n- Place UI frontends in `frontends/` with `app` in the filename.\n- Ensure the service handles `SIGTERM` (sent via `proc.terminate()`) for clean shutdowns.","root-launch-pyw":"# Root — launch.pyw\n\n# Module: launch.pyw\n\nThe `launch.pyw` module serves as the primary entry point and orchestrator for the GenericAgent application. 
It is responsible for managing the lifecycle of the Streamlit backend, initializing the `pywebview` desktop interface, and spawning optional bot subprocesses (Telegram, QQ, Feishu, etc.).\n\nThe `.pyw` extension indicates that on Windows, the application will run without an attached console window.\n\n## Core Responsibilities\n\n- **Process Orchestration**: Launches and manages the Streamlit server and various messaging bot adapters as subprocesses.\n- **Desktop Wrapper**: Wraps the web-based Streamlit UI in a native desktop window using `pywebview`.\n- **Automated Monitoring**: Implements an \"Idle Monitor\" that detects user inactivity and triggers autonomous agent tasks.\n- **UI Injection**: Provides a bridge to programmatically interact with the Streamlit frontend via JavaScript injection.\n\n## Process Architecture\n\nThe module uses `subprocess.Popen` to run components concurrently. It ensures system resources are cleaned up by registering `proc.kill` calls with the `atexit` module.\n\n```mermaid\ngraph TD\n Main[launch.pyw] -->|Subprocess| ST[Streamlit: stapp.py]\n Main -->|Subprocess| Bots[Bot Apps: tgapp, qqapp, etc.]\n Main -->|Subprocess| Sched[Scheduler: agentmain.py]\n Main -->|Thread| Idle[Idle Monitor]\n Main -->|GUI| WV[pywebview Window]\n WV -.->|JS Injection| ST\n```\n\n## Key Components\n\n### 1. Streamlit Management\nThe `start_streamlit(port)` function initializes the web server. It runs `stapp.py` in headless mode on a dynamically assigned local port.\n- **Port Selection**: `find_free_port()` scans the range `18501-18599` to avoid collisions.\n- **Address**: Bound to `localhost` for security.\n\n### 2. Desktop Window Configuration\nThe application uses `ctypes` to calculate screen metrics, ensuring the window is pinned to the right side of the screen by default.\n- **Dimensions**: 600px (W) x 900px (H).\n- **Positioning**: Calculated via `get_screen_width()` to align the window to the right edge on Windows systems.\n\n### 3. 
JavaScript Bridge & Injection\nThe `inject(text)` function allows the Python backend to drive the Streamlit UI. This is used for automation and autonomous task execution.\n- **React Bypass**: Since Streamlit is a React application, `inject` uses a native property descriptor setter (`HTMLTextAreaElement.prototype`) to bypass React's internal state management and trigger the necessary `input` and `change` events.\n- **Auto-Submit**: After setting the text, it programmatically clicks the `stChatInputSubmitButton`.\n\n### 4. Idle Monitor & Paste Hook\nThe `idle_monitor()` runs in a daemon thread and performs two main tasks:\n- **Paste Hooking**: Injects `PASTE_HOOK_JS` into the browser context. This intercepts clipboard events to detect if a user is attempting to paste files or images, updating the UI state accordingly.\n- **Inactivity Detection**: Every 5 seconds, it checks the `#last-reply-time` element in the DOM. If no activity is detected for 30 minutes (1800s), it calls `inject()` to trigger an autonomous SOP (Standard Operating Procedure) task.\n\n## Command Line Arguments\n\nThe module supports several flags to enable specific frontends and services:\n\n| Argument | Description |\n| :--- | :--- |\n| `port` | Optional. Specify a port (defaults to searching for a free one). |\n| `--tg` | Starts the Telegram Bot (`tgapp.py`). |\n| `--qq` | Starts the QQ Bot (`qqapp.py`). |\n| `--feishu` / `--fs` | Starts the Feishu Bot (`fsapp.py`). |\n| `--wecom` | Starts the WeCom Bot (`wecomapp.py`). |\n| `--dingtalk` / `--dt` | Starts the DingTalk Bot (`dingtalkapp.py`). |\n| `--sched` | Starts the Task Scheduler (`agentmain.py` with `scheduler.py` reflection). |\n| `--llm_no` | Passes a specific LLM configuration index to the scheduler. |\n\n## Execution Flow\n\n1. **Argument Parsing**: Determines which bots and services to enable.\n2. **Port Discovery**: Finds an available port for the Streamlit server.\n3. 
**Subprocess Launch**: Starts the Streamlit server and any requested bot adapters.\n4. **Thread Initialization**: Starts the `idle_monitor` thread.\n5. **Window Creation**: Initializes the `pywebview` window.\n6. **Main Loop**: Enters the `webview.start()` loop, which blocks until the window is closed.\n7. **Cleanup**: Upon exit, `atexit` handlers kill all spawned subprocesses.","root-llmcore-py":"# Root — llmcore.py\n\n# Root — llmcore.py\n\nThe `llmcore.py` module serves as the central abstraction layer for interacting with Large Language Models (LLMs). It provides a unified interface for multiple providers (Anthropic, OpenAI, and compatible relays), manages conversation history, handles streaming responses, and implements robust tool-calling protocols.\n\n## Core Architecture\n\nThe module is built around a hierarchy of Session classes that encapsulate API logic, state management, and provider-specific formatting.\n\n```mermaid\ngraph TD\n BaseSession --> ClaudeSession\n BaseSession --> LLMSession\n BaseSession --> NativeClaudeSession\n NativeClaudeSession --> NativeOAISession\n BaseSession --> MixinSession\n```\n\n### Key Session Types\n- **`BaseSession`**: The abstract base class handling configuration (API keys, base URLs), history storage, and the high-level `ask()` interface.\n- **`ClaudeSession`**: Implements the Anthropic Messages API, including native prompt caching and thinking block management.\n- **`LLMSession`**: Implements the OpenAI Chat Completions and Responses API.\n- **`NativeClaudeSession`**: A specialized implementation for \"Claude Code\" style interactions, utilizing specific beta headers (e.g., `interleaved-thinking`, `claude-code-20250219`) and complex block structures.\n- **`MixinSession`**: A resilience wrapper that allows for multi-session fallback. 
It rotates through a list of backends if the primary provider fails or hits rate limits.\n\n## History and Context Management\n\nThe module includes sophisticated logic to maintain long-running conversations within context window limits.\n\n### Trimming and Compression\n- **`trim_messages_history(history, context_win)`**: Monitors the character count of the history. If it exceeds the `context_win`, it triggers compression and eventually pops the oldest messages (ensuring the history always starts with a `user` role).\n- **`compress_history_tags(messages, ...)`**: Reduces token usage by truncating content inside `<thinking>`, `<tool_use>`, and `<tool_result>` tags in older messages while keeping the most recent messages intact.\n- **`_sanitize_leading_user_msg(msg)`**: Converts complex block-based user messages into plain text if they become the first message in a trimmed history, preventing orphaned tool references.\n\n## Request Execution Flow\n\nThe primary entry point for developers is the `ask()` method (or `chat()` in Client wrappers).\n\n1. **Prompt Preparation**: The session appends the user prompt to `self.history`.\n2. **History Maintenance**: `trim_messages_history` is called to ensure the payload fits the model's limits.\n3. **Provider Mapping**: `make_messages()` transforms the internal history format into the provider's specific schema (e.g., `_msgs_claude2oai` for OpenAI backends).\n4. **Execution**: `raw_ask()` invokes `_stream_with_retry`.\n5. **Streaming & Parsing**:\n - **OpenAI**: Handled by `_parse_openai_sse` or `_parse_openai_json`.\n - **Claude**: Handled by `_parse_claude_sse` or `_parse_claude_json`.\n6. **Response Finalization**: The assistant's response is appended to history, and tool calls are extracted.\n\n## Tool Calling Protocols\n\n`llmcore` supports two primary methods for tool interaction:\n\n### 1. Native Tool Use\nUtilized by `NativeClaudeSession` and `NativeOAISession`. It uses the provider's built-in tool-calling schema. 
The `openai_tools_to_claude` utility handles schema conversion between OpenAI's function format and Anthropic's `input_schema`.\n\n### 2. Protocol-Based Tool Use (`ToolClient`)\nFor models with weaker native tool support, `ToolClient` enforces a text-based XML/JSON protocol:\n- **Thinking**: Encapsulated in `<thinking>` tags.\n- **Summary**: A mandatory `<summary>` tag providing a snapshot of the state.\n- **Action**: Tool calls are wrapped in `<tool_use>` tags.\n- **Parsing**: `_parse_text_tool_calls` uses regex and `tryparse` (a fuzzy JSON parser) to extract calls from raw text if the model fails to use native blocks.\n\n## Resilience and Failover\n\nThe `MixinSession` class provides high availability:\n- **Spring-back Logic**: If it switches to a secondary session due to an error, it will automatically attempt to \"spring back\" to the primary session after a configurable `spring_back` delay (default 300s).\n- **Broadcast Attributes**: Setting attributes like `system`, `tools`, or `history` on a `MixinSession` automatically broadcasts those changes to all underlying session instances.\n- **Retry Logic**: `_stream_with_retry` implements exponential backoff and handles specific retryable HTTP status codes (429, 500, 502, 503, 504).\n\n## Configuration and Keys\n\nThe module dynamically manages API keys via `reload_mykeys()`. 
It looks for a `mykey.py` or `mykey.json` file.\n- **Hot Reloading**: The module checks the file's modification time (`st_mtime_ns`) and reloads keys automatically if the file changes on disk.\n- **PEP 562**: Uses `__getattr__` at the module level to provide a global `mykeys` object that is always up-to-date.\n\n## Utility Functions\n\n- **`auto_make_url(base, path)`**: Intelligently joins base URLs and paths, ensuring `/v1/` or `/v1/messages` suffixes are correctly applied based on the provider type.\n- **`_stamp_oai_cache_markers`**: Automatically injects `cache_control` headers into the last two user messages for Anthropic models to optimize performance and cost on supported relays.\n- **`safeprint`**: A wrapper around `print` that catches `OSError`, preventing crashes in environments with unstable standard output (e.g., certain detached processes).","root-mykey-template-en-py":"# Root — mykey_template_en.py\n\n# Module: mykey_template_en.py\n\nThe `mykey_template_en.py` file serves as the configuration blueprint for the GenericAgent (GA) framework. It defines the credentials, model parameters, and session behaviors required to interface with LLM providers. \n\nTo activate the configuration, this file must be copied to a new file named `mykey.py` in the root directory.\n\n## Configuration Auto-Detection\n\nThe framework employs a convention-over-configuration approach. 
When `agentmain.py` or `launch.pyw` initializes, it scans `mykey.py` for variables containing specific keywords to determine which session class to instantiate:\n\n| Variable Name Keywords | Resulting Session Class | Provider Target |\n| :--- | :--- | :--- |\n| `native` + `claude` | `NativeClaudeSession` | Anthropic API |\n| `native` + `oai` | `NativeOAISession` | OpenAI API or compatible |\n| `mixin` | `MixinSession` | Failover/Redundancy logic |\n\nThe framework also automatically detects variables containing `api`, `config`, or `cookie` to populate session settings.\n\n## Session Types\n\n### 1. NativeClaudeSession\nDesigned for direct interaction with Anthropic's API. It supports native tool-calling (function calling) and specific Claude features.\n\n* **Key Fields:**\n * `apikey`: Supports `sk-ant-` prefixes (sent as `x-api-key`) or standard Bearer tokens.\n * `model`: Supports context window suffixes like `[1m]` for the 1M-context beta.\n * `thinking_type`: Controls Claude 3.7+ reasoning capabilities (`adaptive`, `enabled`, or `disabled`).\n * `thinking_budget_tokens`: Required if `thinking_type` is set to `enabled`.\n\n### 2. NativeOAISession\nUsed for OpenAI or any provider implementing the OpenAI-compatible `/v1/chat/completions` or `/v1/responses` endpoints.\n\n* **Key Fields:**\n * `api_mode`: Can be toggled between `chat_completions` and the newer `responses` endpoint.\n * `reasoning_effort`: Configures reasoning intensity for models like `o1` or `o3` (`low`, `medium`, `high`).\n * `apibase`: The base URL for the provider (e.g., `https://api.openai.com/v1`).\n\n### 3. MixinSession\nA failover wrapper that manages multiple native sessions. If the primary session fails (e.g., rate limits or API errors), the Mixin automatically rotates to the next available session in the list.\n\n* **Constraint:** All sessions referenced in `llm_nos` must be \"Native\" sessions. 
You cannot mix Native sessions with legacy or non-native session types within a single Mixin.\n\n```mermaid\ngraph TD\n A[mykey.py] --> B{Variable Name?}\n B -- \"native + claude\" --> C[NativeClaudeSession]\n B -- \"native + oai\" --> D[NativeOAISession]\n B -- \"mixin\" --> E[MixinSession]\n E -->|References| C\n E -->|References| D\n```\n\n## Global and Integration Settings\n\nBeyond LLM configurations, the module supports global environment settings:\n\n* **Proxy:** The `proxy` variable (e.g., `http://127.0.0.1:7890`) sets a global HTTP proxy for all network requests unless overridden within a specific session config.\n* **Chat Platforms:** Variables like `tg_bot_token` and `tg_allowed_users` configure the Telegram bot interface, allowing the agent to be accessed via mobile or external chat clients.\n\n## Runtime Overrides\n\nWhile `mykey.py` sets the persistent defaults, the framework allows developers to override these settings during a live REPL session using the `/session` command.\n\n**Examples:**\n* `/session.temperature=0.3` — Adjusts creativity.\n* `/session.reasoning_effort=high` — Increases depth for OpenAI reasoning models.\n* `/session.thinking_type=adaptive` — Toggles Claude's thinking mode.\n\nThese overrides are applied to the active session instance without modifying the underlying `mykey.py` file.","root-mykey-template-py":"# Root — mykey_template.py\n\n# Root — mykey_template.py\n\nThe `mykey_template.py` module serves as the central configuration blueprint for the GenericAgent system. It defines how the agent connects to Large Language Models (LLMs), manages API credentials, configures session behaviors, and integrates with third-party messaging platforms.\n\nTo use this configuration, developers must copy this file to `mykey.py`. The system's entry points (`agentmain.py` or `launch.pyw`) dynamically scan `mykey.py` to initialize the environment.\n\n## Session Discovery Logic\n\nThe system uses a convention-over-configuration approach. 
When `agentmain.py` loads `mykey.py`, it scans for global variables whose names contain the keywords `api`, `config`, or `cookie`. It then determines which `Session` class to instantiate based on specific keywords within those variable names.\n\n| Variable Keywords | Resulting Session Class | Tool Protocol |\n| :--- | :--- | :--- |\n| `native` AND `claude` | `NativeClaudeSession` | API-native `tools` field |\n| `native` AND `oai` | `NativeOAISession` | API-native `tools` field |\n| `claude` (no `native`) | `ClaudeSession` | Text-based protocol (Deprecated) |\n| `oai` (no `native`) | `LLMSession` | Text-based protocol (Deprecated) |\n| `mixin` | `MixinSession` | Failover/Rotation logic |\n\n**Priority Note:** Keywords are matched top-down. A variable named `oai_claude_config` will match `claude` first and instantiate a `ClaudeSession` rather than an OpenAI-based one.\n\n## Native vs. Non-Native Protocols\n\n* **Native:** Tools are passed via the API's structured tool/function calling fields. This is required for models optimized for Claude Code or OpenAI Codex, as they may ignore tool descriptions provided in the system prompt.\n* **Non-Native (Deprecated):** Tools are described within the text prompt. While more compatible with older or simpler models, it is less effective for modern \"overfitted\" coding models.\n\n## Configuration Reference\n\n### Core LLM Fields\nThese fields are used by `BaseSession.__init__` and its subclasses:\n\n* **`apikey`**: The authentication token.\n * Prefix `sk-ant-`: Uses `x-api-key` header (Anthropic).\n * Other prefixes: Uses `Authorization: Bearer` header.\n* **`apibase`**: The base URL for the API.\n * `http://host:2001` → Appends `/v1/chat/completions`.\n * `http://host:2001/v1` → Appends `/chat/completions`.\n * `NativeClaudeSession` automatically appends `?beta=true` to trigger Anthropic beta features.\n* **`model`**: The model identifier. 
Appending `[1m]` triggers the `context-1m-2025-08-07` beta flag.\n* **`name`**: A unique identifier used for display in `/llms` and for referencing in `MixinSession`.\n* **`proxy`**: Optional per-session HTTP proxy (e.g., `'http://127.0.0.1:2082'`).\n\n### Reasoning and Thinking\nSpecific controls for models with internal reasoning capabilities (Claude 3.7+, OpenAI o1/o3):\n\n* **`thinking_type`** (Claude):\n * `adaptive`: Model decides its own reasoning budget (Claude Code default).\n * `enabled`: Requires `thinking_budget_tokens`.\n * `disabled`: Explicitly disables the thinking block.\n* **`reasoning_effort`** (OpenAI/Claude):\n * Values: `none`, `minimal`, `low`, `medium`, `high`, `xhigh`.\n * For Claude, `xhigh` maps to `max` in `output_config.effort`.\n* **`fake_cc_system_prompt`**: Must be `True` for third-party relays (like CC Switch or Anyrouter) that validate Claude Code fingerprints.\n\n### Mixin Failover (`MixinSession`)\nThe `mixin_config` allows for high availability by rotating through multiple sessions.\n\n```python\nmixin_config = {\n 'llm_nos': ['gpt-native', 'cc-relay-1'], # List of session 'name' fields\n 'max_retries': 10, # Total retries across all nodes\n 'base_delay': 0.5, # Exponential backoff start\n 'spring_back': 300, # Seconds before trying the primary node again\n}\n```\n\n## Integration Settings\n\nThe template includes placeholders for various chat adapters and tracking tools:\n\n* **Messaging Platforms**: Tokens and allowed user IDs for Telegram (`tg_bot_token`), QQ (`qq_app_id`), Feishu (`fs_app_id`), Enterprise WeChat (`wecom_bot_id`), and DingTalk (`dingtalk_client_id`).\n* **Observability**: `langfuse_config` for linking to a Langfuse instance for trace logging and performance monitoring.\n* **Global Proxy**: A top-level `proxy` variable that acts as a fallback for all sessions that do not define their own.\n\n## Runtime Adjustments\n\nParameters defined in `mykey.py` can be overridden during a live session via the REPL using 
the `/session` command:\n\n```bash\n/session.reasoning_effort=high\n/session.thinking_budget_tokens=32768\n/session.temperature=0.3\n```\nThese changes persist until the session is re-initialized or the model is changed.","root-pyproject-toml":"# Root — pyproject.toml\n\n# Project Configuration (pyproject.toml)\n\nThe `pyproject.toml` file serves as the manifest for the `genericagent` framework. It defines the project metadata, build system requirements, and a tiered dependency structure designed to keep the core installation lightweight while allowing for modular expansion.\n\n## Project Metadata\n\n- **Name:** `genericagent`\n- **Version:** `0.1.0`\n- **Description:** Minimalist self-evolving autonomous agent framework.\n- **Python Compatibility:** Requires Python `3.10` through `3.13`.\n- **License:** MIT\n\n## Dependency Architecture\n\nThe framework follows a \"Minimal Core\" philosophy. Developers should only install the dependencies required for their specific environment or deployment target.\n\n### Core Dependencies\nThese are installed by default and provide the fundamental capabilities for networking, web scraping, and basic communication:\n\n| Package | Purpose |\n| :--- | :--- |\n| `requests` | Synchronous HTTP client for API interactions. |\n| `beautifulsoup4` | HTML parsing and data extraction for web-based tasks. |\n| `bottle` | A fast, simple, and lightweight WSGI micro web-framework. |\n| `simple-websocket-server` | Minimalist WebSocket implementation for real-time agent communication. 
|\n\n### Optional Dependencies (Extras)\nTo prevent dependency bloat, specialized features are grouped into optional extras.\n\n#### `ui`\nUsed for local graphical interfaces and dashboards.\n- `streamlit`: For data-driven web interfaces.\n- `pywebview`: For wrapping web content in native GUI windows.\n\n#### `all-frontends`\nIncludes SDKs for various messaging platforms and enterprise bots.\n- **Messaging:** `python-telegram-bot`, `qq-botpy`.\n- **Enterprise:** `lark-oapi`, `wecom-aibot-sdk`, `dingtalk-stream`.\n- **Utilities:** `pycryptodome` (encryption), `qrcode` (generation).\n\n## Installation Patterns\n\nDevelopers should choose the installation command that matches their use case:\n\n**1. Minimal Core (Headless/API only)**\n```bash\npip install .\n```\n\n**2. UI Development (Local Dashboard)**\n```bash\npip install \".[ui]\"\n```\n\n**3. Full Bot Integration**\n```bash\npip install \".[all-frontends]\"\n```\n\n## Build System\n\nThe project uses `setuptools` as the build backend.\n\n```toml\n[build-system]\nrequires = [\"setuptools>=68.0\"]\nbuild-backend = \"setuptools.build_meta\"\n```\n\nThe configuration specifies `py-modules = []` within the `[tool.setuptools]` table, indicating that the project structure likely relies on automatic discovery or a specific directory layout defined in the root.\n\n## Dependency Flow\n\nThe following diagram illustrates how the dependency tiers relate to the core framework:\n\n```mermaid\ngraph TD\n Core[Core: genericagent] --> Req[requests/bs4]\n Core --> Srv[bottle/websocket]\n \n subgraph Extras\n UI[ui] -.-> ST[streamlit/pywebview]\n FE[all-frontends] -.-> Bots[Telegram/Lark/DingTalk]\n end\n\n Core -.-> UI\n Core -.-> FE\n```\n\n## Developer Notes\n- **On-Demand Installation:** The framework is designed such that missing packages for specific frontends can be installed on demand. 
Do not add new packages to the `dependencies` list unless they are strictly required for the agent's core lifecycle.\n- **Version Constraints:** Python 3.14 is currently excluded to ensure compatibility with libraries that may use deprecated C-APIs or internal structures not yet updated for the latest Python releases.","root-readme-md":"# Root — README.md\n\n# GenericAgent: Self-Evolving Autonomous Framework\n\n**GenericAgent** is a minimalist, self-evolving autonomous agent framework designed to give Large Language Models (LLMs) system-level control over a local environment. With a core codebase of approximately 3,000 lines, it focuses on \"capability crystallization\"—the process of turning successful task executions into reusable skills.\n\n## Core Philosophy: Minimalist & Self-Bootstrapping\n\nUnlike traditional agent frameworks that ship with hundreds of pre-defined plugins, GenericAgent starts with a \"seed\" of 9 atomic tools. It uses these tools to explore the environment, install dependencies, and write its own scripts. Once a task is successful, the agent \"crystallizes\" the logic into its memory, effectively growing its own API over time.\n\n## System Architecture\n\nThe framework is built around three pillars: the **Autonomous Execution Loop**, the **Layered Memory System**, and the **Atomic Toolset**.\n\n```mermaid\ngraph TD\n LLM[LLM Engine] --> Loop[agent_loop.py]\n Loop --> Tools[Atomic Toolset]\n Loop --> Memory[Layered Memory L0-L4]\n Tools --> OS[Local OS / Browser / ADB]\n Memory -.-> LLM\n OS -.-> Loop\n```\n\n### 1. The Autonomous Execution Loop\nThe heart of the system is `agent_loop.py`, a concise (~100 lines) implementation of the Perceive-Reason-Act cycle. \n1. **Perceive**: The agent gathers state from the terminal, browser, or screen.\n2. **Reason**: The LLM determines the next step based on the current goal and available tools.\n3. **Act**: The agent invokes a tool (e.g., `code_run` or `web_execute_js`).\n4. 
**Learn**: If a milestone is reached, the agent updates its memory.\n\n### 2. Layered Memory System\nGenericAgent uses a tiered approach to manage context and long-term knowledge, ensuring high information density within a small token window (<30K tokens).\n\n| Layer | Name | Purpose |\n| :--- | :--- | :--- |\n| **L0** | Meta Rules | Core behavioral constraints and system prompts. |\n| **L1** | Insight Index | A high-level index for fast routing and memory recall. |\n| **L2** | Global Facts | Stable, long-term knowledge about the user or environment. |\n| **L3** | Task Skills (SOPs) | Reusable workflows and scripts generated during evolution. |\n| **L4** | Session Archive | Distilled records of past sessions for long-horizon recall. |\n\n### 3. Atomic Toolset\nThe agent interacts with the world through 9 primary functions. These are the only tools the agent needs to build more complex capabilities.\n\n* **Execution**: `code_run` (executes arbitrary Python/Shell code).\n* **Filesystem**: `file_read`, `file_write`, `file_patch`.\n* **Web/UI**: `web_scan` (DOM/content perception), `web_execute_js` (browser interaction).\n* **Human-in-the-Loop**: `ask_user` (for confirmation or clarification).\n* **Memory Management**: `update_working_checkpoint`, `start_long_term_update`.\n\n## Self-Evolution Mechanism\n\nThe evolution process is the framework's defining feature. When faced with a novel task (e.g., \"Monitor this specific stock\"), the agent follows this workflow:\n\n1. **Exploration**: Uses `code_run` to search for libraries (e.g., `pip install mootdx`), writes test scripts, and debugs errors.\n2. **Verification**: Runs the script to ensure the task goal is met.\n3. **Crystallization**: The agent identifies the successful execution path and saves it as a **Skill** in the L3 memory layer.\n4. 
**Recall**: The next time a similar request is made, the agent retrieves the script from L3 and executes it directly, bypassing the exploration phase.\n\n## Developer Setup\n\n### Prerequisites\n- Python 3.9+\n- LLM API Key (Claude, Gemini, or OpenAI-compatible)\n\n### Installation\n1. Clone the repository:\n ```bash\n git clone https://github.com/lsdefine/GenericAgent.git\n cd GenericAgent\n ```\n2. Install core dependencies:\n ```bash\n pip install requests streamlit pywebview\n ```\n3. Configure credentials:\n Copy `mykey_template.py` to `mykey.py` and populate your API keys and preferred model settings.\n\n### Launching the Agent\nThe primary entry point is `launch.pyw`, which starts the default Streamlit-based desktop interface:\n```bash\npython launch.pyw\n```\n\n### Alternative Frontends\nGenericAgent supports multiple communication interfaces located in the `frontends/` directory:\n- **Telegram**: `python frontends/tgapp.py`\n- **WeChat**: `python frontends/wechatapp.py`\n- **Command Line/Qt**: `python frontends/qtapp.py`\n\n## Chat Commands\nAll frontends support standard control commands:\n- `/new`: Resets the current session and clears context.\n- `/continue`: Displays a list of recoverable session snapshots.\n- `/continue N`: Restores the session to the $N^{th}$ snapshot.","root-simphtml-py":"# Root — simphtml.py\n\n# Root — simphtml.py\n\nThe `simphtml.py` module provides a suite of tools for simplifying, optimizing, and monitoring HTML content. Its primary purpose is to transform complex web pages into a token-efficient format suitable for Large Language Model (LLM) processing while retaining essential semantic structure and interactive elements.\n\n## Core Functionality\n\nThe module operates through a combination of Python-based BeautifulSoup manipulation and injected JavaScript for browser-side analysis.\n\n### 1. Semantic HTML Optimization (`js_optHTML`)\nThe core of the simplification logic resides in the `js_optHTML` JavaScript string. 
When executed via a WebDriver, it performs an \"Enhanced DOM Copy\":\n* **Visibility Filtering:** It calculates the bounding box and computed styles for every node. Elements that are hidden, zero-area, or positioned far outside the viewport are pruned.\n* **Semantic Marking:** It categorizes nodes into types such as `K:main`, `K:secondary`, `K:topBar`, and `K:overlayParent`.\n* **Iframe & Shadow DOM Integration:** It flattens `IFRAME` content (if accessible) and `ShadowRoot` nodes into the main document tree for a unified view.\n* **Interactive Element Preservation:** It ensures that inputs, buttons, and menus are preserved even if they have minimal text content.\n* **Overlay Detection:** It identifies modals and dialogs, \"hoisting\" them to the top of the simplified tree to ensure the LLM focuses on the active UI layer.\n\n### 2. Token Optimization (`optimize_html_for_tokens`)\nThis Python function uses `BeautifulSoup` to further reduce the character count:\n* **Attribute Stripping:** Removes non-essential attributes (e.g., `data-v-xxx`, `style`, `onclick`) while keeping critical ones like `id`, `class`, `href`, and `role`.\n* **URL/Image Shortening:** Replaces long Base64 strings with `__img__` and long URLs with `__url__` or `__link__`.\n* **SVG Cleaning:** Clears the internal paths of SVG elements to save space while keeping the tag.\n\n### 3. List Detection and Cutting (`cutlist`)\nThe `js_findMainList` script identifies repeating patterns (e.g., search results, product grids). \n* **Scoring:** It scores containers based on child count, area ratio, and visual uniformity.\n* **Truncation:** When `cutlist=True` is passed to `get_html`, the module identifies these lists and keeps only the first few items (or items matching a specific instruction), replacing the rest with a `[FAKE ELEMENT]` hint. 
This significantly reduces token usage on data-heavy pages.\n\n## Key Functions\n\n### `get_html(driver, cutlist=False, maxchars=35000, ...)`\nThe primary entry point for retrieving a simplified version of the current page.\n\n1. Executes `js_optHTML` to get a semantically cleaned DOM.\n2. Runs `optimize_html_for_tokens` to strip metadata.\n3. If `cutlist` is enabled, identifies and prunes repetitive list items.\n4. If the resulting string exceeds `maxchars`, it invokes `smart_truncate`.\n\n### `smart_truncate(soup, budget)`\nA recursive algorithm that prunes the BeautifulSoup tree to fit a character budget:\n* **Single-Child Penetration:** If a node has only one child, it recurses deeper without cutting.\n* **Proportional Reduction:** If a node has multiple large children, it distributes the \"cut\" budget among the top 3 largest children proportionally.\n* **Tail Cutting:** If children are small, it removes them from the end of the list first.\n\n### `execute_js_rich(script, driver)`\nA high-level wrapper for executing JavaScript that provides context about the execution's impact:\n* **Transient Detection:** Captures temporary UI elements (like toast messages or alerts) that appear and disappear during execution.\n* **DOM Diffing:** Uses `find_changed_elements` to report exactly what changed in the DOM (e.g., \"DOM变化量: 5, 最显著变化: [HTML snippet]\").\n* **Tab Monitoring:** Detects if the script opened new tabs or caused a page reload.\n\n## Execution Flow: `get_html`\n\n```mermaid\ngraph TD\n A[Start get_html] --> B[Execute js_optHTML in Browser]\n B --> C{text_only?}\n C -- Yes --> D[Return Clean Text]\n C -- No --> E[Python: optimize_html_for_tokens]\n E --> F{cutlist enabled?}\n F -- Yes --> G[Execute js_findMainList & Prune Items]\n F -- No --> H[Check maxchars]\n G --> H\n H --> I{Over Budget?}\n I -- Yes --> J[smart_truncate Recursive Pruning]\n I -- No --> K[Return Final HTML]\n J --> K\n```\n\n## Monitoring Utilities\n\nThe module includes a \"Temporary 
Monitor\" (`start_temp_monitor`) designed to catch asynchronous UI changes:\n* **`start_temp_monitor(driver)`**: Injects a background interval that tracks all text nodes in the document.\n* **`get_temp_texts(driver)`**: Compares the current text nodes against the initial state and the \"seen\" set to identify messages that appeared briefly (e.g., \"Form submitted successfully\") and have since vanished.","root-tmwebdriver-py":"# Root — TMWebDriver.py\n\n# TMWebDriver\n\n`TMWebDriver` is a custom automation bridge that enables remote JavaScript execution within browser tabs. Unlike standard Selenium or Playwright drivers, it operates via a lightweight communication layer using WebSockets (WS) and HTTP long-polling, allowing it to control existing browser sessions or extension-managed tabs.\n\n## Core Architecture\n\nThe module functions as a dual-protocol server (WebSocket + HTTP) that maintains a registry of active browser \"Sessions.\" It can operate in two modes:\n1. **Master Mode**: Runs the WS and HTTP servers to communicate directly with browser clients.\n2. 
**Remote Mode**: Acts as a proxy, forwarding commands to an existing Master instance via an HTTP API.\n\n```mermaid\ngraph TD\n A[Python Script] -->|execute_js| B(TMWebDriver)\n B -->|WebSocket| C[Browser Tab / Extension]\n B -->|HTTP Long-poll| D[Browser Tab]\n C -->|Result| B\n D -->|Result| B\n B -->|Return| A\n```\n\n## Session Management\n\nThe `Session` class tracks the state of individual browser tabs.\n\n### Session Types\n- `ws`: A direct WebSocket connection from a specific web page.\n- `ext_ws`: A connection from a browser extension that may manage multiple tab IDs.\n- `http`: A connection maintained via the `/api/longpoll` endpoint.\n\n### Lifecycle Methods\n- `is_active()`: Determines if a session is still valid based on disconnection timestamps or HTTP timeouts (60s).\n- `mark_disconnected()`: Sets the `disconnect_at` timestamp.\n- `reconnect(client, info)`: Updates the session with a new transport client (WS object or HTTP Queue) while preserving the session ID.\n\n## Key Components\n\n### TMWebDriver Class\nThe main controller responsible for server orchestration and command routing.\n\n#### Server Initialization\n- `start_ws_server()`: Launches a `WebSocketServer` on the primary port. It handles `ready` (new tab), `ext_ready`/`tabs_update` (extension tabs), and `result` messages.\n- `start_http_server()`: Launches a Bottle-based server on `port + 1`.\n - `/api/longpoll`: Used by HTTP-based clients to receive commands.\n - `/api/result`: Used by HTTP-based clients to return execution results.\n - `/link`: A control API for remote instances to query sessions or trigger execution.\n\n#### JavaScript Execution Flow\nThe `execute_js` method is the primary interface for interacting with the browser:\n\n1. **Session Selection**: Uses the provided `session_id` or falls back to the `default_session_id`. If the target session is inactive, it attempts to failover to the latest active session.\n2. 
**Payload Dispatch**: Generates a unique `exec_id` (UUID) and wraps the code.\n - For `ws`/`ext_ws`: Sends via `ws_client.send_message`.\n - For `http`: Pushes to the session's `http_queue`.\n3. **Synchronization**: Enters a polling loop checking `self.results` for the `exec_id`.\n4. **ACK Handling**: If the client sends an `ack`, the timeout timer is reset, allowing long-running scripts to continue.\n5. **Cleanup**: Removes the result from the internal buffer and returns the data.\n\n## API Reference\n\n### `execute_js(code, timeout=15, session_id=None)`\nExecutes arbitrary JavaScript in the context of the specified session.\n- **Returns**: A dictionary containing `data` (the JS return value) and optionally `newTabs`.\n- **Raises**: `Exception` if the JS execution fails or `ValueError` if the session is unavailable.\n\n### `set_session(url_pattern)`\nSearches active sessions for a URL matching the pattern and sets it as the default for future commands.\n- **Returns**: The `session_id` if successful, else `None`.\n\n### `get_all_sessions()`\nReturns a list of metadata for all currently active sessions, including IDs, URLs, and titles.\n\n### `jump(url, timeout=10)`\nA convenience wrapper around `execute_js` that performs a `window.location.href` navigation.\n\n## Remote Execution Logic\nWhen `TMWebDriver` initializes, it checks if `port + 1` is already occupied. If so, it sets `self.is_remote = True`. 
\nIn this state:\n- Calls to `execute_js`, `get_all_sessions`, and `find_session` are serialized and forwarded via `_remote_cmd` to the Master instance.\n- This allows multiple Python processes to share control over the same browser tabs through a single Master coordinator.\n\n## Error Handling and Timeouts\n- **ACK vs Result**: The driver distinguishes between a script being received (`ack`) and a script finishing (`result`).\n- **Session Jump**: If a session reloads during execution (detected via `is_active` toggling), the driver returns a \"reloaded\" status to prevent hanging on a stale execution context.\n- **Cleanup**: `clean_sessions()` is called during execution to purge sessions that have been disconnected for more than 600 seconds.","root":"# Root\n\n# Root Module: GenericAgent Framework\n\nThe **Root** module contains the core logic for GenericAgent, a minimalist, self-evolving autonomous framework. It is designed to bootstrap from a small set of \"seed\" tools into a fully-featured assistant by managing its own dependencies and \"crystallizing\" successful task executions into reusable skills.\n\n## Core Architecture\n\nThe system is organized into four functional layers: Orchestration, Execution, Capabilities, and Configuration.\n\n### 1. Orchestration & Entry Points\nThese modules manage the application lifecycle and user interface.\n* [**agentmain.py**](agentmain.md): The central orchestration layer. It defines the `GeneraticAgent` class, which manages LLM sessions, task queuing, and global memory.\n* [**launch.pyw**](launch.md): The primary GUI entry point. It manages the lifecycle of the Streamlit backend and wraps the interface in a `pywebview` desktop window.\n* [**hub.pyw**](hub.md): A service manager used to discover, start, and monitor various agent services and frontends.\n* [**GETTING_STARTED.md**](GETTING_STARTED.md): Defines the bootstrapping process for transitioning from a minimal script to a full system.\n\n### 2. 
Execution Engine\nThe logic that drives the agent's decision-making process.\n* [**agent_loop.py**](agent_loop.md): Implements the \"Thought-Action-Observation\" cycle. It manages turn limits, context window optimization, and the interaction between the LLM and tool execution.\n* [**llmcore.py**](llmcore.md): A unified abstraction layer for multiple LLM providers (Anthropic, OpenAI, etc.). It handles conversation history and streaming responses through a standardized `Session` hierarchy.\n\n### 3. Tooling & Environment Control\nThe \"hands\" of the agent, allowing it to interact with the local system and the web.\n* [**ga.py**](ga.md): The primary tool implementation layer. It provides the `GenericAgentHandler` for file manipulation, code execution, and memory management.\n* [**TMWebDriver.py**](TMWebDriver.md): A custom automation bridge for remote JavaScript execution in browser tabs via WebSockets and HTTP.\n* [**simphtml.py**](simphtml.md): A utility for transforming complex HTML into token-efficient formats, ensuring the LLM can process web content effectively.\n\n### 4. 
Configuration & Metadata\n* [**mykey_template.py**](mykey_template.md) / [**mykey_template_en.py**](mykey_template_en.md): Blueprints for API credentials and session behaviors.\n* [**pyproject.toml**](pyproject.md): Defines the project manifest and tiered dependency structure.\n\n## System Workflow\n\nThe following diagram illustrates how a user request flows through the sub-modules to interact with the environment:\n\n```mermaid\ngraph TD\n User[User Input] --> Launch[launch.pyw / agentmain.py]\n Launch --> AgentMain[agentmain.py: GeneraticAgent]\n AgentMain --> LLMCore[llmcore.py: Session Management]\n AgentMain --> Loop[agent_loop.py: Execution Loop]\n \n Loop -->|Tool Call| GAH[ga.py: GenericAgentHandler]\n GAH -->|Code Execution| Local[Local System]\n GAH -->|Web Task| TMWD[TMWebDriver.py]\n TMWD -->|HTML Processing| Simp[simphtml.py]\n Simp -->|Clean Content| Loop\n \n Loop -->|Observation| AgentMain\n AgentMain -->|Response| User\n```\n\n## Key Integration Patterns\n\n* **Capability Crystallization:** When the agent successfully completes a task using `ga.py` tools (like `do_code_run`), it can write new scripts to its local environment, which are then discovered by `hub.pyw` or loaded as new tools in subsequent sessions.\n* **Context Management:** `agent_loop.py` works closely with `llmcore.py` to prune tool definitions and history every 10 turns, preventing the \"context bloat\" that typically degrades long-running autonomous tasks.\n* **Convention-over-Configuration:** `agentmain.py` dynamically scans `mykey.py` (created from templates) to automatically instantiate the correct `Session` classes from `llmcore.py` based on variable naming conventions."}; | |
| var TREE = [{"name":"Root","slug":"root","files":[],"children":[{"name":"Root — GETTING_STARTED.md","slug":"root-getting-started-md","files":["GETTING_STARTED.md"]},{"name":"Root — README.md","slug":"root-readme-md","files":["README.md"]},{"name":"Root — TMWebDriver.py","slug":"root-tmwebdriver-py","files":["TMWebDriver.py"]},{"name":"Root — agent_loop.py","slug":"root-agent-loop-py","files":["agent_loop.py"]},{"name":"Root — agentmain.py","slug":"root-agentmain-py","files":["agentmain.py"]},{"name":"Root — ga.py","slug":"root-ga-py","files":["ga.py"]},{"name":"Root — hub.pyw","slug":"root-hub-pyw","files":["hub.pyw"]},{"name":"Root — launch.pyw","slug":"root-launch-pyw","files":["launch.pyw"]},{"name":"Root — llmcore.py","slug":"root-llmcore-py","files":["llmcore.py"]},{"name":"Root — mykey_template.py","slug":"root-mykey-template-py","files":["mykey_template.py"]},{"name":"Root — mykey_template_en.py","slug":"root-mykey-template-en-py","files":["mykey_template_en.py"]},{"name":"Root — pyproject.toml","slug":"root-pyproject-toml","files":["pyproject.toml"]},{"name":"Root — simphtml.py","slug":"root-simphtml-py","files":["simphtml.py"]}]},{"name":"assets","slug":"assets","files":["assets/SETUP_FEISHU.md","assets/code_run_header.py","assets/global_mem_insight_template.txt","assets/global_mem_insight_template_en.txt","assets/insight_fixed_structure.txt","assets/insight_fixed_structure_en.txt","assets/install-macos-app.sh","assets/install_python_windows.bat","assets/sys_prompt.txt","assets/sys_prompt_en.txt","assets/tmwd_cdp_bridge/background.js","assets/tmwd_cdp_bridge/content.js","assets/tmwd_cdp_bridge/disable_dialogs.js","assets/tmwd_cdp_bridge/manifest.json","assets/tmwd_cdp_bridge/popup.html","assets/tmwd_cdp_bridge/popup.js","assets/tool_usable_history.json","assets/tools_schema.json","assets/tools_schema_cn.json"]},{"name":"frontends","slug":"frontends","files":[],"children":[{"name":"frontends — 
frontends","slug":"frontends-frontends","files":["frontends/DESKTOP_PET_README.md","frontends/chatapp_common.py","frontends/continue_cmd.py","frontends/dcapp.py","frontends/desktop_pet.pyw","frontends/desktop_pet_v2.pyw","frontends/dingtalkapp.py","frontends/fsapp.py","frontends/qqapp.py","frontends/qtapp.py","frontends/stapp.py","frontends/stapp2.py","frontends/tgapp.py","frontends/wechatapp.py","frontends/wecomapp.py"]},{"name":"frontends — skins","slug":"frontends-skins","files":["frontends/skins/boy/skin.json","frontends/skins/dinosaur/skin.json","frontends/skins/doux/skin.json","frontends/skins/glube/skin.json","frontends/skins/line/License.txt","frontends/skins/line/skin.json","frontends/skins/mort/skin.json","frontends/skins/tard/skin.json","frontends/skins/vita/skin.json"]}]},{"name":"graphify-out","slug":"graphify-out","files":["graphify-out/GRAPH_REPORT.md","graphify-out/manifest.json"]},{"name":"memory","slug":"memory","files":["memory/L4_raw_sessions/compress_session.py","memory/adb_ui.py","memory/autonomous_operation_sop.md","memory/autonomous_operation_sop/helper.py","memory/autonomous_operation_sop/task_planning.md","memory/keychain.py","memory/ljqCtrl.py","memory/ljqCtrl_sop.md","memory/memory_cleanup_sop.md","memory/memory_management_sop.md","memory/ocr_utils.py","memory/plan_sop.md","memory/procmem_scanner.py","memory/procmem_scanner_sop.md","memory/scheduled_task_sop.md","memory/skill_search/SKILL.md","memory/skill_search/skill_search/__init__.py","memory/skill_search/skill_search/__main__.py","memory/skill_search/skill_search/engine.py","memory/supervisor_sop.md","memory/tmwebdriver_sop.md","memory/ui_detect.py","memory/vision_api.template.py","memory/vision_sop.md","memory/web_setup_sop.md"]},{"name":"plugins","slug":"plugins","files":["plugins/langfuse_tracing.py"]},{"name":"reflect","slug":"reflect","files":["reflect/autonomous.py","reflect/scheduler.py"]}]; | |
| var META = {"fromCommit":"1140e22c3c9871904283469256cbab231fb405fb","generatedAt":"2026-05-05T01:25:57.546Z","model":"gemini-3-flash-preview","moduleFiles":{"Root":["GETTING_STARTED.md","README.md","TMWebDriver.py","agent_loop.py","agentmain.py","ga.py","hub.pyw","launch.pyw","llmcore.py","mykey_template.py","mykey_template_en.py","pyproject.toml","simphtml.py"],"Root — GETTING_STARTED.md":["GETTING_STARTED.md"],"Root — README.md":["README.md"],"Root — TMWebDriver.py":["TMWebDriver.py"],"Root — agent_loop.py":["agent_loop.py"],"Root — agentmain.py":["agentmain.py"],"Root — ga.py":["ga.py"],"Root — hub.pyw":["hub.pyw"],"Root — launch.pyw":["launch.pyw"],"Root — llmcore.py":["llmcore.py"],"Root — mykey_template.py":["mykey_template.py"],"Root — mykey_template_en.py":["mykey_template_en.py"],"Root — pyproject.toml":["pyproject.toml"],"Root — simphtml.py":["simphtml.py"],"assets":["assets/SETUP_FEISHU.md","assets/code_run_header.py","assets/global_mem_insight_template.txt","assets/global_mem_insight_template_en.txt","assets/insight_fixed_structure.txt","assets/insight_fixed_structure_en.txt","assets/install-macos-app.sh","assets/install_python_windows.bat","assets/sys_prompt.txt","assets/sys_prompt_en.txt","assets/tmwd_cdp_bridge/background.js","assets/tmwd_cdp_bridge/content.js","assets/tmwd_cdp_bridge/disable_dialogs.js","assets/tmwd_cdp_bridge/manifest.json","assets/tmwd_cdp_bridge/popup.html","assets/tmwd_cdp_bridge/popup.js","assets/tool_usable_history.json","assets/tools_schema.json","assets/tools_schema_cn.json"],"frontends":["frontends/DESKTOP_PET_README.md","frontends/chatapp_common.py","frontends/continue_cmd.py","frontends/dcapp.py","frontends/desktop_pet.pyw","frontends/desktop_pet_v2.pyw","frontends/dingtalkapp.py","frontends/fsapp.py","frontends/qqapp.py","frontends/qtapp.py","frontends/stapp.py","frontends/stapp2.py","frontends/tgapp.py","frontends/wechatapp.py","frontends/wecomapp.py","frontends/skins/boy/skin.json","frontends/skins/dinosaur/skin.json"
,"frontends/skins/doux/skin.json","frontends/skins/glube/skin.json","frontends/skins/line/License.txt","frontends/skins/line/skin.json","frontends/skins/mort/skin.json","frontends/skins/tard/skin.json","frontends/skins/vita/skin.json"],"frontends — frontends":["frontends/DESKTOP_PET_README.md","frontends/chatapp_common.py","frontends/continue_cmd.py","frontends/dcapp.py","frontends/desktop_pet.pyw","frontends/desktop_pet_v2.pyw","frontends/dingtalkapp.py","frontends/fsapp.py","frontends/qqapp.py","frontends/qtapp.py","frontends/stapp.py","frontends/stapp2.py","frontends/tgapp.py","frontends/wechatapp.py","frontends/wecomapp.py"],"frontends — skins":["frontends/skins/boy/skin.json","frontends/skins/dinosaur/skin.json","frontends/skins/doux/skin.json","frontends/skins/glube/skin.json","frontends/skins/line/License.txt","frontends/skins/line/skin.json","frontends/skins/mort/skin.json","frontends/skins/tard/skin.json","frontends/skins/vita/skin.json"],"graphify-out":["graphify-out/GRAPH_REPORT.md","graphify-out/manifest.json"],"memory":["memory/L4_raw_sessions/compress_session.py","memory/adb_ui.py","memory/autonomous_operation_sop.md","memory/autonomous_operation_sop/helper.py","memory/autonomous_operation_sop/task_planning.md","memory/keychain.py","memory/ljqCtrl.py","memory/ljqCtrl_sop.md","memory/memory_cleanup_sop.md","memory/memory_management_sop.md","memory/ocr_utils.py","memory/plan_sop.md","memory/procmem_scanner.py","memory/procmem_scanner_sop.md","memory/scheduled_task_sop.md","memory/skill_search/SKILL.md","memory/skill_search/skill_search/__init__.py","memory/skill_search/skill_search/__main__.py","memory/skill_search/skill_search/engine.py","memory/supervisor_sop.md","memory/tmwebdriver_sop.md","memory/ui_detect.py","memory/vision_api.template.py","memory/vision_sop.md","memory/web_setup_sop.md"],"plugins":["plugins/langfuse_tracing.py"],"reflect":["reflect/autonomous.py","reflect/scheduler.py"]},"moduleTree":[{"name":"Root","slug":"root","files":[],"child
ren":[{"name":"Root — GETTING_STARTED.md","slug":"root-getting-started-md","files":["GETTING_STARTED.md"]},{"name":"Root — README.md","slug":"root-readme-md","files":["README.md"]},{"name":"Root — TMWebDriver.py","slug":"root-tmwebdriver-py","files":["TMWebDriver.py"]},{"name":"Root — agent_loop.py","slug":"root-agent-loop-py","files":["agent_loop.py"]},{"name":"Root — agentmain.py","slug":"root-agentmain-py","files":["agentmain.py"]},{"name":"Root — ga.py","slug":"root-ga-py","files":["ga.py"]},{"name":"Root — hub.pyw","slug":"root-hub-pyw","files":["hub.pyw"]},{"name":"Root — launch.pyw","slug":"root-launch-pyw","files":["launch.pyw"]},{"name":"Root — llmcore.py","slug":"root-llmcore-py","files":["llmcore.py"]},{"name":"Root — mykey_template.py","slug":"root-mykey-template-py","files":["mykey_template.py"]},{"name":"Root — mykey_template_en.py","slug":"root-mykey-template-en-py","files":["mykey_template_en.py"]},{"name":"Root — pyproject.toml","slug":"root-pyproject-toml","files":["pyproject.toml"]},{"name":"Root — simphtml.py","slug":"root-simphtml-py","files":["simphtml.py"]}]},{"name":"assets","slug":"assets","files":["assets/SETUP_FEISHU.md","assets/code_run_header.py","assets/global_mem_insight_template.txt","assets/global_mem_insight_template_en.txt","assets/insight_fixed_structure.txt","assets/insight_fixed_structure_en.txt","assets/install-macos-app.sh","assets/install_python_windows.bat","assets/sys_prompt.txt","assets/sys_prompt_en.txt","assets/tmwd_cdp_bridge/background.js","assets/tmwd_cdp_bridge/content.js","assets/tmwd_cdp_bridge/disable_dialogs.js","assets/tmwd_cdp_bridge/manifest.json","assets/tmwd_cdp_bridge/popup.html","assets/tmwd_cdp_bridge/popup.js","assets/tool_usable_history.json","assets/tools_schema.json","assets/tools_schema_cn.json"]},{"name":"frontends","slug":"frontends","files":[],"children":[{"name":"frontends — 
frontends","slug":"frontends-frontends","files":["frontends/DESKTOP_PET_README.md","frontends/chatapp_common.py","frontends/continue_cmd.py","frontends/dcapp.py","frontends/desktop_pet.pyw","frontends/desktop_pet_v2.pyw","frontends/dingtalkapp.py","frontends/fsapp.py","frontends/qqapp.py","frontends/qtapp.py","frontends/stapp.py","frontends/stapp2.py","frontends/tgapp.py","frontends/wechatapp.py","frontends/wecomapp.py"]},{"name":"frontends — skins","slug":"frontends-skins","files":["frontends/skins/boy/skin.json","frontends/skins/dinosaur/skin.json","frontends/skins/doux/skin.json","frontends/skins/glube/skin.json","frontends/skins/line/License.txt","frontends/skins/line/skin.json","frontends/skins/mort/skin.json","frontends/skins/tard/skin.json","frontends/skins/vita/skin.json"]}]},{"name":"graphify-out","slug":"graphify-out","files":["graphify-out/GRAPH_REPORT.md","graphify-out/manifest.json"]},{"name":"memory","slug":"memory","files":["memory/L4_raw_sessions/compress_session.py","memory/adb_ui.py","memory/autonomous_operation_sop.md","memory/autonomous_operation_sop/helper.py","memory/autonomous_operation_sop/task_planning.md","memory/keychain.py","memory/ljqCtrl.py","memory/ljqCtrl_sop.md","memory/memory_cleanup_sop.md","memory/memory_management_sop.md","memory/ocr_utils.py","memory/plan_sop.md","memory/procmem_scanner.py","memory/procmem_scanner_sop.md","memory/scheduled_task_sop.md","memory/skill_search/SKILL.md","memory/skill_search/skill_search/__init__.py","memory/skill_search/skill_search/__main__.py","memory/skill_search/skill_search/engine.py","memory/supervisor_sop.md","memory/tmwebdriver_sop.md","memory/ui_detect.py","memory/vision_api.template.py","memory/vision_sop.md","memory/web_setup_sop.md"]},{"name":"plugins","slug":"plugins","files":["plugins/langfuse_tracing.py"]},{"name":"reflect","slug":"reflect","files":["reflect/autonomous.py","reflect/scheduler.py"]}]}; | |
(function() {
  // Wiki SPA bootstrap: all state and handlers live inside this IIFE so
  // nothing leaks into the global scope beyond the PAGES/TREE/META data vars.
  var activePage = 'overview'; // slug of the page currently displayed
  document.addEventListener('DOMContentLoaded', function() {
    // startOnLoad:false — diagrams are rendered manually by navigateTo()
    // after it converts ```mermaid code fences into .mermaid divs.
    mermaid.initialize({ startOnLoad: false, theme: 'neutral', securityLevel: 'loose' });
    renderMeta();
    renderNav();
    // Mobile hamburger: shows/hides the fixed sidebar.
    document.getElementById('menu-toggle').addEventListener('click', function() {
      document.getElementById('sidebar').classList.toggle('open');
    });
    // Deep-link support: restore the page slug encoded in the URL hash.
    if (location.hash && location.hash.length > 1) {
      activePage = decodeURIComponent(location.hash.slice(1));
    }
    navigateTo(activePage);
  });
| function renderMeta() { | |
| if (!META) return; | |
| var el = document.getElementById('meta-info'); | |
| var parts = []; | |
| if (META.generatedAt) { | |
| parts.push(new Date(META.generatedAt).toLocaleDateString()); | |
| } | |
| if (META.model) parts.push(META.model); | |
| if (META.fromCommit) parts.push(META.fromCommit.slice(0, 8)); | |
| el.textContent = parts.join(' \u00b7 '); | |
| } | |
| function renderNav() { | |
| var container = document.getElementById('nav-tree'); | |
| var html = '<div class="nav-section">'; | |
| html += '<a class="nav-item overview" data-page="overview" href="#overview">Overview</a>'; | |
| html += '</div>'; | |
| if (TREE.length > 0) { | |
| html += '<div class="nav-group-label">Modules</div>'; | |
| html += buildNavTree(TREE); | |
| } | |
| container.innerHTML = html; | |
| container.addEventListener('click', function(e) { | |
| var target = e.target; | |
| while (target && !target.dataset.page) { target = target.parentElement; } | |
| if (target && target.dataset.page) { | |
| e.preventDefault(); | |
| navigateTo(target.dataset.page); | |
| } | |
| }); | |
| } | |
| function buildNavTree(nodes) { | |
| var html = ''; | |
| for (var i = 0; i < nodes.length; i++) { | |
| var node = nodes[i]; | |
| html += '<div class="nav-section">'; | |
| html += '<a class="nav-item" data-page="' + escH(node.slug) + '" href="#' + encodeURIComponent(node.slug) + '">' + escH(node.name) + '</a>'; | |
| if (node.children && node.children.length > 0) { | |
| html += '<div class="nav-children">' + buildNavTree(node.children) + '</div>'; | |
| } | |
| html += '</div>'; | |
| } | |
| return html; | |
| } | |
| function escH(s) { | |
| var d = document.createElement('div'); | |
| d.textContent = s; | |
| return d.innerHTML; | |
| } | |
| function navigateTo(page) { | |
| activePage = page; | |
| location.hash = encodeURIComponent(page); | |
| var items = document.querySelectorAll('.nav-item'); | |
| for (var i = 0; i < items.length; i++) { | |
| if (items[i].dataset.page === page) { | |
| items[i].classList.add('active'); | |
| } else { | |
| items[i].classList.remove('active'); | |
| } | |
| } | |
| var contentEl = document.getElementById('content'); | |
| var md = PAGES[page]; | |
| if (!md) { | |
| contentEl.innerHTML = '<div class="empty-state"><h2>Page not found</h2><p>' + escH(page) + '.md does not exist.</p></div>'; | |
| return; | |
| } | |
| contentEl.innerHTML = marked.parse(md); | |
| // Rewrite .md links to hash navigation | |
| var links = contentEl.querySelectorAll('a[href]'); | |
| for (var i = 0; i < links.length; i++) { | |
| var href = links[i].getAttribute('href'); | |
| if (href && href.endsWith('.md') && href.indexOf('://') === -1) { | |
| var slug = href.replace(/\.md$/, ''); | |
| links[i].setAttribute('href', '#' + encodeURIComponent(slug)); | |
| (function(s) { | |
| links[i].addEventListener('click', function(e) { | |
| e.preventDefault(); | |
| navigateTo(s); | |
| }); | |
| })(slug); | |
| } | |
| } | |
| // Convert mermaid code blocks into mermaid divs | |
| var mermaidBlocks = contentEl.querySelectorAll('pre code.language-mermaid'); | |
| for (var i = 0; i < mermaidBlocks.length; i++) { | |
| var pre = mermaidBlocks[i].parentElement; | |
| var div = document.createElement('div'); | |
| div.className = 'mermaid'; | |
| div.textContent = mermaidBlocks[i].textContent; | |
| pre.parentNode.replaceChild(div, pre); | |
| } | |
| try { mermaid.run({ querySelector: '.mermaid' }); } catch(e) {} | |
| window.scrollTo(0, 0); | |
| document.getElementById('sidebar').classList.remove('open'); | |
| } | |
| })(); | |
| </script> | |
| </body> | |
| </html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment