From 3dceb30ce96f80a7ea8ef38774a2b4f5c4268a36 Mon Sep 17 00:00:00 2001 From: "glm-5.1" Date: Tue, 21 Apr 2026 12:41:14 +0000 Subject: [PATCH] Review cleanup: fix stale tool references, update docs, add README - Remove unused src/context/notify.ts (never wired up) - Fix format.ts/search.ts: update memory_messages references to router pattern - Update AGENTS.md: reflect current state, add recommended consumer additions - Update docs/architecture.md: match router pattern, remove stale phases - Add README.md: problem/solution, install, tools, agent guidance --- AGENTS.md | 69 ++++++++++++----- README.md | 120 +++++++++++++++++++++++++++++ docs/architecture.md | 173 +++++++++++++++++++++--------------------- src/context/notify.ts | 26 ------- src/history/format.ts | 4 +- src/history/search.ts | 4 +- 6 files changed, 262 insertions(+), 134 deletions(-) create mode 100644 README.md delete mode 100644 src/context/notify.ts diff --git a/AGENTS.md b/AGENTS.md index a1778fa..5ffca46 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -38,13 +38,12 @@ bun run test # bun test ``` src/ ├── index.ts # Plugin entry: hooks + tool registration -├── tools.ts # Tool definitions (memory_*) +├── tools.ts # Tool definitions (memory router + memory_compact) ├── context/ │ ├── tracker.ts # SSE token tracking (per-session context usage) -│ ├── thresholds.ts # Threshold constants + ContextStatus type (single source of truth) -│ └── notify.ts # Context notification formatting +│ └── thresholds.ts # Threshold constants + ContextStatus type (single source of truth) ├── history/ -│ ├── queries.ts # bun:sqlite read-only query helper (all DB access goes here) +│ ├── queries.ts # bun:sqlite read-only query helper (lazy singleton) │ ├── format.ts # Markdown rendering for session/message output │ └── search.ts # LIKE-based full-text search across conversations └── compaction/ @@ -77,7 +76,7 @@ The `memory` tool dispatches to internal handlers by `tool` name, keeping the ag | sessions | List recent sessions, 
optionally filtered by project | limit, projectPath | | messages | Read messages from a specific session | sessionId, limit | | search | Text search across all conversations (LIKE-based) | query, limit | -| compactions | List/read compaction checkpoints for a session | sessionId, read | +| compactions | List/read compaction checkpoints for a session | sessionId, read (1-based index) | | context | Current context window usage (% , tokens, model, status) | — | | plans | List or read saved plan files | read (filename) | @@ -116,12 +115,14 @@ When compaction occurs, OpenCode creates: 2. `message.data.summary = {diffs: [...]}` on the compaction message 3. The assistant message immediately after contains the actual summary text in a `text`-type part -The `memory_compactions` tool queries for `compaction`-type parts and retrieves the adjacent summary text, presenting them as navigable checkpoints. +The `compactions` operation queries for `compaction`-type parts and retrieves the adjacent summary text, presenting them as navigable checkpoints. ### Write Operations All write operations (compaction triggering) go through the OpenCode client SDK (`ctx.client.session.summarize`). The plugin never writes to the database or any OpenCode files. +**`memory_compact` must NOT await `ctx.client.session.summarize()`** — it returns immediately and schedules via `setTimeout(() => { ... }, 0)` because compaction cannot start until the tool returns control to the event loop. 
+ ## Key Conventions - No comments unless requested @@ -140,25 +141,51 @@ All write operations (compaction triggering) go through the OpenCode client SDK - Both implement `experimental.session.compacting` — open-memory's version is more detailed - Can be used together or independently -## Implementation Phases +## Recommended AGENTS.md Additions for Consumers -### Phase 1: Foundation (current) -- Plugin scaffolding, build, basic hooks -- Compaction prompt, context tool, history tools -- `bun:sqlite` read-only queries +When using this plugin in an OpenCode project, consider adding these lines to your project's `AGENTS.md` so that agents know about and can effectively use the memory tools: -### Phase 2: Context Awareness -- SSE token tracker (implemented, needs field testing) -- Proactive system prompt injection -- `memory_compact` tool +```markdown +## Memory Tools (via @alkdev/open-memory plugin) -### Phase 3: History Browser Enhancements -- FTS5 virtual table support (if available) -- Better search (stemming, ranking) +You have access to two tools for managing your context and accessing session history: + +### memory({tool: "...", args: {...}}) + +Read-only tool for introspecting your session history and context state. Available operations: +- `memory({tool: "help"})` — full reference with examples +- `memory({tool: "summary"})` — quick counts of projects, sessions, messages, todos +- `memory({tool: "sessions"})` — list recent sessions (useful for finding past work) +- `memory({tool: "messages", args: {sessionId: "..."}})` — read a session's conversation +- `memory({tool: "search", args: {query: "..."}})` — search across all conversations +- `memory({tool: "compactions", args: {sessionId: "..."}})` — view compaction checkpoints +- `memory({tool: "context"})` — check your current context window usage + +### memory_compact() + +Trigger compaction on the current session. This summarizes the conversation so far to free context space. 
+ +**When to use memory_compact:** +- When context is above 80% (check with `memory({tool: "context"})`) +- When you notice you're losing track of earlier conversation details +- At natural breakpoints in multi-step tasks (after completing a subtask, before starting a new one) +- When the system prompt shows a yellow/red/critical context warning +- Proactively, rather than waiting for automatic compaction at 92% + +**When NOT to use memory_compact:** +- When context is below 50% (it wastes a compaction cycle) +- In the middle of a complex edit that you need immediate context for +- When the task is nearly complete (just finish the task instead) + +Compaction preserves your most important context in a structured summary — you will continue the session with the summary as your starting point. +``` + +## Roadmap + +### Future improvements +- FTS5 virtual table support for better search (stemming, ranking) +- Configurable thresholds via plugin config - Session comparison tools - -### Phase 4: Polish -- Configurable thresholds - Export/import helpers - Integration tests diff --git a/README.md b/README.md new file mode 100644 index 0000000..b415990 --- /dev/null +++ b/README.md @@ -0,0 +1,120 @@ +# @alkdev/open-memory + +An [OpenCode](https://opencode.ai) plugin that gives agents access to their own session history, context window awareness, and compaction control. + +## Why + +OpenCode agents have three problems this plugin solves: + +1. **Context blindness** — agents don't know how much of their context window is used until they hit the wall at ~92% and automatic compaction fires with no warning +2. **No history access** — agents can't look back at previous sessions, search past conversations, or read compaction checkpoints — the data exists in SQLite but there's no tool interface for it +3. 
**Disorienting compaction** — the default compaction prompt says "summarize for another agent" when it's the same agent continuing, losing task context at an unpredictable point + +Open-memory fixes all three: agents get real-time context awareness injected into their system prompt, read-only tools for browsing session history, and control over *when* compaction happens with a prompt that preserves self-continuity. + +## Install + +```bash +bun add @alkdev/open-memory +``` + +Add to your `opencode.json`: + +```json +{ + "plugin": ["@alkdev/open-memory"] +} +``` + +## Tools + +The plugin exposes exactly 2 tools to keep context bloat minimal: + +### `memory` + +Read-only router for all introspection operations. Call with `{tool: "name", args: {...}}`. + +| Operation | Description | +|-----------|-------------| +| `memory({tool: "help"})` | Full reference with examples | +| `memory({tool: "summary"})` | Quick counts: projects, sessions, messages, todos | +| `memory({tool: "sessions"})` | List recent sessions (filterable by project) | +| `memory({tool: "messages", args: {sessionId: "..."}})` | Read a session's conversation | +| `memory({tool: "search", args: {query: "..."}})` | Search across all conversations | +| `memory({tool: "compactions", args: {sessionId: "..."}})` | View compaction checkpoints | +| `memory({tool: "context"})` | Current context window usage | +| `memory({tool: "plans"})` | List or read saved plan files | + +### `memory_compact` + +Trigger compaction on the current session. Summarizes the conversation to free context space. Use when context is getting high (80%+) to control *when* compaction happens, rather than waiting for automatic compaction at 92%. + +The compaction prompt is rewritten to emphasize self-continuity — the agent summarizes for itself, not "for another agent" — using a structured template (Goal, Instructions, Discoveries, Accomplished, Relevant files, Notes). 
+ +## Context Awareness + +The plugin injects context status into the agent's system prompt: + +- **Green** (<70%): Healthy +- **Yellow** (70-85%): Advises considering compaction +- **Red** (85-92%): Strongly recommends compacting at next break +- **Critical** (>92%): Imminent automatic compaction + +The agent always knows its context state without having to call a tool. + +## Recommended AGENTS.md Additions + +For agents to effectively use these tools, add guidance to your project's `AGENTS.md`: + +```markdown +## Memory Tools (via @alkdev/open-memory plugin) + +You have access to two tools for managing your context and accessing session history: + +### memory({tool: "...", args: {...}}) + +Read-only tool for introspecting your session history and context state. Available operations: +- `memory({tool: "help"})` — full reference with examples +- `memory({tool: "summary"})` — quick counts of projects, sessions, messages, todos +- `memory({tool: "sessions"})` — list recent sessions (useful for finding past work) +- `memory({tool: "messages", args: {sessionId: "..."}})` — read a session's conversation +- `memory({tool: "search", args: {query: "..."}})` — search across all conversations +- `memory({tool: "compactions", args: {sessionId: "..."}})` — view compaction checkpoints +- `memory({tool: "context"})` — check your current context window usage + +### memory_compact() + +Trigger compaction on the current session. This summarizes the conversation so far to free context space. 
+ +**When to use memory_compact:** +- When context is above 80% (check with `memory({tool: "context"})`) +- When you notice you're losing track of earlier conversation details +- At natural breakpoints in multi-step tasks (after completing a subtask, before starting a new one) +- When the system prompt shows a yellow/red/critical context warning +- Proactively, rather than waiting for automatic compaction at 92% + +**When NOT to use memory_compact:** +- When context is below 50% (it wastes a compaction cycle) +- In the middle of a complex edit that you need immediate context for +- When the task is nearly complete (just finish the task instead) + +Compaction preserves your most important context in a structured summary — you will continue the session with the summary as your starting point. +``` + +## Development + +```bash +bun install +bun run build # bun build + tsc declarations +bun run typecheck # tsc --noEmit +bun run lint # biome check +bun run test # bun test (16 tests) +``` + +## Architecture + +See [`docs/architecture.md`](docs/architecture.md) for detailed design decisions and technical reference. + +## License + +Apache-2.0 \ No newline at end of file diff --git a/docs/architecture.md b/docs/architecture.md index 82452eb..ade40ca 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -31,86 +31,104 @@ The core problem: OpenCode's automatic compaction fires at ~92% context usage wi ## Architecture +### Tool Design: Router Pattern + +The plugin exposes exactly 2 tools to the agent: + +| Tool | Type | Purpose | +|------|------|---------| +| `memory` | Read-only router | Dispatches to 8 internal operations by `{tool: "name", args: {...}}` | +| `memory_compact` | Mutation | Triggers compaction via `ctx.client.session.summarize()` | + +**Why a router?** OpenCode has ~13.5k token baseline context bloat with just "hello world". Each tool definition adds its JSON schema to the system prompt. 8 separate tools = 8 schemas consuming context. 
By collapsing into a router, the agent sees only 2 tool definitions instead of 8, dramatically reducing context overhead. + +This pattern is inspired by toolEnv's `/call` registry approach and is applicable to other plugins that expose many operations. + ### Three Pillars #### 1. Context Awareness -**SSE-based token tracking** (same pattern as `open-coordinator`'s detection system): +**SSE-based token tracking:** -- Subscribe to `ctx.client.global.event()` SSE stream -- Track `tokens.input` from `message.updated` events per session +- Subscribe to `message.updated` events via the `event` plugin hook +- Track `tokens.input` from assistant messages per session - The `tokens.input` on the latest assistant message = current context size - Compare against model's `limit.context` to compute percentage used -- Model limits available from `ctx.client.config.get()` or provider info +- Model limits available from `ctx.client.config.get()` -**Thresholds:** +**Thresholds** (defined in `src/context/thresholds.ts` as the single source of truth): - **Green** (<70%): Healthy, no action needed - **Yellow** (70-85%): Consider compacting at next break point - **Red** (85-92%): Strongly recommend compacting now - **Critical** (>92%): Imminent automatic compaction **Proactive notification:** -- Use `experimental.chat.system.transform` hook to inject context percentage into system prompt +- `experimental.chat.system.transform` hook injects context percentage into system prompt - Agent always knows its context status without calling a tool -- At yellow/red thresholds, inject an explicit advisory note - -**Tool: `memory_context`** -- Returns current token usage, model context limit, percentage, and status -- Includes trend (growing fast vs. stable) -- Lists model info +- At yellow/red thresholds, injects an explicit advisory note #### 2. 
Compaction Management **`memory_compact` tool:** - Calls `ctx.client.session.summarize()` to trigger compaction on the current session -- Requires `providerID` and `modelID` — obtained from the session's last user message or config +- Requires `providerID` and `modelID` — obtained from the session's last user message or context tracker +- **Must NOT await `summarize()`** — returns immediately, schedules via `setTimeout(0)` because compaction can't start until the tool returns control to the event loop +- Refuses to compact if context is below 50% (wastes a compaction cycle) - This gives the agent explicit control over *when* compaction happens **`experimental.session.compacting` hook:** - Replaces the default "summarize for another agent" prompt - Better prompt emphasizes self-continuity, preserving task context, decisions, and next steps - -**Default instructions in system prompt:** -- "When context exceeds 85%, use `memory_compact` at your next natural break point" -- "At 90%+, compact immediately if possible" +- Uses structured template: Goal, Instructions, Discoveries, Accomplished, Relevant files, Notes #### 3. Session History Browser All backed by read-only `bun:sqlite` queries to `${XDG_DATA_HOME:-$HOME/.local/share}/opencode/opencode.db`. 
-**Tools:** +**Operations** (all accessed via the `memory` router): -| Tool | Purpose | -|------|---------| -| `memory_compactions` | List/read compaction checkpoints for a session | -| `memory_summary` | Quick counts: projects, sessions, messages, todos | -| `memory_sessions` | List recent sessions with metadata, sorted by update time | -| `memory_messages` | Read messages from a specific session as markdown | -| `memory_search` | Full-text search across all conversations (LIKE-based) | -| `memory_plans` | List and read saved plans | +| Operation | Purpose | Key args | +|-----------|---------|----------| +| help | Show available operations | tool (optional, for details on one) | +| summary | Quick counts: projects, sessions, messages, todos | — | +| sessions | List recent sessions with metadata | limit, projectPath | +| messages | Read messages from a session as markdown | sessionId, limit | +| search | Text search across all conversations (LIKE-based) | query, limit | +| compactions | List/read compaction checkpoints for a session | sessionId, read (1-based index) | +| context | Current context window usage | — | +| plans | List and read saved plans | read (filename) | **Rendering:** - Markdown tables for session lists -- Formatted conversation transcripts for `memory_messages` +- Formatted conversation transcripts for `messages` - Snippet + session reference for search results +- Compaction checkpoints as navigable indices with summary previews - All queries use `LIMIT` and parameterized `db.prepare().all(params)` +### Compaction Data in DB + +When compaction occurs, OpenCode creates: +1. A synthetic `user` message with a `compaction`-type part (`part.data = {type: "compaction", auto: true/false, overflow: true/false}`) +2. `message.data.summary = {diffs: [...]}` on the compaction message +3. 
The assistant message immediately after contains the actual summary text in a `text`-type part + +The `compactions` operation queries for `compaction`-type parts and retrieves the adjacent summary text, presenting them as navigable checkpoints. This is a stepping stone toward agents having their own UI with HUD + last N messages + tools for long-term memories. + ## Component Design ``` src/ ├── index.ts # Plugin entry: hooks + tool registration -├── tools.ts # Tool definitions (memory_*) +├── tools.ts # 2 tools: memory router + memory_compact (with setTimeout fix) ├── context/ -│ ├── tracker.ts # SSE token tracking (per-session) -│ ├── thresholds.ts # Context percentage thresholds & status -│ └── notify.ts # System prompt injection for warnings +│ ├── tracker.ts # SSE token tracking (per-session context usage) +│ └── thresholds.ts # Threshold constants + ContextStatus type (single source of truth) ├── history/ -│ ├── queries.ts # bun:sqlite read-only query helper -│ ├── format.ts # Markdown rendering utilities -│ └── search.ts # Full-text search logic +│ ├── queries.ts # bun:sqlite read-only query helper (lazy singleton) +│ ├── format.ts # Markdown rendering for session/message output +│ └── search.ts # LIKE-based full-text search across conversations └── compaction/ - └── prompt.ts # Better compaction prompt template + └── prompt.ts # Compaction prompt template (self-continuity, not "for another agent") ``` ## Key Technical Details @@ -119,16 +137,13 @@ src/ From `overflow.ts` in OpenCode source: ```typescript -// The actual check is: -// count >= usable -// where: -// count = tokens.total || (input + output + cache.read + cache.write) -// reserved = config.compaction?.reserved ?? min(20000, maxOutputTokens) -// usable = model.limit.input ? model.limit.input - reserved -// : model.limit.context - maxOutputTokens +count = tokens.total || (input + output + cache.read + cache.write) +reserved = config.compaction?.reserved ?? 
min(20000, maxOutputTokens) +usable = model.limit.input ? model.limit.input - reserved + : model.limit.context - maxOutputTokens ``` -The `tokens.input` field on the last assistant message represents the context size at the time that message was sent. We track this and compare it against the model's context limit (from config/providers). +The `tokens.input` field on the last assistant message represents the context size at the time that message was sent. We track this and compare it against the model's context limit (from config/providers), falling back to 200k. ### Session Summarize API @@ -140,69 +155,57 @@ ctx.client.session.summarize({ }) ``` -This triggers the compaction flow in OpenCode's server. +This triggers the compaction flow in OpenCode's server. **Must not be awaited** — see the `memory_compact` deadlock note above. -### Plugin Hook: `experimental.session.compacting` +### Plugin Hooks +**`experimental.session.compacting`:** ```typescript -"experimental.session.compacting": async (input, output) => { - // output.context: string[] — appended to default prompt - // output.prompt?: string — replaces default prompt entirely - output.prompt = `You are compacting your own session...`; +async (input, output) => { + output.prompt = getCompactionPrompt(); // replaces default entirely } ``` -### Plugin Hook: `experimental.chat.system.transform` - +**`experimental.chat.system.transform`:** ```typescript -"experimental.chat.system.transform": async (input, output) => { - // Can append strings to the system prompt - const contextInfo = getContextInfo(input.sessionID); - if (contextInfo) { - output.system.push(`Context: ${contextInfo.percentage}% used (${contextInfo.status})`); +async (input, output) => { + const info = contextTracker.getContextInfo(input.sessionID); + if (info) { + output.system.push(`🟢 Context: ${info.percentage}% used (...)`); } } ``` +**`event`:** +```typescript +async ({ event }) => { + contextTracker.handleEvent(event); +} +``` + ## 
Relationship to `open-coordinator` - **Open-coordinator** handles worktree orchestration, session spawning, bidirectional communication - **Open-memory** handles session introspection, context awareness, history browsing - Both use SSE event streams but for different purposes - Both can be used together — coordinator for multi-session workflows, memory for context management -- The `experimental.session.compacting` hook in coordinator has a good prompt already; open-memory will provide an enhanced version that includes task context awareness +- Both implement `experimental.session.compacting` — open-memory's version is more detailed +- The router pattern (2 tools instead of many) was first applied here and can be applied to open-coordinator + +## Future Work + +- FTS5 virtual table support for better search (stemming, ranking) +- Configurable thresholds via plugin config +- Session comparison tools +- Export/import helpers +- Integration tests ## References -- OpenCode source: `/workspace/opencode` — especially `packages/opencode/src/session/compaction.ts`, `overflow.ts`, `status.ts` +- OpenCode source: `/workspace/opencode` — especially `packages/opencode/src/session/compaction.ts`, `overflow.ts` - OpenCode plugin SDK: `/workspace/opencode/packages/plugin/src/index.ts` - OpenCode plugin types: see `Hooks` interface for all available hooks -- Open-code coordinator plugin: `/workspace/@alkimiadev/open-coordinator` — architecture pattern reference -- Original memory browsing skill: `docs/research/opencode-memory/opencode-memory.md` +- Open-coordinator plugin: `/workspace/@alkimiadev/open-coordinator` — architecture pattern reference - OpenCode DB schema: `message`, `part`, `session`, `project`, `todo` tables - OpenCode config schema: `compaction.auto`, `compaction.prune`, `compaction.reserved` fields - -## Implementation Phases - -### Phase 1: Foundation (current) -- Plugin scaffolding, build setup, basic hooks -- `experimental.session.compacting` hook with better 
default prompt -- Basic `memory_context` tool (context percentage calculation) - -### Phase 2: History Browser -- `memory_summary`, `memory_sessions`, `memory_messages` -- `memory_search` with full-text search -- `memory_plans` for plan access -- Markdown formatting for all outputs - -### Phase 3: Context Awareness -- SSE-based token tracker -- Proactive context warnings via `experimental.chat.system.transform` -- `memory_compact` tool calling `session.summarize` -- Default system instructions on when to compact - -### Phase 4: Polish -- Configurable thresholds -- Session comparison tools -- Export/import helpers -- Integration tests \ No newline at end of file +- Bun SQLite docs: https://bun.com/docs/runtime/sqlite \ No newline at end of file diff --git a/src/context/notify.ts b/src/context/notify.ts deleted file mode 100644 index 1453e8f..0000000 --- a/src/context/notify.ts +++ /dev/null @@ -1,26 +0,0 @@ -export const formatAnomalyNotification = ( - sessionID: string, - _type: string, - percentage: number, - status: string, -): string => { - const lines: string[] = []; - - lines.push(`Context threshold reached [${status}]`); - lines.push(""); - lines.push(`Session: ${sessionID}`); - lines.push(`Context: ${percentage}% used`); - - if (status === "critical") { - lines.push(""); - lines.push("Imminent automatic compaction. Consider triggering memory_compact now."); - } else if (status === "red") { - lines.push(""); - lines.push("Context is running low. Use memory_compact at your next natural break point."); - } else if (status === "yellow") { - lines.push(""); - lines.push("Context usage is getting high. 
Consider memory_compact when convenient."); - } - - return lines.join("\n"); -}; diff --git a/src/history/format.ts b/src/history/format.ts index d6febde..ff1d234 100644 --- a/src/history/format.ts +++ b/src/history/format.ts @@ -15,7 +15,9 @@ export const formatSessionList = (rows: Record[]): string => { } lines.push(""); - lines.push("Use memory_messages with a session ID to read the full conversation."); + lines.push( + 'Use memory({tool: "messages", args: {sessionId: "..."}}) to read the full conversation.', + ); return lines.join("\n"); }; diff --git a/src/history/search.ts b/src/history/search.ts index 8d1595f..5e17c69 100644 --- a/src/history/search.ts +++ b/src/history/search.ts @@ -51,7 +51,9 @@ export const searchConversations = (searchTerm: string, limit: number): string = lines.push(""); } - lines.push("Use memory_messages with a session ID to read the full conversation."); + lines.push( + 'Use memory({tool: "messages", args: {sessionId: "..."}}) to read the full conversation.', + ); return lines.join("\n"); } catch (err) { return `Search failed: ${err instanceof Error ? err.message : String(err)}`;