From 90daa86d5918d925267a55c114c3e78317a978fe Mon Sep 17 00:00:00 2001 From: "glm-5.1" Date: Wed, 22 Apr 2026 13:10:19 +0000 Subject: [PATCH] Add HUD/AUI research docs: compaction, agents, handlebars, architecture --- docs/research/01-compaction-architecture.md | 774 ++++++++++ docs/research/02-agent-definitions-pattern.md | 540 +++++++ .../03-handlebars-bun-compatibility.md | 527 +++++++ docs/research/04-hud-architecture.md | 1347 +++++++++++++++++ 4 files changed, 3188 insertions(+) create mode 100644 docs/research/01-compaction-architecture.md create mode 100644 docs/research/02-agent-definitions-pattern.md create mode 100644 docs/research/03-handlebars-bun-compatibility.md create mode 100644 docs/research/04-hud-architecture.md diff --git a/docs/research/01-compaction-architecture.md b/docs/research/01-compaction-architecture.md new file mode 100644 index 0000000..82edbd2 --- /dev/null +++ b/docs/research/01-compaction-architecture.md @@ -0,0 +1,774 @@ +# Compaction Architecture: OpenCode Core & Open-Memory Plugin Integration + +## Table of Contents + +1. [Overview](#overview) +2. [Compaction in OpenCode Core](#compaction-in-opencode-core) +3. [Plugin Hook System](#plugin-hook-system) +4. [Open-Memory Plugin Integration](#open-memory-plugin-integration) +5. [System Prompt Injection Mechanisms](#system-prompt-injection-mechanisms) +6. [Persistent HUD Feasibility Analysis](#persistent-hud-feasibility-analysis) +7. [Key File Reference](#key-file-reference) + +--- + +## Overview + +Compaction is OpenCode's mechanism for freeing context window space. When a session's token usage approaches the model's context limit, the conversation history is summarized: the older messages are replaced with a concise summary that preserves essential context. This allows long-running sessions to continue without hitting provider token limits. + +The `@alkdev/open-memory` plugin integrates with this system in three ways: +1. 
**Custom compaction prompt** via the `experimental.session.compacting` hook (self-continuity instead of "for another agent")
2. **Context awareness** injected into system prompts via `experimental.chat.system.transform`
3. **Proactive compaction triggering** via the `memory_compact` tool (before automatic overflow kicks in)

---

## Compaction in OpenCode Core

### Trigger Conditions

Compaction triggers in three scenarios:

**1. Automatic overflow detection** — checked after every completed assistant message in the session loop:

`/workspace/opencode/packages/opencode/src/session/prompt.ts:1412-1419`
```ts
if (
  lastFinished &&
  lastFinished.summary !== true &&
  (yield* compaction.isOverflow({ tokens: lastFinished.tokens, model }))
) {
  yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
  continue
}
```

**2. Explicit API/tool call** — when `session.summarize()` is called (used by `memory_compact`). This creates a compaction request with `auto: false`.

**3. 
Provider-initiated** — when the processor detects a "compact" result from the LLM finish reason: + +`/workspace/opencode/packages/opencode/src/session/prompt.ts:1542-1549` +```ts +if (result === "compact") { + yield* compaction.create({ + sessionID, + agent: lastUser.agent, + model: lastUser.model, + auto: true, + overflow: !handle.message.finish, + }) +} +``` + +### Overflow Detection (isOverflow) + +`/workspace/opencode/packages/opencode/src/session/overflow.ts:8-22` + +The overflow check compares total token usage against the model's usable context: + +```ts +export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) { + if (input.cfg.compaction?.auto === false) return false + const context = input.model.limit.context + if (context === 0) return false + + const count = + input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write + + const reserved = + input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model)) + const usable = input.model.limit.input + ? input.model.limit.input - reserved + : context - ProviderTransform.maxOutputTokens(input.model) + return count >= usable +} +``` + +Key constants: +- `COMPACTION_BUFFER = 20_000` — default reserved tokens for generation output +- Usable context = `model.inputLimit - reserved` (or `model.contextLimit - maxOutputTokens`) +- Overflow fires when `count >= usable` + +Can be disabled via `config.compaction.auto = false`. + +### Compaction Flow (step by step) + +**Step 1: Create compaction marker** + +`SessionCompaction.create()` (`/workspace/opencode/packages/opencode/src/session/compaction.ts:349-372`): + +1. Creates a **user message** (`role: "user"`) +2. Attaches a **CompactionPart** (`type: "compaction"`) with `auto` and `overflow` flags +3. 
Writes both to the database via `session.updateMessage` and `session.updatePart` + +```ts +const msg = yield* session.updateMessage({ + id: MessageID.ascending(), + role: "user", + model: input.model, + sessionID: input.sessionID, + agent: input.agent, + time: { created: Date.now() }, +}) +yield* session.updatePart({ + id: PartID.ascending(), + messageID: msg.id, + sessionID: msg.sessionID, + type: "compaction", + auto: input.auto, + overflow: input.overflow, +}) +``` + +**Step 2: Detect compaction task in the loop** + +On the next iteration of `runLoop`, the compaction part is detected: + +`/workspace/opencode/packages/opencode/src/session/prompt.ts:1393-1409` +```ts +if (task?.type === "compaction") { + const result = yield* compaction.process({ + messages: msgs, + parentID: lastUser.id, + sessionID, + auto: task.auto, + overflow: task.overflow, + }) + if (result === "stop") break + continue +} +``` + +**Step 3: Process the compaction** + +`SessionCompaction.process()` (`/workspace/opencode/packages/opencode/src/session/compaction.ts:141-347`): + +1. **Resolves the compaction agent** (a dedicated "compaction" agent with potentially a different model). Falls back to the user message's model if no compaction agent model is configured. + +2. **Triggers the `experimental.session.compacting` plugin hook** — allows plugins to customize the prompt: + ```ts + const compacting = yield* plugin.trigger( + "experimental.session.compacting", + { sessionID: input.sessionID }, + { context: [], prompt: undefined }, + ) + ``` + +3. **Constructs the compaction prompt** — either the plugin-provided `prompt` or the default: + ```ts + const defaultPrompt = `Provide a detailed prompt for continuing our conversation above. + Focus on information that would be helpful for continuing the conversation... + The summary that you construct will be used so that another agent can read it and continue the work. + Do not call any tools. Respond only with the summary text. 
+ ...` + + const prompt = compacting.prompt ?? [defaultPrompt, ...compacting.context].join("\n\n") + ``` + + **Critical detail**: If `compacting.prompt` is set, it **replaces** the default prompt entirely. If only `compacting.context` strings are appended, they're joined with the default prompt. + +4. **Clones messages and applies messages transform hook**: + ```ts + const msgs = structuredClone(messages) + yield* plugin.trigger("experimental.chat.messages.transform", {}, { messages: msgs }) + ``` + +5. **Converts messages to model format** (stripping media for token efficiency): + ```ts + const modelMessages = yield* MessageV2.toModelMessagesEffect(msgs, model, { stripMedia: true }) + ``` + +6. **Creates an assistant message with `summary: true`**: + ```ts + const msg: MessageV2.Assistant = { + ... + mode: "compaction", + agent: "compaction", + summary: true, + ... + } + ``` + +7. **Streams the LLM response** — sends the conversation history + the compaction prompt as a user message, with **no tools** (`tools: {}`): + ```ts + const result = yield* processor.process({ + user: userMessage, + agent, + sessionID: input.sessionID, + tools: {}, + system: [], + messages: [ + ...modelMessages, + { role: "user", content: [{ type: "text", text: prompt }] }, + ], + model, + }) + ``` + +8. **Handles overflow replay** — if this was an overflow compaction, replays the last non-compaction user message so the agent continues the interrupted task. + +9. **Publishes the `session.compacted` bus event** on success. 
+ +### Compaction's Data in the Database + +After compaction, the database contains: + +| Table | Record | Key Fields | +|-------|--------|------------| +| `message` | User message (compaction marker) | `data.role = "user"`, contains the CompactionPart | +| `part` | CompactionPart | `data.type = "compaction"`, `data.auto`, `data.overflow` | +| `message` | Assistant message (summary) | `data.summary = true`, `data.agent = "compaction"` | +| `part` | TextPart (summary text) | `data.type = "text"`, `data.text = ""` | +| `message` | User message (same parent) | `data.summary.diffs = [...]` (diff stats for work done) | + +Additionally, `SessionSummary.summarize()` attaches diff information: + +`/workspace/opencode/packages/opencode/src/session/summary.ts:106-133` + +This computes file diffs from snapshot checkpoints and stores them on the compaction user message as `info.summary.diffs`. + +### Message Filtering After Compaction + +`MessageV2.filterCompacted()` (`/workspace/opencode/packages/opencode/src/session/message-v2.ts:903-919`): + +After compaction, the session loop uses `filterCompacted` to load only the messages **from the last compaction point forward**. 
It walks backward through messages until it finds a completed compaction (`assistant.summary === true && finish && !error`), then stops — everything before that point is excluded from the context window: + +```ts +export function filterCompacted(msgs: Iterable) { + const result = [] as MessageV2.WithParts[] + const completed = new Set() + for (const msg of msgs) { + result.push(msg) + if ( + msg.info.role === "user" && + completed.has(msg.info.id) && + msg.parts.some((part) => part.type === "compaction") + ) + break + if (msg.info.role === "assistant" && msg.info.summary && msg.info.finish && !msg.info.error) + completed.add(msg.info.parentID) + } + result.reverse() + return result +} +``` + +### Pruning (Secondary Context Reclamation) + +`SessionCompaction.prune()` (`/workspace/opencode/packages/opencode/src/session/compaction.ts:93-139`): + +Pruning is a lighter-weight mechanism that doesn't involve an LLM call. It walks backward through tool call outputs, keeping the most recent `PRUNE_PROTECT` (40,000) tokens of tool output, and marking older ones with `part.state.time.compacted = Date.now()`. This causes those tool outputs to be excluded from the context window (the Read tool skips compacted parts). + +Constants: +- `PRUNE_MINIMUM = 20_000` — only prune if at least this many tokens can be reclaimed +- `PRUNE_PROTECT = 40_000` — protect this many tokens of recent tool output +- `PRUNE_PROTECTED_TOOLS = ["skill"]` — tools whose output is never pruned +- Can be disabled via `config.compaction.prune = false` + +--- + +## Plugin Hook System + +### Plugin Architecture + +OpenCode's plugin system is defined in `/workspace/opencode/packages/opencode/src/plugin/index.ts`. + +**Plugin type**: `Plugin = (input: PluginInput, options?: PluginOptions) => Promise` + +Each plugin is a function that receives `PluginInput` (client, project, directory, worktree, serverUrl, shell) and returns a `Hooks` object. 
**Hook trigger mechanism** (`/workspace/opencode/packages/opencode/src/plugin/index.ts:235-248`):

```ts
const trigger = Effect.fn("Plugin.trigger")(function* <...>(name, input, output) {
  const s = yield* InstanceState.get(state)
  for (const hook of s.hooks) {
    const fn = hook[name] as any
    if (!fn) continue
    yield* Effect.promise(async () => fn(input, output))
  }
  return output
})
```

**Key behavior**: Hooks are called sequentially in registration order. The `output` object is mutated in place and passed through all hooks. The final (mutated) `output` is what OpenCode uses. This means:
- All registered plugins can modify the same `output` object
- Order of plugin registration matters for conflicts
- Later plugins see modifications from earlier plugins

### Hook Definitions

The `Hooks` interface is defined in `/workspace/opencode/packages/plugin/src/index.ts:189-276`:

```ts
export interface Hooks {
  event?: (input: { event: Event }) => Promise
  config?: (input: Config) => Promise
  tool?: { [key: string]: ToolDefinition }
  auth?: AuthHook
  provider?: ProviderHook
  "chat.message"?: (...) => Promise
  "chat.params"?: (...) => Promise
  "chat.headers"?: (...) => Promise
  "permission.ask"?: (...) => Promise
  "command.execute.before"?: (...) => Promise
  "tool.execute.before"?: (...) => Promise
  "tool.execute.after"?: (...) => Promise
  "shell.env"?: (...) => Promise
  "tool.definition"?: (...) => Promise
  "experimental.chat.messages.transform"?: (...) => Promise
  "experimental.chat.system.transform"?: (...) => Promise
  "experimental.session.compacting"?: (...) => Promise
  "experimental.text.complete"?: (...) 
=> Promise +} +``` + +### Compaction Hook + +`experimental.session.compacting`: + +**Type definition** (`/workspace/opencode/packages/plugin/src/index.ts:264-267`): +```ts +"experimental.session.compacting"?: ( + input: { sessionID: string }, + output: { context: string[]; prompt?: string }, +) => Promise +``` + +**Invocation site** (`/workspace/opencode/packages/opencode/src/session/compaction.ts:184-188`): +```ts +const compacting = yield* plugin.trigger( + "experimental.session.compacting", + { sessionID: input.sessionID }, + { context: [], prompt: undefined }, +) +``` + +**How prompt resolution works** (`/workspace/opencode/packages/opencode/src/session/compaction.ts:189-219`): +```ts +const defaultPrompt = `Provide a detailed prompt for continuing our conversation above...` +const prompt = compacting.prompt ?? [defaultPrompt, ...compacting.context].join("\n\n") +``` + +- If `output.prompt` is set → replaces the default prompt entirely +- If `output.context` has entries → appended after the default prompt +- Both can be combined: a plugin can set `prompt` (for full replacement) OR add `context` strings (for augmentation) + +### System Prompt Transform Hook + +`experimental.chat.system.transform`: + +**Type definition** (`/workspace/opencode/packages/plugin/src/index.ts:251-256`): +```ts +"experimental.chat.system.transform"?: ( + input: { sessionID?: string; model: Model }, + output: { system: string[] }, +) => Promise +``` + +**Primary invocation site** (`/workspace/opencode/packages/opencode/src/session/llm.ts:116-126`): +```ts +await Plugin.trigger( + "experimental.chat.system.transform", + { sessionID: input.sessionID, model: input.model }, + { system }, +) +// rejoin to maintain 2-part structure for caching if header unchanged +if (system.length > 2 && system[0] === header) { + const rest = system.slice(1) + system.length = 0 + system.push(header, rest.join("\n")) +} +``` + +**How it works**: +- The `system` array initially contains 1 element: the combined 
agent/provider prompt + system instructions + user instructions +- Plugins can `push()` additional strings onto `system` +- After all plugins run, OpenCode optimizes: if the first element hasn't changed and there are more than 2 elements, it recombines the extras into a second element (for prompt caching purposes — Anthropic and similar providers cache the first system message separately) +- Final system messages are sent as separate `system` role messages to the LLM: `system.map(x => ({ role: "system", content: x }))` + +**Secondary invocation** (agent generation, `/workspace/opencode/packages/opencode/src/agent/agent.ts:340`): +```ts +yield* Effect.promise(() => + Plugin.trigger("experimental.chat.system.transform", { model: resolved }, { system }), +) +``` + +**Note**: `sessionID` is optional in the input type. During agent generation, no sessionID is passed. Plugins must handle this gracefully (open-memory already does: `if (!input.sessionID) return;`). + +### Event Hook + +`event`: + +**Type definition** (`/workspace/opencode/packages/plugin/src/index.ts:190`): +```ts +event?: (input: { event: Event }) => Promise +``` + +**How events reach plugins** (`/workspace/opencode/packages/opencode/src/plugin/index.ts:220-229`): + +The plugin system subscribes to the global bus and forwards all events to all loaded plugins: +```ts +yield* bus.subscribeAll().pipe( + Stream.runForEach((input) => + Effect.sync(() => { + for (const hook of hooks) { + hook["event"]?.({ event: input as any }) + } + }), + ), + Effect.forkScoped, +) +``` + +**Event types** the bus publishes (partial list): +- `message.updated` — whenever a message is updated (token counts, status changes) +- `session.compacted` — after compaction completes +- `session.created`, `session.updated`, `session.deleted` +- `session.error` +- `session.diff` +- Various other lifecycle events + +The open-memory plugin only cares about `message.updated` events for assistant messages (to track token usage). 
+ +### Messages Transform Hook + +`experimental.chat.messages.transform`: + +**Type definition** (`/workspace/opencode/packages/plugin/src/index.ts:242-250`): +```ts +"experimental.chat.messages.transform"?: ( + input: {}, + output: { + messages: { + info: Message + parts: Part[] + }[] + }, +) => Promise +``` + +Called in two places: +- Before compaction LLM call (`/workspace/opencode/packages/opencode/src/session/compaction.ts:221`) +- Before regular LLM processing (`/workspace/opencode/packages/opencode/src/session/prompt.ts:1499`) + +--- + +## Open-Memory Plugin Integration + +### Plugin Entry Point + +`/workspace/@alkdev/open-memory/src/index.ts` + +The plugin registers four hooks: + +```ts +return { + tool: createTools(ctx, contextTracker), // 2 tools: memory, memory_compact + + "experimental.session.compacting": async (_input, output) => { // Custom compaction prompt + output.prompt = getCompactionPrompt(); + }, + + "experimental.chat.system.transform": async (input, output) => { // Context awareness injection + // Pushes context % usage + advisory into system prompt + }, + + event: async ({ event }) => { // SSE event handling + contextTracker.handleEvent(event); + }, +}; +``` + +### Custom Compaction Prompt + +`/workspace/@alkdev/open-memory/src/compaction/prompt.ts` + +The plugin replaces OpenCode's default "summarize for another agent" prompt with a self-continuity prompt: + +**OpenCode's default** (at `/workspace/opencode/packages/opencode/src/session/compaction.ts:189-217`): +> "The summary that you construct will be used so that another agent can read it and continue the work." + +**Open-Memory's replacement** (`/workspace/@alkdev/open-memory/src/compaction/prompt.ts:1-40`): +> "You are compacting your own session to free context space. You will continue this session after compaction with this summary as your starting context. ... You are summarizing for yourself, not another agent." 
+ +The key difference: the default prompt treats compaction as a handoff between agents, while open-memory's prompt frames compaction as self-continuity. The template structure is similar (Goal, Instructions, Discoveries, Accomplished, Relevant files, Notes) but the framing emphasizes "what YOU will need" rather than "what would be helpful for continuing the conversation." + +### Context Tracking + +`/workspace/@alkdev/open-memory/src/context/tracker.ts` + +The `ContextTracker` class: +1. Listens to `message.updated` events for assistant messages +2. Extracts `tokens.input` as the current context size +3. Looks up the model's context limit from config (falls back to 200,000) +4. Calculates a percentage and classifies into status levels + +**Event handling** (`/workspace/@alkdev/open-memory/src/context/tracker.ts:64-122`): +```ts +handleEvent(event: Event) { + if (event.type !== "message.updated") return; + // Only care about assistant messages + if (!info || info.role !== "assistant") return; + // Extract token counts + const inputTokens = typeof tokens.input === "number" ? tokens.input : 0; + // Store per-session tracking data + existing.lastInputTokens = inputTokens; + // Track trend via rolling window of last 5 readings +} +``` + +**Threshold classification** (`/workspace/@alkdev/open-memory/src/context/thresholds.ts`): +- Green: < 70% +- Yellow: 70-85% +- Red: 85-92% +- Critical: > 92% + +These thresholds are more aggressive than OpenCode's overflow detection (which fires at ~92%+, depending on model limits and config). Open-memory wants the agent to compact *before* automatic overflow. 
+ +### System Prompt Injection + +`/workspace/@alkdev/open-memory/src/index.ts:16-49` + +The plugin injects context status into every LLM call via the system transform hook: + +```ts +"experimental.chat.system.transform": async (input, output) => { + if (!input.sessionID) return; + const info = contextTracker.getContextInfo(input.sessionID); + if (!info) return; + + const statusEmoji = /* red/orange/yellow/green circle based on status */; + const advisory = /* actionable advice based on status level */; + + const lines = [ + `${statusEmoji} Context: ${info.percentage}% used (${info.usedTokens.toLocaleString()} / ${info.limitTokens.toLocaleString()} tokens, ${info.model})`, + ]; + if (advisory) lines.push(advisory); + + output.system.push(lines.join("\n")); +} +``` + +**What the agent sees** (example at yellow status): +``` +🟡 Context: 75% used (150,000 / 200,000 tokens, anthropic/claude-sonnet-4-20250514) +Context usage is getting high. Consider memory_compact when convenient. +``` + +This is appended to the `system` array, so it becomes a separate `system` role message in the final prompt. Due to OpenCode's system message rejoining logic (`/workspace/opencode/packages/opencode/src/session/llm.ts:122-126`), it will be merged into the second system message block if the first block (the core prompt) hasn't changed. + +### Compaction Tool (memory_compact) + +`/workspace/@alkdev/open-memory/src/tools.ts:402-448` + +The `memory_compact` tool: +1. Checks if compaction is needed (skips if context < 50%) +2. Gets model info from the last user message or the context tracker +3. Calls `ctx.client.session.summarize()` via `setTimeout(..., 0)` to schedule compaction asynchronously + +**Critical timing note** from AGENTS.md: +> `memory_compact` must NOT await `ctx.client.session.summarize()` — it returns immediately and schedules via `setTimeout(() => { ... }, 0)` because compaction cannot start until the tool returns control to the event loop. 
+ +This is because compaction requires the session loop to cycle — the current tool call must complete before the compaction marker can be detected. + +### Compaction History Querying + +`/workspace/@alkdev/open-memory/src/tools.ts:222-302` + +The `memory` tool's `compactions` operation queries the database for compaction checkpoints: + +1. Finds all `CompactionPart` rows for a session (`part.data.type = 'compaction'`) +2. For each, finds the adjacent assistant message (the summary text) +3. Presents them as navigable checkpoints with 1-based indexing + +--- + +## System Prompt Injection Mechanisms + +There are **four distinct mechanisms** for injecting content into the agent's prompt in OpenCode: + +### 1. AGENTS.md / CLAUDE.md / CONTEXT.md (Instruction Files) + +`/workspace/opencode/packages/opencode/src/session/instruction.ts` + +- Files named `AGENTS.md`, `CLAUDE.md`, or `CONTEXT.md` found in the project directory tree +- Also global paths like `~/.config/opencode/AGENTS.md` and `~/.claude/CLAUDE.md` +- Can be configured via `config.instructions` (including remote URLs) +- Loaded as system instructions: prepended with `"Instructions from: \n"` +- Injected by `instruction.system()` which feeds into the `system[]` array in `SessionPrompt.runLoop` + +**How injected**: As separate elements in the `system` array passed to `LLM.stream`, before plugin hooks fire. + +### 2. `experimental.chat.system.transform` Plugin Hook + +- Plugins push strings onto `output.system` +- Called in `LLM.stream()` (`/workspace/opencode/packages/opencode/src/session/llm.ts:116`) before the system messages are assembled +- Strings become additional `system` role messages + +**Persistence**: Ephemeral — evaluated fresh on every LLM call. The hook is called every time a system prompt is constructed, so injected content is always current but never persists between calls unless the plugin re-injects it. 
+ +**Caching behavior**: OpenCode recombines system messages to maintain a 2-part structure for prompt caching (first element = provider prompt, second element = everything else). Plugins that push a single string will have it merged into the second block. + +### 3. User Message Parts (Synthetic Text) + +`/workspace/opencode/packages/opencode/src/session/prompt.ts:252-386` + +- `insertReminders()` adds synthetic text parts to the last user message +- Used for plan mode instructions, build-switch prompts +- These parts have `synthetic: true` to mark them as non-user-authored + +**How injected**: Added as parts of user messages, so they appear in the conversation flow rather than the system prompt. + +### 4. `experimental.chat.messages.transform` Plugin Hook + +- Plugins can modify the `messages` array (clone provided by OpenCode) +- Called before both regular processing and compaction +- Can add, remove, or modify messages + +**Persistence**: Transient — modifications apply only to the current LLM call. The database is not modified (a `structuredClone` is used). + +--- + +## Persistent HUD Feasibility Analysis + +A "HUD" (heads-up display) is a persistent block of text injected into every system prompt that shows current state: context usage, active task, recent files, etc. Here we analyze how such a feature could be implemented. + +### Requirements + +1. **Always present**: Must appear in every LLM call's system prompt +2. **Current**: Must reflect latest state (context %, files modified, etc.) +3. **Compact**: Must not consume excessive context tokens itself +4. **After compaction**: Must survive/reappear after compaction (which replaces older messages) + +### Existing Mechanism Already Sufficient + +The `experimental.chat.system.transform` hook is already called on **every** LLM call. The open-memory plugin already uses it to inject context percentage. This is the natural place for a HUD. 
+ +**How it works now** (`/workspace/@alkdev/open-memory/src/index.ts:16-49`): +- Called on every `LLM.stream()` invocation +- Hook receives current sessionID and model +- Plugin pushes strings to `output.system` +- Those strings become `system` role messages in the prompt + +### What's Missing for a Rich HUD + +Currently, the plugin only injects context percentage. To make a richer HUD, we could add: + +| HUD Element | Data Source | Implementation | +|-------------|-------------|----------------| +| Context % | ContextTracker (already tracked) | Already done | +| Active task | Session title / last user message | Query DB or track via events | +| Files recently modified | Snapshot diffs / step-finish parts | Query DB or track via events | +| Compaction count | Count CompactionParts in DB | Query on each system transform call | +| Todo list status | `todo` table in DB | Query on each call | +| Session age | Session creation time | Query on each call | + +### Constraints & Considerations + +**1. Token cost of the HUD itself** + +Every string pushed to `output.system` becomes a `system` role message that counts against context. A 500-character HUD is ~125 tokens. At 200k context that's negligible, but it compounds with every LLM call (no caching for dynamic content). + +**2. Prompt caching** + +OpenCode optimizes system messages into 2 blocks for caching. The first block is the provider prompt (e.g., Anthropic's system prompt), which rarely changes. The second block contains everything else. + +If the HUD content changes between calls (likely — context % changes), it's part of the second block, which won't benefit from caching. This is acceptable but worth noting. + +**3. Compaction survival** + +The HUD does **not** need to survive compaction as a message — it's injected fresh on every LLM call. 
Since `experimental.chat.system.transform` is called after compaction (it's called in `LLM.stream()`, which is invoked for every new assistant turn), the HUD will always be present regardless of how many compactions have occurred. + +**4. Latency of DB queries** + +If the HUD queries the database on every system transform call, there's a risk of adding latency before each LLM call. Since `bun:sqlite` in readonly mode is very fast (sub-millisecond for simple queries), this is likely acceptable for 2-3 simple queries. However, the hook is `async`, so queries must be synchronous or carefully managed. + +**Current open-memory implementation**: The `system.transform` hook is synchronous (no DB queries — it reads from the in-memory `ContextTracker`). Adding DB queries would require making the hook `async`. + +**5. Event-driven updates vs. on-demand queries** + +Two approaches for HUD data: + +- **Event-driven**: Track state changes via the `event` hook, maintain in-memory state, inject from memory in `system.transform`. Fast, but requires tracking all relevant events. +- **On-demand**: Query the DB fresh in `system.transform`. Simple, but adds latency and requires async. + +The current context tracker uses **event-driven** for token counts (via `message.updated` events). A hybrid approach makes sense: event-driven for high-frequency data (context %, file changes), on-demand for infrequent data (compaction count, session age). 
+ +### Recommended Architecture for a HUD + +``` +┌──────────────────────────────────┐ +│ Event Bus (SSE) │ +│ message.updated │ +│ session.compacted │ +│ session.updated │ +└────────────┬─────────────────────┘ + │ + ▼ +┌──────────────────────────────────┐ +│ HUD State Manager │ +│ (Event-driven updates) │ +│ │ +│ - Context % (from ContextTracker│ +│ - Recent file changes (track │ +│ step-finish snapshots) │ +│ - Compaction count (increment) │ +│ - Todo status (from events) │ +└────────────┬─────────────────────┘ + │ + ▼ +┌──────────────────────────────────┐ +│ system.transform hook │ +│ (reads from HUD State Manager) │ +│ │ +│ 1. Format HUD from state │ +│ 2. output.system.push(hud) │ +└──────────────────────────────────┘ +``` + +The key insight: the HUD **never needs to persist in the database or in messages**. It's purely an ephemeral system-prompt injection that's reconstructed from live state on every LLM call. This means: +- It automatically survives compaction (injected after compaction) +- It's always up-to-date (injected on every call) +- It doesn't consume context beyond the current call's injection +- It doesn't interfere with the conversation history + +### Alternative: Compaction-Time Persistence + +If we want information to persist **through compaction** as part of the conversation (not just the system prompt), the `experimental.session.compacting` hook is the mechanism. We can add `context` strings that get appended to the compaction prompt, ensuring the LLM summarizes that information. Or, if using `prompt` (full replacement), the custom prompt template already includes space for such information. + +However, this is about ensuring the **compaction summary includes** key information, not about maintaining a live HUD. The HUD is better served by system prompt injection. 
+ +--- + +## Key File Reference + +### OpenCode Core + +| File | Purpose | +|------|---------| +| `/workspace/opencode/packages/opencode/src/session/compaction.ts` | Compaction orchestration: create marker, process compaction, prune tool outputs | +| `/workspace/opencode/packages/opencode/src/session/overflow.ts` | `isOverflow()` — determines when compaction should trigger | +| `/workspace/opencode/packages/opencode/src/session/summary.ts` | `SessionSummary` — computes diff stats and attaches to compaction messages | +| `/workspace/opencode/packages/opencode/src/session/prompt.ts` | Session loop — detects compaction tasks, triggers overflow check, orchestrates the main agent loop | +| `/workspace/opencode/packages/opencode/src/session/llm.ts` | `LLM.stream()` — builds system prompt, calls `system.transform` hook, sends to provider | +| `/workspace/opencode/packages/opencode/src/session/system.ts` | `SystemPrompt.provider()` — model-specific base prompts | +| `/workspace/opencode/packages/opencode/src/session/instruction.ts` | `Instruction` — AGENTS.md/CLAUDE.md/CONTEXT.md loading | +| `/workspace/opencode/packages/opencode/src/session/processor.ts` | `SessionProcessor` — handles LLM streaming events, step boundaries, context overflow detection | +| `/workspace/opencode/packages/opencode/src/session/message-v2.ts` | `MessageV2` — message/part schemas, `filterCompacted()`, `CompactionPart` definition | +| `/workspace/opencode/packages/opencode/src/session/session.sql.ts` | DB schema — `SessionTable`, `MessageTable`, `PartTable` | +| `/workspace/opencode/packages/opencode/src/plugin/index.ts` | Plugin loading, hook trigger mechanism, bus event subscription | +| `/workspace/opencode/packages/plugin/src/index.ts` | Plugin SDK type definitions — `Hooks`, `PluginInput`, `ToolDefinition` | + +### Open-Memory Plugin + +| File | Purpose | +|------|---------| +| `/workspace/@alkdev/open-memory/src/index.ts` | Plugin entry — hook registration (compacting, system.transform, 
event, tools) | +| `/workspace/@alkdev/open-memory/src/tools.ts` | Tool definitions — `memory` (router) and `memory_compact` handlers | +| `/workspace/@alkdev/open-memory/src/compaction/prompt.ts` | Custom compaction prompt template (self-continuity framing) | +| `/workspace/@alkdev/open-memory/src/context/tracker.ts` | `ContextTracker` — SSE event-driven token tracking, per-session context info | +| `/workspace/@alkdev/open-memory/src/context/thresholds.ts` | Threshold constants — green/yellow/red/critical boundaries | +| `/workspace/@alkdev/open-memory/src/history/queries.ts` | `bun:sqlite` read-only DB query helper (lazy singleton) | +| `/workspace/@alkdev/open-memory/src/history/format.ts` | Markdown rendering for message/session output | +| `/workspace/@alkdev/open-memory/src/history/search.ts` | LIKE-based text search across conversations | diff --git a/docs/research/02-agent-definitions-pattern.md b/docs/research/02-agent-definitions-pattern.md new file mode 100644 index 0000000..fa9bcb1 --- /dev/null +++ b/docs/research/02-agent-definitions-pattern.md @@ -0,0 +1,540 @@ +# Agent Definitions Pattern: Research & HUD/AUI Implications + +## 1. alkhub_ts Agent Definitions + +### 1.1 Directory Structure + +Agent definitions in alkhub_ts live in `.opencode/agents/` as individual Markdown files: + +``` +.opencode/agents/ +├── architect.md +├── architecture-reviewer.md +├── code-reviewer.md +├── coordinator.md +├── decomposer.md +├── implementation-specialist.md +├── poc-specialist.md +└── research-specialist.md +``` + +### 1.2 File Format: YAML Frontmatter + Markdown Body + +Each file uses gray-matter frontmatter for structured metadata and a Markdown body for the system prompt: + +```yaml +--- +description: Short one-liner describing the agent's purpose +mode: primary | subagent +temperature: 0.2 +--- + +You are the **Role Name**, [long-form system prompt...] 
+``` + +**Frontmatter fields observed across all 8 agents:** + +| Field | Type | Required | Purpose | +|-------|------|----------|---------| +| `description` | string | yes | One-line summary shown in agent picker / `@` autocomplete | +| `mode` | `"primary"` \| `"subagent"` | yes | Whether the agent appears as a top-level mode or only as a subagent | +| `temperature` | number | sometimes | Model sampling temperature override | + +**Additional fields supported by OpenCode but not used in alkhub_ts:** + +| Field | Type | Purpose | +|-------|------|---------| +| `model` | string | Override the model (e.g., `"anthropic/claude-sonnet-4"`) | +| `variant` | string | Model variant to use when using this agent's configured model | +| `top_p` | number | Top-p sampling override | +| `hidden` | boolean | Hide from the UI (for internal agents like compaction, title) | +| `color` | string | Hex color or theme color for UI display | +| `steps` | number | Maximum agentic iterations before forcing text-only response | +| `permission` | object | Per-tool permission rules (allow/deny/ask) | +| `options` | object | Arbitrary provider options merged into model calls | +| `disable` | boolean | Disable a built-in agent | + +### 1.3 Agent Roles in alkhub_ts + +The 8 agents form a coordinated workflow: + +| Agent | Mode | Role | +|-------|------|------| +| `coordinator` | primary | Orchestrates parallel task execution across worktrees | +| `architect` | primary | Creates/maintains architecture specifications (WHAT & WHY) | +| `decomposer` | primary | Breaks architecture into atomic, dependency-ordered tasks | +| `implementation-specialist` | primary | Executes atomic tasks in isolated worktrees | +| `poc-specialist` | primary | Creates proof-of-concepts in research worktrees | +| `research-specialist` | subagent | Researches technical topics, documents findings | +| `code-reviewer` | subagent | Reviews code quality at checkpoints | +| `architecture-reviewer` | subagent | Reviews 
architecture specs for gaps/risks | + +Key patterns: +- **Primary agents** are selectable top-level modes in the TUI +- **Subagents** are invoked only via the `@agent-name` syntax or programmatically via the task tool +- Each agent has a detailed system prompt defining its workflow, constraints, and output format +- The coordinator describes both current (open-coordinator plugin) and future (hub operations) execution models + +### 1.4 Agent Prompt Design Patterns + +The alkhub_ts agents demonstrate several reusable patterns: + +1. **Environment scoping**: Implementation specialist and POC specialist both specify exact worktree paths and use `workdir` parameter patterns +2. **Workflow phases**: Structured numbered steps (1. Load Task → 2. Verify → 3. Implement → 4. Verify → 5. Update → 6. Commit) +3. **Safe Exit protocol**: Standardized failure handling with status updates and escalation +4. **Role constraints**: "You coordinate, you do not implement" — explicit boundaries +5. **Template outputs**: Structured output templates (review reports, research documents) +6. **Tool gating**: References to specific tools available to the agent + +--- + +## 2. 
OpenCode Agent System (Source Code Analysis) + +### 2.1 Agent Schema (`Agent.Info`) + +Defined in `/workspace/opencode/packages/opencode/src/agent/agent.ts` (lines 27-52): + +```typescript +export const Info = z.object({ + name: z.string(), + description: z.string().optional(), + mode: z.enum(["subagent", "primary", "all"]), + native: z.boolean().optional(), + hidden: z.boolean().optional(), + topP: z.number().optional(), + temperature: z.number().optional(), + color: z.string().optional(), + permission: Permission.Ruleset, + model: z.object({ + modelID: ModelID.zod, + providerID: ProviderID.zod, + }).optional(), + variant: z.string().optional(), + prompt: z.string().optional(), + options: z.record(z.string(), z.any()), + steps: z.number().int().positive().optional(), +}) +``` + +### 2.2 Config Schema (`Config.Agent`) + +Defined in `/workspace/opencode/packages/opencode/src/config/config.ts` (lines 466-553): + +```typescript +export const Agent = z.object({ + model: ModelId.optional(), + variant: z.string().optional(), + temperature: z.number().optional(), + top_p: z.number().optional(), + prompt: z.string().optional(), + tools: z.record(z.string(), z.boolean()).optional(), // deprecated + disable: z.boolean().optional(), + description: z.string().optional(), + mode: z.enum(["subagent", "primary", "all"]).optional(), + hidden: z.boolean().optional(), + options: z.record(z.string(), z.any()).optional(), + color: z.union([z.string().regex(...), z.enum([...])]).optional(), + steps: z.number().int().positive().optional(), + maxSteps: z.number().int().positive().optional(), // deprecated + permission: Permission.optional(), +}).catchall(z.any()).transform(...) +``` + +Notable: The `catchall(z.any())` means any unknown fields in the YAML frontmatter or JSON config are swept into `options`. This is by design — it allows arbitrary per-agent configuration that gets merged into model call parameters. 
+ +### 2.3 Loading Pipeline + +Agent definitions are loaded from four directory patterns (in `/workspace/opencode/packages/opencode/src/config/config.ts`, line 209): + +``` +/.opencode/agent/ (singular) +/.opencode/agents/ (plural) +/agent/ (singular, no dot) +/agents/ (plural, no dot) +``` + +The loading function `loadAgent()` (lines 189-226): + +1. Globs for `*.md` files in all matching directories +2. Parses each file with `ConfigMarkdown.parse()` which uses `gray-matter` to extract YAML frontmatter +3. Extracts the agent name from the file path (stripping directory prefixes and `.md` extension) +4. Combines frontmatter data + markdown body as `prompt` +5. Validates against the `Agent` schema +6. Returns a `Record` mapping name → config + +**Name resolution** (line 211): +```typescript +const patterns = ["/.opencode/agent/", "/.opencode/agents/", "/agent/", "/agents/"] +const file = rel(item, patterns) ?? path.basename(item) +const agentName = trim(file) // removes .md extension +``` + +This means: +- `.opencode/agents/coordinator.md` → agent name `"coordinator"` +- `.opencode/agents/nested/child.md` → agent name `"nested/child"` + +### 2.4 Merge Strategy + +Built-in agents (build, plan, general, explore, compaction, title, summary) are defined in code. User-defined agents from `.opencode/agents/*.md` are merged on top: + +```typescript +for (const [key, value] of Object.entries(cfg.agent ?? {})) { + if (value.disable) { + delete agents[key] + continue + } + let item = agents[key] + if (!item) { + item = agents[key] = { + name: key, + mode: "all", + permission: Permission.merge(defaults, user), + options: {}, + native: false, + } + } + // Merge each field: prompt, model, temperature, mode, etc. + item.prompt = value.prompt ?? item.prompt + item.model = value.model ? Provider.parseModel(value.model) : item.model + item.variant = value.variant ?? item.variant + // ... 
etc +} +``` + +Key behaviors: +- `disable: true` removes a built-in agent entirely +- If a new name doesn't match a built-in, a fresh agent with `mode: "all"` is created +- Frontmatter fields override built-in values (not deep-merge for most fields) +- Permission configs are merged (not replaced) +- `options` are deep-merged with `mergeDeep()` + +### 2.5 System Prompt Assembly + +When an LLM call is made, the system prompt is assembled in this order (from `/workspace/opencode/packages/opencode/src/session/llm.ts`, lines 101-126): + +```typescript +const system: string[] = [] +system.push( + [ + // 1. Agent-specific prompt OR provider default prompt + ...(input.agent.prompt ? [input.agent.prompt] : SystemPrompt.provider(input.model)), + // 2. Custom system prompt from the call + ...input.system, + // 3. Custom system prompt from the user message + ...(input.user.system ? [input.user.system] : []), + ] + .filter((x) => x) + .join("\n"), +) +``` + +Then the plugin hook `experimental.chat.system.transform` is triggered, allowing plugins to modify the system prompt array. + +After this, additional segments are added (from `/workspace/opencode/packages/opencode/src/session/prompt.ts`, lines 1500-1509): + +```typescript +const [skills, env, instructions, modelMsgs] = yield* Effect.all([ + Effect.promise(() => SystemPrompt.skills(agent)), + Effect.promise(() => SystemPrompt.environment(model)), + instruction.system(), + Effect.promise(() => MessageV2.toModelMessages(msgs, model)), +]) +const system = [...env, ...(skills ? [skills] : []), + ...instructions] +``` + +The full system prompt hierarchy (first message wins position, content accumulates): + +1. **Agent prompt** (from `.opencode/agents/*.md` body) — or a model-specific default (anthropic.txt, gpt.txt, etc.) +2. **Custom system** (from plugin hooks, compaction, plan mode injection) +3. **User-provided system prompt** (from the user message) +4. **Plugin modifications** via `experimental.chat.system.transform` +5. 
**Environment info** (model name, working directory, platform, date) +6. **Skills list** (markdown-formatted available skills) +7. **Instruction files** (AGENTS.md, CLAUDE.md found walking up directory tree) + +### 2.6 Agent Name Usage in Messages + +The `AgentPart` type (SDK types, line 833-844): +```typescript +export type AgentPart = { + id: string + sessionID: string + messageID: string + type: "agent" + name: string // agent name, e.g. "explore" + source?: { value: string, start: number, end: number } +} +``` + +When a user types `@explore` in their message, OpenCode parses this into an `AgentPart`. During prompt processing, if the text contains `@agent-name`, it resolves to the corresponding agent definition, and the subagent is launched via the task tool. + +### 2.7 Agent Generation + +OpenCode includes an LLM-powered agent generator (`Agent.generate()`). When invoked, it: + +1. Collects the list of existing agent names to avoid collisions +2. Uses a structured output call with schema `{ identifier, whenToUse, systemPrompt }` +3. The prompt (`generate.txt`) instructs the model to create an agent configuration + +This is used by the `/agent` command in the CLI to dynamically create agents from descriptions. + +--- + +## 3. Relationship Between Agents and Sessions + +### 3.1 Agent per Message, Not per Session + +Each **user message** carries an `agent` field indicating which agent handled it. This is NOT a session-level property — a single session can switch between agents: + +```typescript +// Message info structure (simplified) +interface MessageInfo { + id: MessageID + role: "user" | "assistant" + agent: string // e.g. "build", "explore", "coordinator" + model: { providerID, modelID } + // ... +} +``` + +From `prompt.ts` line 1593: +```typescript +const agentName = cmd.agent ?? input.agent ?? 
(yield* agents.defaultAgent()) +``` + +This means: +- A user can type `@explore` mid-conversation to switch to the explore agent for that turn +- The next turn may return to the default agent +- Each message remembers which agent produced it + +### 3.2 Agent Switching and Plan Mode + +Plan mode has special handling. From `prompt.ts` lines 261-302: +- When switching FROM plan TO build, a system reminder is injected explaining the transition +- When NOT in plan mode but the previous assistant message was from plan, a different reminder is injected +- Plan mode restricts edit permissions + +### 3.3 No Agent-Scoped State or Memory + +OpenCode does **not** have a concept of "agent state" or "agent-scoped memory". Each agent is stateless — it's defined by its: +- System prompt +- Permission ruleset +- Model configuration +- Tool access + +State lives in the **session** (messages, tool results, compaction summaries). The agent definition is purely declarative configuration for how to run LLM calls within a session. + +The `options` field on agents supports arbitrary key-value pairs that get merged into LLM call parameters, but these are static configuration, not runtime state. + +--- + +## 4. Relevance to HUD/AUI Concept + +### 4.1 Could HUD Sections Be Defined as Declarative Configs? + +**Yes — and the agent definition pattern provides a strong analogy.** + +An agent definition is essentially: +```yaml +frontmatter (structured metadata) → controls behavior +markdown body (unstructured prompt) → controls content +``` + +A HUD section definition could follow the same pattern: +```yaml +--- +section: context-status +position: top +refresh: on-event # on-event | on-demand | periodic +priority: 10 +collapse-threshold: 70 # percentage above which to always expand +always-show: false +--- + +Template for rendering this section (can reference data sources)... 
+``` + +Just as agent definitions declare their `mode`, `temperature`, and `permission`, HUD definitions would declare their `position`, `refresh strategy`, and `data requirements`. + +### 4.2 Declarative vs. Imperative: What Agent Definitions Teach Us + +Agent definitions are **declarative configs with a procedural core**: + +| Aspect | Agent Definition | HUD Definition (Proposed) | +|--------|-----------------|---------------------------| +| Metadata | YAML frontmatter | YAML frontmatter | +| Content | Markdown system prompt | Markdown template or rendering spec | +| Behavior | Controls LLM call parameters | Controls HUD rendering and data fetching | +| Overrides | Built-in agents can be extended/overridden | Built-in HUD sections could be extended/overridden | +| Merge | `mergeDeep` with priority | Similar merge with project-level overrides | + +The critical design insight from OpenCode's agent system: **the same merge strategy that allows `.opencode/agents/*.md` files to override built-in agents could allow `.opencode/hud/*.md` files to override built-in HUD sections**. + +### 4.3 Project-Specific HUD Layouts + +Different project types could have different HUD layouts, just as different projects have different agent rosters: + +``` +# A web app project might define: +.opencode/hud/context-bar.md → Shows token usage, model, cost +.opencode/hud/task-tracker.md → Shows task progress from tasks/*.md +.opencode/hud/test-runner.md → Shows test results + +# A data pipeline project might define: +.opencode/hud/pipeline-status.md → Shows last pipeline run status +.opencode/hud/data-quality.md → Shows data quality metrics +.opencode/hud/context-bar.md → Override: add data volume info +``` + +This mirrors how `coordinator.md` uses worktree-specific context that implementation-specialist.md doesn't need. + +### 4.4 How Could This Be Done Without Modifying OpenCode Core? + +OpenCode's plugin system provides the necessary hooks. The relevant hooks are: + +1. 
**`experimental.chat.system.transform`** — already used by open-memory to inject context status. This hook receives `{ sessionID, model }` and `{ system }` (a mutable array of system prompt strings). + +2. **`experimental.session.compacting`** — receives compaction events. + +3. **`event`** — receives all SSE events, which include message updates with token counts. + +A HUD definition system could work as a **plugin**: + +``` +@alkdev/open-memory/ (or a separate @alkdev/open-hud plugin) +├── src/ +│ ├── index.ts # Plugin entry +│ ├── hud/ +│ │ ├── loader.ts # Load .opencode/hud/*.md files (like loadAgent) +│ │ ├── renderer.ts # Render HUD sections into system prompt +│ │ └── sections/ # Built-in section definitions +│ │ ├── context.md +│ │ ├── tasks.md +│ │ └── git.md +│ └── hooks/ +│ ├── system-prompt.ts # experimental.chat.system.transform +│ └── event.ts # SSE event processing for data +``` + +The key architectural insight: **we don't need OpenCode to render a visual HUD**. Instead, we inject structured status information into the system prompt, and the agent's response becomes the "rendered" HUD. This is exactly what open-memory already does with context percentage injection. 
+ +### 4.5 Proposed HUD Definition Schema + +Drawing from the agent definition pattern: + +```yaml +--- +# Section identity +name: context-status # unique identifier (from filename) +description: Context window usage and status + +# Rendering behavior +position: header # header | sidebar | footer | inline +priority: 10 # lower = shown first +refresh: on-event # on-event | on-demand | periodic | once +collapse-threshold: 70 # auto-collapse below this threshold + +# Data requirements +data-sources: + - context-tracker # from this plugin + - session-info # from OpenCode + +# Rendering constraints +max-length: 500 # max chars in system prompt injection +always-show: false # always inject, even when collapsed + +# Agent targeting +agents: # which agents should see this section + - build + - plan + # (null/undefined = all agents) +--- + +## Context Status + +Your context window is at {{context.percentage}}% usage ({{context.tokens}} / {{context.limit}} tokens). + +{{#if context.status.critical}} +⚠️ CRITICAL: Context usage above 92%. Consider using memory_compact() immediately. +{{else if context.status.red}} +🔴 Context usage above 85%. Consider compacting soon. +{{else if context.status.yellow}} +🟡 Context usage above 70%. Monitor but proceed normally. +{{else}} +🟢 Context usage is healthy (below 70%). +{{/if}} +``` + +### 4.6 Comparison: Agent Definitions vs. 
HUD Definitions + +| Dimension | Agent Definition | HUD Definition (Proposed) | +|-----------|-----------------|--------------------------| +| **Format** | YAML frontmatter + Markdown body | YAML frontmatter + template body | +| **Loading** | `.opencode/agents/*.md` | `.opencode/hud/*.md` (or plugin-scoped) | +| **Merge** | Built-in + config + user overrides | Built-in + project overrides | +| **Scope** | Per-agent (LLM call config) | Per-section (status display config) | +| **State** | None (stateless config) | Reactive data sources | +| **Output** | System prompt content | System prompt injection (agent-visible) | +| **Trigger** | User selects `@agent-name` | System prompt assembly (every turn) | +| **Data** | Static config only | Dynamic (from SSE events, DB queries) | + +### 4.7 Key Differences and Challenges + +1. **Statefulness**: Agent definitions are purely static config. HUD sections need reactive data (context percentage, session counts, git status). This requires runtime state management that doesn't exist in the agent system. + +2. **Rendering**: Agent definitions are consumed by the LLM as freeform text. HUD sections could be either: + - **Prompt-injection style** (like current open-memory context injection) — the agent "sees" the HUD + - **Tool-response style** — the agent queries HUD data via a memory tool + - The agent definition pattern suggests prompt-injection, but tool-response may be better for on-demand data + +3. **Conditional visibility**: Agent definitions have `hidden` and `mode` fields. HUD sections need richer conditions — "show only when context > 70%" or "show only when git has uncommitted changes". This is more complex than the simple boolean/enum agent system. + +4. **Layout ordering**: Agent definitions don't have a concept of ordering (they're selected by name). HUD sections need positional semantics (which section appears first, which is collapsible, etc.). + +5. **Refresh cadence**: Agent configs are loaded once. 
HUD data may need to refresh on events, periodically, or on-demand. The agent system has no equivalent concept.

### 4.8 Recommended Approach

**Phase 1: Mimic the agent definition loading pattern exactly.**

Store HUD section templates as `.opencode/hud/*.md` with YAML frontmatter. Load them using the same `gray-matter` + glob pattern that OpenCode uses for agents. Inject them via the `experimental.chat.system.transform` hook.

This requires no OpenCode core changes and establishes the file format convention.

**Phase 2: Add data binding and conditional rendering.**

Extend the template body with simple `{{variable}}` interpolation, matching the `{{…}}` syntax used in the section 4.5 example. The plugin maintains a reactive data store (context tracker, session stats) that fills in these variables at system prompt assembly time.

**Phase 3: Consider proposing first-class HUD support to OpenCode.**

If the pattern proves valuable, propose that OpenCode adds a `.opencode/hud/` directory as a first-class concept, similar to `.opencode/agents/` and `.opencode/skills/`. The loading infrastructure already exists (glob + gray-matter + merge). The new concept is just the "HUD section" schema with its position, refresh, and data-source metadata.

---

## 5. Summary of Findings

### Agent Definition System (OpenCode)

- **Format**: YAML frontmatter + Markdown body in `.opencode/agents/*.md`
- **Schema**: `AgentConfig` with fields for model, prompt, mode, permissions, options, etc.
+- **Loading**: Glob + gray-matter parsing, merged over built-in agents +- **Resolution**: Agent name derived from filename (with directory prefix for nested files) +- **Usage**: Selected per-message via `@agent-name` syntax or as default agent +- **System prompt**: Agent's `prompt` field becomes the primary system prompt (replacing provider default) +- **No state**: Agents are stateless config; state lives in sessions + +### alkhub_ts Agent Definitions + +- **8 agents** forming a coordinated workflow (architect → decomposer → implementation-specialist) +- **Rich prompts**: Detailed workflows, constraints, output templates, tool references +- **Pattern**: Primary agents for top-level use, subagents for specialized delegation +- **Innovation**: Worktree-scoped environment constraints, safe exit protocols, AAR processes + +### HUD/AUI Implications + +- The agent definition pattern (YAML frontmatter + template body, glob loading, merge strategy) translates directly to HUD section definitions +- Agent definitions prove the pattern works for declarative, project-specific configuration +- The key difference is state: agents are static config, HUD needs reactive data +- Can be implemented as a plugin without OpenCode core changes using `experimental.chat.system.transform` +- The same `.opencode/` directory convention would make HUD definitions discoverable and project-specific diff --git a/docs/research/03-handlebars-bun-compatibility.md b/docs/research/03-handlebars-bun-compatibility.md new file mode 100644 index 0000000..272bcc9 --- /dev/null +++ b/docs/research/03-handlebars-bun-compatibility.md @@ -0,0 +1,527 @@ +# Handlebars Template Engine Compatibility with Bun Runtime + +## Table of Contents + +1. [Executive Summary](#executive-summary) +2. [Handlebars in the npm Ecosystem](#handlebars-in-the-npm-ecosystem) +3. [Bun Runtime Compatibility](#bun-runtime-compatibility) +4. [Performance Benchmarks](#performance-benchmarks) +5. 
[Bundle Size Analysis](#bundle-size-analysis) +6. [Precompilation Support](#precompilation-support) +7. [Alternative Template Engines](#alternative-template-engines) +8. [Comparison with Plain Template Literals](#comparison-with-plain-template-literals) +9. [Existing Codebase Assessment](#existing-codebase-assessment) +10. [Build Pipeline Considerations](#build-pipeline-considerations) +11. [Recommendation](#recommendation) + +--- + +## Executive Summary + +Handlebars v4.7.9 works correctly in the Bun runtime with no native module dependencies. However, it adds significant bundle weight (~216 KB bundled, or ~40 KB runtime-only) and its CJS-only module format means `bun build` bundles the entire library rather than tree-shaking unused helpers. For the open-memory plugin, which currently uses plain TypeScript template literals for all output formatting, introducing Handlebars would be a net negative: it adds a dependency, increases bundle size by 8-46%, and provides no capability that cannot be achieved with template literals plus the existing `lines.push()` pattern already in use. + +If a template engine is needed in the future for user-facing or complex conditional templates, **Mustache** is the best lightweight option (14.8 KB bundled, logicless, ESM-compatible), and **Eta** is the best ergonomic option (16.1 KB bundled, ERB-style syntax) though it has a Bun-specific bug with compiled template invocation. 
+ +--- + +## Handlebars in the npm Ecosystem + +| Property | Value | +|----------|-------| +| Latest version | **4.7.9** (published 2026-03-26) | +| License | MIT | +| Weekly downloads | ~25M | +| Repository | | +| Dependencies | `neo-async`, `source-map`, `uglify-js` (compiler only), `minimist` (CLI only), `wordwrap` (CLI only) | +| Native modules | **None** -- pure JavaScript, no `.node` binaries, no `node-gyp` | +| TypeScript support | `@types/handlebars` v4.1.0; `runtime.d.ts` included in package | +| ESM support | **No** -- CJS only, no `"module"` or `"exports"` field in `package.json`; Bun's CJS interop makes it work | + +### Package Structure + +``` +handlebars/ +├── lib/index.js # Main entry (CJS) +├── runtime.js # Alias for runtime-only entry +├── dist/ +│ ├── cjs/ +│ │ ├── handlebars.js # Full CJS bundle (204 KB, compiler + runtime) +│ │ └── handlebars.runtime.js # Runtime-only CJS (72 KB) +│ └── handlebars.min.js # Minified full (89 KB) +│ └── handlebars.runtime.min.js # Minified runtime (29 KB) +├── bin/ # CLI for precompilation +└── types/ + └── index.d.ts # Type declarations +``` + +The `runtime.js` entry point exports only the template execution engine (no compiler), which is the right import for production use with precompiled templates. + +--- + +## Bun Runtime Compatibility + +### Test Results + +| Test | Result | +|------|--------| +| `import Handlebars from "handlebars"` | Works (CJS interop) | +| `Handlebars.compile()` + render | Works correctly | +| `import HandlebarsRuntime from "handlebars/runtime"` | Works | +| Precompiled template spec + runtime | Works correctly | +| `require("handlebars")` | Works (CJS in Bun) | +| `bun build --target bun` bundling | Works, 44 modules bundled | +| `Handlebars.registerHelper()` (custom helpers) | Works | +| `Handlebars.Utils.escapeExpression()` | Works | + +### No Issues Found + +Handlebars is pure JavaScript with no native bindings. 
There are no `.node` files, no `node-gyp` build steps, and no WebAssembly dependencies. All filesystem operations in the compiler/CLI path use standard `fs` module calls that Bun supports. The core template compilation and rendering rely only on string manipulation and `Function()` constructor for generated template functions -- both supported by Bun. + +### CJS-Only Concern + +Handlebars does not ship an ESM entry point. In the `package.json`: + +```json +{ + "main": "lib/index.js", + "module": "", // intentionally empty / absent + "type": "" // CJS by default +} +``` + +This means `bun build` cannot tree-shake individual helpers or utilities -- the entire CJS module is bundled as a single chunk. In practice this means you get the full Handlebars library in your bundle even if you only use `compile()` and `escapeExpression()`. + +--- + +## Performance Benchmarks + +All benchmarks run in Bun v1.3.11 on Linux x64. 10,000 iterations each. + +### Simple Template (`Hello {{name}}!`) + +| Engine | Compile (μs/op) | Render (μs/op) | Combined (μs/op) | +|--------|------------------|-----------------|-------------------| +| Template literals | n/a | **0.17** | 0.17 | +| Mustache | 0.83 | **1.13** | 1.13* | +| Handlebars | 0.56 | 3.66 | 3.66* | +| EJS | 34.56 | 56.47* | 56.47* | +| Eta (renderString) | n/a | -- | 15** | + +*Mustache and EJS combine parse + render in their `render()` call; separate compilation benchmark provided for reference. + +**Eta has a bug in Bun with compiled template invocation (see below). + +### Complex Template (list of 20 items with conditional formatting) + +| Engine | Compile (μs/op) | Render (μs/op) | +|--------|------------------|-----------------| +| Template literals | n/a | **6.25** | +| Mustache | 0.47 | 18.13 | +| Handlebars | 0.70 | 18.76 | +| Eta (renderString) | n/a | 14.64 | +| EJS | 34.56 | 56.47 | + +### Key Takeaways + +1. 
**Template literals are ~3-30x faster** than any template engine for rendering, and ~3-10x faster even than pre-compiled engine-render paths. +2. **Handlebars and Mustache render performance** are nearly identical (~18 μs/op for complex templates). Handlebars has slightly slower render due to its richer helper system. +3. **EJS is by far the slowest** due to its `Function()` constructor approach and `with()` statement for scoping. +4. **Compilation cost is negligible** for all engines except EJS. Pre-compiling at build time saves ~1 μs at runtime -- not meaningful unless you're compiling hundreds of unique templates per second. +5. For the open-memory plugin, which renders ~1 template per tool call invocation, even the slowest engine would add under 60 μs per call. Render performance is not a concern; **bundle size is the deciding factor**. + +--- + +## Bundle Size Analysis + +### Standalone Engine Bundle Size + +Bundled with `bun build --target bun --format esm`, minimal test program: + +| Engine | Bundled Size | Modules | Notes | +|--------|-------------|---------|-------| +| **(none - template literals)** | **0 B** | 0 | Zero-dependency | +| Mustache | 14.8 KB | 2 | Smallest engine | +| Eta | 16.1 KB | 2 | ESM-native | +| EJS | 21.5 KB | 3 | Includes `jake` and async utilities | +| **Handlebars (runtime only)** | **40.4 KB** | 22 | For use with precompiled templates | +| **Handlebars (full)** | **216.8 KB** | 44 | Includes compiler + all built-in helpers | + +### Impact on open-memory Plugin + +The current open-memory plugin bundle is **474 KB** (mostly `@opencode-ai/plugin` + `@opencode-ai/sdk`). + +| Addition | Size Added | % Increase | +|----------|-----------|------------| +| Mustache | +14.8 KB | +3.1% | +| Eta | +16.1 KB | +3.4% | +| EJS | +21.5 KB | +4.5% | +| Handlebars runtime-only | +40.4 KB | +8.5% | +| Handlebars full | +216.8 KB | **+45.7%** | + +A 46% bundle size increase for Handlebars-full is unacceptable for a plugin loaded at OpenCode startup. 
Even the runtime-only variant adds 40 KB for template rendering capability already achievable with template literals. + +### Handlebars Runtime vs. Full + +The runtime-only bundle (`handlebars/runtime`) at 40.4 KB includes: +- Template execution engine +- `escapeExpression()` for HTML escaping +- Built-in helpers (`if`, `unless`, `each`, `with`, `log`, `lookup`) +- SafeString class +- Data tracking + +The full bundle at 216.8 KB additionally includes: +- The AST compiler (parses `{{}}` syntax into template functions) +- The JavaScript compiler (generates function source from AST) +- The printer (AST → source text) +- Source map generation + +If using precompiled templates, you only need the runtime. + +--- + +## Precompilation Support + +Handlebars supports template precompilation, which separates the compile step (build time) from the render step (runtime). + +### Precompile CLI + +```bash +npx handlebars src/templates/ -f dist/templates.js \ + --commonjs handlebars/runtime \ + --known each \ + --known if \ + --known unless +``` + +This produces a JS module containing precompiled template function specifications that can be instantiated with only the runtime: + +```typescript +import HandlebarsRuntime from "handlebars/runtime"; + +// Template spec from precompile (could be imported from a generated file) +const templateSpec = {"compiler":[8,">=4.3.0"],"main":function(container,depth0,...){...},"useData":true}; + +const template = HandlebarsRuntime.template(templateSpec); +console.log(template({ name: "World" })); // "Hello World!" 
+``` + +### Precompile API + +```typescript +import Handlebars from "handlebars"; + +// At build time +const spec = Handlebars.precompile("Hello {{name}}!"); +// spec is a JSON-safe object string containing the template function source + +// At runtime (only needs handlebars/runtime, 40 KB) +import HandlebarsRuntime from "handlebars/runtime"; +const template = HandlebarsRuntime.template(eval("(" + spec + ")")); +``` + +### Feasibility for open-memory + +Precompilation is feasible but adds complexity to the build pipeline. Since the open-memory plugin currently has only 4-5 formatting functions (all in `src/history/format.ts` and `src/compaction/prompt.ts`), the overhead of setting up precompilation is unjustified. Precompiled templates would save ~176 KB (full - runtime = 216.8 - 40.4) at the cost of a custom build step, with no meaningful runtime performance gain for templates called once per tool invocation. + +--- + +## Alternative Template Engines + +### Mustache (v4.2.0) + +| Property | Value | +|----------|-------| +| License | MIT | +| Philosophy | Logic-less templates -- no `if`, no `for`, only sections | +| ESM Support | Yes (conditional exports in `package.json`) | +| Dependencies | None | +| Bundle size | **14.8 KB** | +| Bun compatibility | Works perfectly | +| TypeScript types | `@types/mustache` | + +```typescript +import Mustache from "mustache"; +Mustache.render("Hello {{name}}!", { name: "World" }); +``` + +**Strengths**: Smallest bundle, zero dependencies, works in Bun, well-understood spec, XSS-safe by default. + +**Weaknesses**: No logic at all -- cannot do conditional formatting without data preprocessing. For example, you cannot render `"No sessions found."` vs. a table based on row count without preparing the data model to include a flag. This is a significant limitation for the open-memory plugin's formatting needs. 
### Eta (v4.5.1)

| Property | Value |
|----------|-------|
| License | MIT |
| Philosophy | Lightweight ERB-style templates, ESM-native |
| ESM Support | Yes (`"type": "module"`, dual CJS/ESM exports) |
| Dependencies | None |
| Bundle size | **16.1 KB** |
| Bun compatibility | **Partial** -- `renderString()` works, but compiled template invocation fails with `TypeError: undefined is not an object (evaluating 'this.config.escapeFunction')` |
| TypeScript types | Built-in |

```typescript
import { Eta } from "eta";
const eta = new Eta();
eta.renderString("Hello <%= it.name %>!", { name: "World" });
```

**Strengths**: ERB-style syntax (`<%= %>`, `<% %>`) familiar to many developers, ESM-native, very small, configurable delimiters.

**Weaknesses**: The compiled-template bug in Bun is a blocker for production use. The `compile()` method produces a function that references `this.config` on a context that is `undefined` when invoked in Bun. This appears to be a `this`-binding issue in Bun's ESM module evaluation.

**Workaround**: Use `renderString()` only (no separate compile step). This is fine for the plugin's use case but eliminates the precompilation advantage.

### EJS (v5.0.2)

| Property | Value |
|----------|-------|
| License | Apache-2.0 |
| Philosophy | Embedded JavaScript templates |
| ESM Support | Yes (dual CJS/ESM) |
| Dependencies | None declared in `package.json`, but the published package still carries `jake` helpers and async utilities that end up in the bundle (see the bundle size analysis above) |
| Bundle size | **21.5 KB** |
| Bun compatibility | Works |
| TypeScript types | `@types/ejs` |

```typescript
import ejs from "ejs";
ejs.render("Hello <%= name %>!", { name: "World" });
```

**Strengths**: Familiar syntax, async rendering support, includes, layouts.

**Weaknesses**: **Slowest engine** in benchmarks (56 μs/op for complex templates). Uses the `Function()` constructor, which is a security concern if templates contain user input (not relevant for open-memory, but worth noting).
No logic-less mode -- templates can execute arbitrary JS. + +### Plain Template Literals (No Dependency) + +```typescript +// Current open-memory pattern +export const formatSessionList = (rows: Record[]): string => { + if (rows.length === 0) return "No sessions found."; + const lines: string[] = ["# Recent Sessions\n"]; + lines.push("| ID | Title | Updated | Messages |"); + lines.push("|----|-------|---------|----------|"); + for (const row of rows) { + lines.push(`| ${row.id} | ${row.title} | ${row.updated} | ${row.msgs} |`); + } + return lines.join("\n"); +}; +``` + +**Strengths**: Zero bundle cost, fastest rendering, full TypeScript type safety, no dependency to maintain, no security surface. + +**Weaknesses**: Verbose for complex conditional formatting. Harder to visually parse the output format from code. No built-in HTML escaping (irrelevant for this plugin which outputs plain text/Markdown). + +--- + +## Comparison with Plain Template Literals + +The open-memory plugin currently formats all output using TypeScript template literals and the `lines.push()` pattern. Here is an assessment of whether Handlebars would improve each formatting function: + +### `formatSessionList()` (format.ts) + +```typescript +// Current: 23 lines, clear, zero dependencies +// Handlebars equivalent: +const template = Handlebars.compile(` +# Recent Sessions +{{#if sessions.length}} +| ID | Title | Updated | Messages | +|----|-------|---------|----------| +{{#each sessions}} +| {{id}} | {{title}} | {{updated}} | {{msgs}} | +{{/each}} +{{else}} +No sessions found. +{{/if}} +`); +``` + +The Handlebars version is arguably more readable for the template structure, but adds a 216 KB dependency for marginal readability improvement. 
+ +### `formatMessageList()` (format.ts) + +```typescript +// Current: 30 lines, with role icons, truncation logic, separator lines +// Handlebars would need a custom helper for truncation and role icons +// → Handlebars adds complexity, not simplicity +``` + +### `getCompactionPrompt()` (prompt.ts) + +```typescript +// Current: 42 lines of static template text +// This is a static string, not a dynamic template at all +// Handlebars would be pure overhead +``` + +### Verdict + +For the open-memory plugin's current formatting needs (4-5 functions, ~120 lines total), template literals are the right choice. Template engines become valuable when you have: +- Many templates (20+) that need to be maintained separately from code +- Non-developers editing templates +- Complex conditional rendering with repeated patterns +- Internationalization / localization requirements + +None of these apply to open-memory currently. + +--- + +## Existing Codebase Assessment + +### Current Dependencies (package.json) + +```json +{ + "dependencies": { + "@opencode-ai/plugin": "^1.1.3" + }, + "devDependencies": { + "@types/bun": "^1.2.0", + "@types/node": "^20.14.0", + "typescript": "^5.7.3" + } +} +``` + +**No template engine dependency exists.** All formatting is done with: +1. Template literals (`` `Hello ${name}!` ``) for simple interpolation +2. `lines.push()` + `lines.join("\n")` pattern for multi-line structured output +3. `String(row.field ?? "default")` for safe data access +4. `text.slice(0, maxLen)` for truncation + +These patterns are used consistently across: +- `src/history/format.ts` -- 3 functions, 73 lines +- `src/history/search.ts` -- 1 function, 61 lines +- `src/tools.ts` -- inline formatting in handlers (session lists, compaction tables, context status) +- `src/compaction/prompt.ts` -- 1 static template, 42 lines + +Total template-related code: ~250 lines across 4 files. Not enough to justify a template engine dependency. 
+ +--- + +## Build Pipeline Considerations + +### Current Build Setup + +```json +{ + "scripts": { + "build": "bun build src/index.ts --outdir dist --target bun --format esm && tsc --emitDeclarationOnly" + } +} +``` + +The build uses `bun build` (Bun's native bundler) with `--target bun --format esm`. This produces a single ESM bundle at `dist/index.js` (currently 474 KB). + +### How `bun build` Handles Handlebars + +When `bun build` encounters `import Handlebars from "handlebars"`: + +1. It resolves `handlebars` through Bun's module resolution (looks in `node_modules`) +2. Since Handlebars is CJS with no ESM entry, Bun's CJS interop wraps it +3. The bundler traces all reachable exports and includes them in the output +4. **No tree-shaking occurs** because CJS exports are dynamic by nature +5. The entire Handlebars library (compiler + runtime + helpers) is included: **216.8 KB bundled** + +With precompiled templates and `import HandlebarsRuntime from "handlebars/runtime"`: +1. Only the runtime entry point is resolved +2. Still CJS, so still no tree-shaking +3. But only 22 modules (vs. 44): **40.4 KB bundled** + +### Custom Build Steps + +If using Handlebars precompilation, the build pipeline would become: + +```bash +# Step 1: Precompile templates (new step) +npx handlebars src/templates/ -f src/generated/templates.ts --commonjs handlebars/runtime + +# Step 2: Existing build +bun build src/index.ts --outdir dist --target bun --format esm + +# Step 3: Existing type declaration +tsc --emitDeclarationOnly +``` + +This adds toolchain complexity for minimal benefit. Precompiled template specs would also need TypeScript type declarations. 
+ +--- + +## Recommendation + +### For the open-memory Plugin: Do NOT Add Handlebars + +**Rationale:** + +| Factor | Template Literals | Handlebars | +|--------|-------------------|------------| +| Bundle size impact | 0 KB | +40 KB (runtime) / +217 KB (full) | +| Dependencies added | 0 | 1 (plus transitive deps) | +| Build complexity | None | None (runtime) or added step (precompile) | +| Rendering speed | ~6 μs | ~19 μs | +| Code readability | Moderate | Slightly better for complex templates | +| Maintainability | TypeScript-native | New template syntax, separate .hbs files | +| Security surface | None | Template injection (mitigated by no user input) | + +The open-memory plugin has: +- ~250 lines of template code across 4 files +- Simple formatting (Markdown tables, lists, status lines) +- No user-editable templates +- No internationalization needs +- No complex conditional logic beyond `if (rows.length === 0)` +- Startup-time load concerns (OpenCode loads plugins at session start) + +Adding Handlebars would increase the bundle by 8-46% for zero functional benefit. + +### If a Template Engine Is Needed in the Future + +If the formatting requirements grow significantly (e.g., user-configurable output templates, i18n, dozens of templates), the recommended priority order is: + +1. **Mustache** (14.8 KB) -- If you need only interpolation and section-based logic. Smallest footprint, zero dependencies, works in Bun, XSS-safe by default. The "logic-less" constraint forces cleaner data modeling. + +2. **Eta** (16.1 KB) -- If you need ERB-style control flow (`<% if (...) { %>`) and are willing to use `renderString()` only (avoid the compiled-template `this` binding bug in Bun). ESM-native, excellent TypeScript support, configurable. + +3. **Handlebars runtime-only** (40.4 KB) -- If you need Handlebars features (partials, custom helpers, precompilation workflow) and can accept the larger bundle. 
Use with precompiled templates only -- do not bundle the full Handlebars compiler. + +4. **Handlebars full** (216.8 KB) -- Only if you need runtime template compilation (e.g., user-provided templates). Not recommended for plugins. + +5. **EJS** -- Not recommended. Slowest engine, security concerns with `Function()` constructor, minimal advantages over Eta. + +### Template Literal Best Practices (Current Approach) + +For now, continue using template literals but consider these improvements: + +```typescript +// Helper for markdown tables (type-safe) +function markdownTable(headers: string[], rows: string[][]): string { + const headerLine = `| ${headers.join(" | ")} |`; + const separatorLine = `| ${headers.map(() => "---").join(" | ")} |`; + const dataLines = rows.map(row => `| ${row.join(" | ")} |`); + return [headerLine, separatorLine, ...dataLines].join("\n"); +} + +// Use tagged templates for multi-line strings +const compactionPrompt = String.raw` +You are compacting your own session to free context space. +... +`; +``` + +This keeps the zero-dependency advantage while reducing the `lines.push()` boilerplate. + +--- + +## Appendix: Test Environment + +- **Runtime**: Bun v1.3.11 (Linux x64) +- **Node compatibility**: Handlebars tested on Node v22+ (works) +- **Bundle target**: `--target bun --format esm` +- **Benchmark**: 10,000 iterations per test, single-threaded, warmed up +- **Template complexity**: Simple (`Hello {{name}}!`) and complex (20-item list with conditionals) +- **All engines tested**: Handlebars 4.7.9, Mustache 4.2.0, Eta 4.5.1, EJS 5.0.2 + +--- + +*Research conducted 2026-04-22. 
Versions and benchmarks reflect the state of npm at the time of writing.* diff --git a/docs/research/04-hud-architecture.md b/docs/research/04-hud-architecture.md new file mode 100644 index 0000000..da14589 --- /dev/null +++ b/docs/research/04-hud-architecture.md @@ -0,0 +1,1347 @@ +# HUD/AUI Architecture: Persistent State Injection via System Prompts + +## Table of Contents + +1. [Executive Summary](#1-executive-summary) +2. [Problem Statement](#2-problem-statement) +3. [State Management](#3-state-management) +4. [Gradual Disclosure](#4-gradual-disclosure) +5. [System Prompt Injection Mechanism](#5-system-prompt-injection-mechanism) +6. [HUD Data Sources](#6-hud-data-sources) +7. [HUD Update Tools](#7-hud-update-tools) +8. [Relationship to Existing open-memory Tools](#8-relationship-to-existing-open-memory-tools) +9. [Implementation Plan](#9-implementation-plan) +10. [Risks and Open Questions](#10-risks-and-open-questions) +11. [Key File Reference](#key-file-reference) + +--- + +## 1. Executive Summary + +This document proposes an **Agent User Interface (HUD/AUI)** system for `@alkdev/open-memory` that injects structured, persistent state into the system prompt on every LLM call. The HUD gives the agent continuous awareness of its context, task state, and session history without requiring tool calls to check status. It supplements -- and in some scenarios replaces -- compaction as the primary context management mechanism. + +**Core mechanism**: The `experimental.chat.system.transform` plugin hook is called before every LLM call. The plugin reconstructs the HUD from its current state and pushes it onto the `system` array. This is the same hook already used for context percentage injection; the HUD extends it into a richer, multi-section document. + +**Key insight**: The HUD does **not** need to persist in the database or in messages. It is an ephemeral system prompt injection reconstructed from live state on every call. 
This means: +- It automatically survives compaction (injected fresh after each compaction cycle) +- It is always current (rebuilt on every LLM call) +- It does not consume context beyond the current call's injection +- It does not interfere with conversation history + +--- + +## 2. Problem Statement + +### 2.1 What Compaction Gets Wrong + +Current compaction is a lossy, all-or-nothing mechanism: + +1. **Information is discarded**: When compaction fires, the full conversation is replaced with an LLM-generated summary. Nuance, code snippets, exact error messages, and decision rationale are lost. + +2. **Timing is suboptimal**: Compaction fires at ~92% (automatic) or when the agent calls `memory_compact`. Either way, the agent loses the ability to refer back to earlier conversation content. + +3. **No continuous awareness**: Between compaction events, the agent has no structured view of its own state -- it relies on reading back through the conversation to re-orient. + +4. **The "blasted with history" problem**: After compaction, the agent receives the entire summary at once. The agent has no way to selectively expand sections of interest without calling `memory` tool operations that consume additional context. + +### 2.2 What a HUD Provides + +A HUD solves these problems by providing: + +1. **Continuous, structured state**: The agent always sees its current status (context %, active task, recent files) at the top of every system prompt. + +2. **Gradual disclosure**: Sections can be collapsed or expanded based on context usage, budget, or agent intent. The agent sees summaries by default and uses tools to drill deeper. + +3. **Complements compaction**: The HUD doesn't replace compaction for long conversations, but it reduces the need for it by keeping critical information in a small, always-present token budget. + +4. 
**Agent-maintained state**: The agent can update HUD sections (notes, task status, key decisions) via tools, giving it a structured memory that persists across tool calls and survives compaction. + +--- + +## 3. State Management + +### 3.1 Where HUD State Lives + +The HUD has two categories of state: + +| Category | Examples | Storage | Lifetime | +|----------|----------|---------|----------| +| **Derived** (auto-computed) | Context %, session count, model name | In-memory (from `ContextTracker`) | Within session | +| **Agent-maintained** (mutable) | Task notes, key decisions, file list | File system (JSON) | Across sessions | + +**Derived state** uses the same pattern as the existing `ContextTracker`: event-driven updates via the `event` hook, stored in a `Map`. This state is ephemeral -- it resets on plugin reload. + +**Agent-maintained state** is the new capability. It needs to survive across: +- Tool calls within a session (it must be immediately available after update) +- Compaction (the system prompt injection ensures visibility after compaction) +- Plugin restarts (persisted to disk) +- Potentially, across sessions (same project) + +### 3.2 File System Storage + +For agent-maintained state, we propose a JSON file per session: + +``` +${XDG_DATA_HOME:-$HOME/.local/share}/opencode/hud/ +├── sessions/ +│ └── ses_abc123.json # Per-session HUD state +└── project/ + └── .json # Project-level HUD defaults +``` + +**Per-session file** (`sessions/{sessionId}.json`): + +```typescript +interface HudState { + // Metadata + sessionId: string + projectPath: string + lastUpdated: number + + // Sections the agent can edit + currentTask: string | null // What am I working on? 
+ keyDecisions: string[] // Important decisions made + activeFiles: string[] // Files currently being worked on + notes: string[] // Freeform notes + blockers: string[] // Things preventing progress + nextSteps: string[] // Planned next actions + + // Sections auto-maintained by the plugin + compactedContext: string | null // Summary from last compaction (if any) +} +``` + +**Why JSON, not Markdown?** +- JSON is machine-readable and writable; no parsing ambiguity +- The HUD rendering is done by TypeScript code that produces Markdown for the prompt +- The `memory` tool's existing output is Markdown, but its inputs are structured (args) +- Tool updates are atomic key-value operations, not freeform text editing + +**Why per-session files?** +- Sessions are the natural scope: each conversation has its own context +- OpenCode's DB uses session-scoped data (todos, messages, compaction summaries) +- Per-session files are simple to implement and reason about +- A project-level file provides defaults that new sessions inherit + +### 3.3 Survival Across Tool Calls + +Within a session, HUD state changes must be immediately visible. The flow is: + +``` +Agent calls hud_update({section: "currentTask", value: "Implementing auth middleware"}) + → Tool handler updates in-memory state + writes to file + → Next LLM call triggers system.transform hook + → Hook reads in-memory state (or file if cold start) + → HUD section shows "Current Task: Implementing auth middleware" +``` + +The in-memory state is a `Map` keyed by session ID. On update, we: +1. Update the in-memory map (immediate availability) +2. Write to disk asynchronously (durability) + +On the next `system.transform` call, the hook reads from the in-memory map. If the session isn't in memory (cold start after plugin reload), it reads from disk. + +### 3.4 Survival Across Compaction + +The HUD **automatically survives compaction** because: + +1. 
The `system.transform` hook is called on every LLM call, including after compaction +2. The HUD is reconstructed from in-memory state + file, not from conversation messages +3. Compaction removes old messages but does not touch plugin state or files + +However, **compaction summaries** are particularly valuable for the HUD. When a compaction occurs: +- The `session.compacted` event fires +- We can query the compaction summary from the DB and store it in `compactedContext` in the HUD state +- This summary then appears in the HUD on every subsequent call, preserving the key information from before compaction + +### 3.5 Survival Across Sessions + +For cross-session continuity, we use a project-level defaults file: + +```typescript +interface ProjectHudDefaults { + projectPath: string + projectNotes: string[] // Persistent project-level notes + keyFiles: string[] // Important project files to always show + conventions: string[] // Project conventions to remember + lastUpdated: number +} +``` + +When a new session starts, the HUD can show project-level defaults as a starting state. The agent then updates the session-scoped HUD as work proceeds. + +**Concurrent sessions**: Each session has its own HUD state file. Concurrent sessions work naturally because they don't share mutable state. The `system.transform` hook receives `sessionID`, so it loads the correct state for each session. + +### 3.6 In-Memory Architecture + +```typescript +class HudManager { + private sessions = new Map() + private ctx: PluginInput + private hudDir: string + + constructor(ctx: PluginInput) { + this.ctx = ctx + this.hudDir = `${process.env.XDG_DATA_HOME || `${process.env.HOME}/.local/share`}/opencode/hud` + } + + // Get HUD state for a session (in-memory, with file fallback) + getState(sessionId: string): HudState { + if (this.sessions.has(sessionId)) { + return this.sessions.get(sessionId)! 
+ } + // Cold start: load from file + const state = this.loadFromDisk(sessionId) + this.sessions.set(sessionId, state) + return state + } + + // Update a section of the HUD + updateSection(sessionId: string, section: keyof HudState, value: unknown): HudState { + const state = this.getState(sessionId) + // Type-safe update + ;(state as any)[section] = value + state.lastUpdated = Date.now() + // Async persist + this.persistToDisk(sessionId, state).catch(() => {}) + return state + } + + // Append-based updates for array sections + appendToSection(sessionId: string, section: "keyDecisions" | "notes" | "activeFiles" | "blockers" | "nextSteps", item: string): HudState { + const state = this.getState(sessionId) + const arr = state[section] as string[] + if (!arr.includes(item)) { + arr.push(item) + state.lastUpdated = Date.now() + this.persistToDisk(sessionId, state).catch(() => {}) + } + return state + } + + // Load from file + private loadFromDisk(sessionId: string): HudState { + try { + const data = Bun.file(`${this.hudDir}/sessions/${sessionId}.json`).jsonSync() + return data as HudState + } catch { + // New session: return defaults + return { + sessionId, + projectPath: this.ctx.project?.path ?? "", + lastUpdated: Date.now(), + currentTask: null, + keyDecisions: [], + activeFiles: [], + notes: [], + blockers: [], + nextSteps: [], + compactedContext: null, + } + } + } + + // Persist to file (async, fire-and-forget) + private async persistToDisk(sessionId: string, state: HudState): Promise { + await Bun.write( + `${this.hudDir}/sessions/${sessionId}.json`, + JSON.stringify(state, null, 2), + ) + } +} +``` + +This mirrors the existing `ContextTracker` pattern (in-memory map with event-driven updates) but adds file persistence. + +--- + +## 4. Gradual Disclosure + +### 4.1 The "Not Blasted with Entire History" Problem + +The user's requirement: "how and if we could do this such that the agents aren't blasted with the entire history at once." 
This is the core UX challenge. A naive HUD that dumps everything into the system prompt on every call would:
- Consume too many tokens (history summaries can be thousands of tokens)
- Provide diminishing returns (the agent can't act on all that information simultaneously)
- Compound the cost across calls (the same 2000-token HUD is re-sent every turn)

### 4.2 Tiered Disclosure Strategy

The solution is a **tiered disclosure model** with three levels:

| Tier | Token Budget | When Shown | Content |
|------|-------------|------------|---------|
| **Status** | 150-300 tokens | Every call | Context %, model, task summary, alert flags |
| **Summary** | 300-600 tokens | When context < 70% (green) | Status + recent decisions, active files, notes previews |
| **Detail** | 600-1000 tokens | On explicit request only (via `memory` tool) | Full notes, decisions with rationale, expanded file list |

**Status tier** (always shown, ~150-300 tokens):

```
## State
🟢 Context: 45% used (90,000 / 200,000 tokens, anthropic/claude-sonnet-4-20250514)
Task: Implementing auth middleware
Files: 3 active | Decisions: 2 | Notes: 4 | Blockers: 0
```

**Summary tier** (shown when context is green, ~300-600 tokens):

```
## State
🟢 Context: 45% used (90,000 / 200,000 tokens, anthropic/claude-sonnet-4-20250514)

### Current Task
Implementing auth middleware -- JWT validation + refresh token rotation

### Key Decisions
- Using RS256 for JWT signing (not HS256) for key rotation support
- Refresh tokens stored hashed in DB, not plaintext

### Active Files
- src/middleware/auth.ts (editing)
- src/routes/login.ts (editing)
- src/config/auth.ts (referencing)

### Notes (showing first 2 of 4, use `memory({tool: "hud_notes"})` for all)
- Refresh token TTL: 7 days
- Rate limit: 5 requests/min for login endpoint

### Next Steps
1. Complete JWT validation logic
2. Add refresh token rotation endpoint
3.
Write integration tests +``` + +**Detail tier** (only via `memory` tool, on-demand): + +The agent calls `memory({tool: "hud_notes"})` or `memory({tool: "hud_decisions"})` to get the full content. This costs a tool call but does not inject into the system prompt. + +### 4.3 Context-Adaptive Rendering + +The HUD should **adapt its detail level based on context usage**: + +```typescript +function renderHud(state: HudState, contextInfo: ContextInfo | null): string { + const percentage = contextInfo?.percentage ?? 0 + + // Always show status line + const lines: string[] = ["## State"] + lines.push(renderContextLine(contextInfo)) + + if (state.currentTask) { + lines.push(`**Task**: ${state.currentTask}`) + } + + // Summary counts (always shown -- very cheap) + const counts = [ + state.keyDecisions.length && `Decisions: ${state.keyDecisions.length}`, + state.activeFiles.length && `Files: ${state.activeFiles.length}`, + state.notes.length && `Notes: ${state.notes.length}`, + state.blockers.length && `Blockers: ${state.blockers.length}`, + ].filter(Boolean) + if (counts.length) lines.push(counts.join(" | ")) + + // Below green threshold: show full summaries + if (percentage < THRESHOLDS.yellow) { + lines.push(renderFullSummary(state)) + } + + // Yellow threshold: show abbreviated summaries + if (percentage >= THRESHOLDS.yellow && percentage < THRESHOLDS.red) { + lines.push(renderAbbreviatedSummary(state)) + } + + // Red/critical: status line only, with compact advisory + if (percentage >= THRESHOLDS.red) { + lines.push("⚠ Use memory({tool: 'hud_notes'}) to view detailed state.") + } + + // Always show blockers (critical info regardless of context level) + if (state.blockers.length > 0) { + lines.push("### ⚠ Blockers") + for (const b of state.blockers) lines.push(`- ${b}`) + } + + // Always show compacted context if present (essential after compaction) + if (state.compactedContext) { + lines.push("### Compacted Context") + lines.push(state.compactedContext) + } + + return 
lines.join("\n") +} +``` + +### 4.4 Token Budget Accounting + +We need to be disciplined about token costs. Approximate token costs: + +| Section | Detail Level | Approx. Tokens | When Shown | +|---------|-------------|---------------|------------| +| Context status line | Minimal | ~30 | Always | +| Task line | Minimal | ~20 | Always | +| Summary counts | Minimal | ~25 | Always | +| Key decisions (summaries) | Abbreviated | ~80 | Green/Yellow | +| Key decisions (full) | Full | ~200 | Green only | +| Active files | Abbreviated | ~50 | Green/Yellow | +| Notes preview | Abbreviated | ~100 | Green only | +| Blockers | Full | ~50 | Always (if any) | +| Compacted context | Full | ~100-300 | Always (if present) | + +**Total worst case (green, all sections)**: ~700-1000 tokens +**Total typical (yellow)**: ~300-500 tokens +**Total minimal (red/critical)**: ~100-200 tokens + +Compared to the existing context injection (~50 tokens for just the status line), this is a meaningful increase. But compared to a compaction summary (typically 2000-5000 tokens), it's 5-20x more efficient for preserving the most critical state. + +### 4.5 Prompt Caching Considerations + +OpenCode uses a 2-part system prompt structure for caching (analyzed in the compaction research doc): + +1. **Block 1**: Provider prompt (static across calls within a session) +2. **Block 2**: Everything else (agent instructions, context, HUD) + +Since the HUD content changes on every call (context % updates, task changes), it's part of Block 2. This means the HUD breaks prompt caching for Block 2 on every call. + +**Mitigation strategies**: + +1. **Minimize per-call changes**: Only update the HUD content that actually changed since the last call. If nothing changed, the HUD string is identical and can benefit from caching. + +2. **Separate static and dynamic sections**: The task description, decisions, and notes change infrequently. Only the context percentage changes on every call. 
We could push the static sections as one string and the dynamic context line as another string, allowing the static part to be cached. + +```typescript +// Instead of one push: +output.system.push(renderHud(state, contextInfo)) + +// Consider two pushes: +output.system.push(renderHudStatic(state)) // cached across calls +output.system.push(renderHudDynamic(contextInfo)) // changes every call +``` + +However, this optimization is only meaningful if OpenCode's caching implementation respects multiple `system` array entries. From the research (see section 5), OpenCode already recombines system messages for caching. The key insight: pushing 2 strings allows OpenCode to potentially cache the first while the second changes. + +In practice, the HUD is small enough (~300-700 tokens) that the caching impact is acceptable. Models with prompt caching (Claude 3.5+, GPT-4o) cache at the prefix level, so only the changed part re-enters. + +--- + +## 5. System Prompt Injection Mechanism + +### 5.1 How `experimental.chat.system.transform` Works + +The hook is defined in the plugin SDK (`/workspace/opencode/packages/plugin/src/index.ts:251-256`): + +```typescript +"experimental.chat.system.transform"?: ( + input: { sessionID?: string; model: Model }, + output: { + system: string[] + }, +) => Promise +``` + +**Invocation sites**: + +1. **Primary** (`/workspace/opencode/packages/opencode/src/session/llm.ts:116-126`): + ```typescript + const header = system[0] + await Plugin.trigger( + "experimental.chat.system.transform", + { sessionID: input.sessionID, model: input.model }, + { system }, + ) + // rejoin to maintain 2-part structure for caching if header unchanged + if (system.length > 2 && system[0] === header) { + const rest = system.slice(1) + system.length = 0 + system.push(header, rest.join("\n")) + } + ``` + Called on every `LLM.stream()` invocation -- every assistant turn in the session loop. + +2. 
**Agent generation** (`/workspace/opencode/packages/opencode/src/agent/agent.ts:340`): + Called during the one-time agent generation call. No `sessionID` is passed here. + +**Key behaviors**: + +- The `output.system` array is **mutable** -- plugins can `push()` new strings or modify existing ones +- Multiple plugins modify the same `system` array in registration order +- After all plugins run, OpenCode optimizes the array for caching: if the first element (provider prompt) is unchanged and there are >2 elements, it recombines the extras into a single second element +- **The hook is called on every LLM call**, not just session start +- **The hook is async** -- it can perform I/O operations (DB queries, file reads) +- **sessionID is optional** -- it's absent during agent generation. Plugins must handle this. + +### 5.2 System Prompt Assembly Order + +From the LLM source code and prior research, the full system prompt is assembled in this order: + +1. **Agent prompt** (from `.opencode/agents/*.md`) or provider default prompt +2. **Custom system** (from plugin hooks, compaction, plan mode injection) +3. **User-provided system prompt** (from the user message) +4. **Plugin modifications** via `experimental.chat.system.transform` +5. **Environment info** (model name, working directory, platform, date) +6. **Skills list** (available skills/tools) +7. **Instruction files** (AGENTS.md, CLAUDE.md) + +After all plugins run: +8. **Caching optimization**: recombine system messages into 2 blocks if first element is unchanged + +The HUD injection happens at step 4. It appears **after** the agent/system prompt but **before** environment info and instruction files. This is a good position: it's visible to the agent but doesn't interfere with higher-priority instructions. + +### 5.3 What the Agent Sees + +Example system prompt structure with HUD injection: + +``` +[System Message 1: Agent prompt + custom system] +"You are Claude, an AI assistant..." 
+ +[System Message 2: Plugin injections + environment + skills + instructions] +"🟢 Context: 45% used (90,000/200,000 tokens) +## State +... +Here are the available tools... +Current date: 2026-04-22 +Instructions from: AGENTS.md..." +``` + +After OpenCode's caching optimization, the HUD is merged into the second system message block. This is efficient for caching -- the first block (agent prompt) rarely changes and benefits from caching. + +### 5.4 Current open-memory Implementation + +The existing injection in `/workspace/@alkdev/open-memory/src/index.ts:16-49`: + +```typescript +"experimental.chat.system.transform": async (input, output) => { + if (!input.sessionID) return; + + const info = contextTracker.getContextInfo(input.sessionID); + if (!info) return; + + const statusEmoji = /* ... */; + const advisory = /* ... */; + const lines = [ + `${statusEmoji} Context: ${info.percentage}% used (...)`, + ]; + if (advisory) lines.push(advisory); + output.system.push(lines.join("\n")); +}, +``` + +**This is ~50 tokens per call.** The HUD would extend this to ~300-700 tokens per call, depending on the tier. + +### 5.5 Extending the Current Hook + +The HUD extension is straightforward -- we extend the existing `system.transform` hook: + +```typescript +"experimental.chat.system.transform": async (input, output) => { + if (!input.sessionID) return; + + const sessionId = input.sessionID; + const contextInfo = contextTracker.getContextInfo(sessionId); + const hudState = hudManager.getState(sessionId); + + // Render HUD based on context level + const hud = renderHud(hudState, contextInfo); + output.system.push(hud); +}, +``` + +The `renderHud` function handles tiered rendering as described in section 4.3. + +--- + +## 6. 
HUD Data Sources + +### 6.1 Event-Driven Data (Real-Time) + +These data sources are updated via the `event` hook, mirroring the existing `ContextTracker` pattern: + +| Data | Event Source | Tracking | +|------|-------------|----------| +| Context % / tokens | `message.updated` (assistant messages) | Already tracked in `ContextTracker` | +| Compaction occurrence | `session.compacted` event | New: trigger HUD summary update | +| File edits | `file.edited` event | New: track recently edited files | +| Todo status | `todo.updated` event | New: track task status | + +**Available SSE events** (from OpenCode source): + +| Event | Schema | Usable for HUD? | +|-------|--------|----------------| +| `message.updated` | `{ sessionID, info: Message }` | Yes -- context tracking (existing) | +| `message.part.updated` | `{ sessionID, part, time }` | Possible -- tool call tracking | +| `message.part.delta` | `{ sessionID, ... }` | No -- streaming delta, too frequent | +| `session.created` | `{ sessionID, info }` | Minimal value | +| `session.updated` | `{ sessionID, info }` | Yes -- title, status changes | +| `session.compacted` | `{ sessionID }` | Yes -- trigger summary update | +| `session.diff` | `{ sessionID, diff }` | Possible -- file change tracking | +| `file.edited` | `{ file }` | Yes -- track recently edited files | +| `todo.updated` | `{ sessionID, todos }` | Yes -- task status | + +### 6.2 On-Demand Data (Queried at Render Time) + +These are queried fresh from the database when the HUD is rendered: + +| Data | Source | Query | +|------|--------|-------| +| Session title | `session` table | `SELECT title FROM session WHERE id = ?` | +| Compaction count | `message` + `part` tables | Count compaction parts for session | +| Session start time | `session` table | `SELECT time_created FROM session WHERE id = ?` | +| Project name | `project` table | Joined with session | + +**Performance consideration**: These queries run on every `system.transform` call. 
Since `bun:sqlite` in readonly mode is sub-millisecond for indexed queries, 2-3 simple queries are acceptable. However, we should: +1. Cache query results in the in-memory state +2. Only re-query when a relevant event indicates a change (e.g., `session.updated` to refresh the title) +3. Never do expensive queries (no full-text search, no joins across large tables) in the system transform hook + +### 6.3 Agent-Maintained Data (Via HUD Tools) + +These are updated by the agent through tool calls: + +| Data | Tool | Update Type | +|------|------|-------------| +| Current task | `memory({tool: "hud_update", args: {section: "currentTask", value: "..."}})` | Full replacement | +| Key decisions | `memory({tool: "hud_decision", args: {decision: "..."}})` | Append | +| Active files | `memory({tool: "hud_file", args: {file: "..."}})` | Append (with dedup) | +| Notes | `memory({tool: "hud_note", args: {note: "..."}})` | Append | +| Blockers | `memory({tool: "hud_blocker", args: {blocker: "..."}})` | Append | +| Next steps | `memory({tool: "hud_step", args: {step: "..."}})` | Append | + +### 6.4 File Watching Alternative (Not Recommended) + +One could also populate HUD data by watching the filesystem (`.opencode/hud/*.md` files). This was explored in the agent definitions pattern research (doc 02). However: +- The agent definitions pattern loads static Markdown files at session start +- HUD sections need **reactive** data that changes during a session +- File watching adds complexity (watcher, debouncing, file I/O on every change) +- Agent tool calls are simpler and more explicit + +For Phase 1, we use tool calls for agent-maintained data and events for derived data. File-based HUD definitions (like `.opencode/hud/*.md`) can be added later if user configuration is desired. + +--- + +## 7. 
HUD Update Tools + +### 7.1 Proposed Tool Schema + +We have two design options: + +**Option A: New operations on the existing `memory` router** + +Extend the `memory` tool's routing with new operations: + +``` +memory({tool: "hud_update", args: {section: "currentTask", value: "Implementing auth"}}) +memory({tool: "hud_note", args: {note: "Refresh tokens have 7-day TTL"}}) +memory({tool: "hud_decision", args: {decision: "Using RS256 for JWT signing"}}) +memory({tool: "hud_clear", args: {section: "notes"}}) +memory({tool: "hud", args: {}}) // read current HUD state +``` + +**Option B: Separate `hud` tool** + +A dedicated `hud` tool that handles all HUD operations: + +``` +hud({action: "update", section: "currentTask", value: "Implementing auth"}) +hud({action: "note", note: "Refresh tokens have 7-day TTL"}) +hud({action: "decision", decision: "Using RS256 for JWT signing"}) +hud({action: "clear", section: "notes"}) +hud({action: "read"}) // read current HUD state +hud({action: "compact"}) // save current key info and trigger compaction +``` + +**Recommendation: Option A (extend `memory` router)**. 
+ +Rationale: +- The router pattern already exists and is well-understood +- Adding a third tool increases the agent's visible tool surface (the AGENTS.md must document each tool) +- The `memory` tool already handles similar CRUD-like operations (search, messages, context) +- The router pattern keeps the tool count at 2 (memory + memory_compact) or 3 (memory + memory_compact + hud_compact) +- Adding operations to the router only increases the help text, not the JSON schema that consumes context + +### 7.2 Operations + +| Operation | Purpose | Key Args | Update Type | +|-----------|---------|----------|-------------| +| `hud` | Read current HUD state | section (optional, for specific section) | Read-only | +| `hud_update` | Update a mutable section | section, value | Full replacement | +| `hud_note` | Add a note | note | Append | +| `hud_decision` | Record a key decision | decision | Append | +| `hud_file` | Track an active file | file, action ("add"/"remove") | Append/Remove | +| `hud_blocker` | Add/remove a blocker | blocker, action ("add"/"remove") | Append/Remove | +| `hud_step` | Add a next step | step | Append | +| `hud_clear` | Clear a section or reset all | section (optional, clears all if omitted) | Full clear | + +### 7.3 Tool Descriptions (for Agent) + +```python +memory({tool: "hud"}): + """Read the current HUD state -- your persistent status display that appears in every system prompt. + Shows current task, key decisions, active files, notes, blockers, and next steps. + Call with no args for full state, or specify a section for just that part.""" + +memory({tool: "hud_update", args: {section: "currentTask", value: "..."}}): + """Update a HUD section with a new value. Sections: currentTask, nextSteps, activeFiles, notes, keyDecisions, blockers. + For array sections, this replaces the entire array. Use hud_note/hud_decision/hud_file for appending.""" + +memory({tool: "hud_note", args: {note: "..."}}): + """Add a note to the HUD. 
Notes appear in your system prompt and survive compaction. + Use for information you'll need later but might lose in conversation history.""" + +memory({tool: "hud_decision", args: {decision: "..."}}): + """Record a key decision in the HUD. Decisions survive compaction and are always visible. + Use when you make an important choice that you'll need to reference later.""" + +memory({tool: "hud_file", args: {file: "...", action: "add"}}): + """Track a file in the HUD's active files list. Use 'add' when starting to edit a file, 'remove' when done.""" + +memory({tool: "hud_blocker", args: {blocker: "...", action: "add"}}): + """Add or remove a blocker. Blockers are always shown in the HUD, even at critical context levels.""" + +memory({tool: "hud_step", args: {step: "..."}}): + """Add a next step to the HUD. Steps survive compaction and help you maintain progress tracking.""" + +memory({tool: "hud_clear", args: {section: "notes"}}): + """Clear a HUD section, or clear all sections if no section specified. Use when starting a new task.""" +``` + +### 7.4 Update Semantics + +**Full replacement** (`hud_update` for scalar sections): +- `currentTask`: The agent sets what it's currently working on. Only one task at a time. +- Setting `currentTask` to `null` or `""` clears it. + +**Append** (for array sections): +- `hud_note`, `hud_decision`, `hud_step`: Append to the end of the array. +- Duplicates are silently ignored (simple string equality check). +- Array sections have a **maximum length** to prevent unbounded growth: + - `keyDecisions`: max 10 + - `notes`: max 20 + - `activeFiles`: max 15 + - `blockers`: max 10 + - `nextSteps`: max 10 +- When the maximum is reached, the oldest entry is removed (FIFO). + +**Remove** (for array sections): +- `hud_file` with `action: "remove"`: Remove a specific file from active files. +- `hud_blocker` with `action: "remove"`: Remove a specific blocker. + +**Clear** (`hud_clear`): +- Clears a specific section or all sections. 
+- Useful when starting a new task or after compaction to reset state. + +### 7.5 Compaction Integration + +When a compaction event is received, the HUD should: + +1. **Capture the compaction summary** (query the DB for the latest compaction summary text) +2. **Store it in `compactedContext`** in the HUD state +3. **Clear or reset sections** that are now covered by the summary (e.g., if the summary includes "next steps", those can be removed from the HUD's next steps) + +This happens in the `event` hook: + +```typescript +event: async ({ event }) => { + contextTracker.handleEvent(event); + + if (event.type === "session.compacted") { + const sessionId = (event.properties as any)?.sessionID; + if (sessionId) { + // Load the latest compaction summary + const summary = loadLatestCompactionSummary(sessionId); + hudManager.updateSection(sessionId, "compactedContext", summary); + } + } +} +``` + +--- + +## 8. Relationship to Existing open-memory Tools + +### 8.1 Memory Tool (Existing) + +The existing `memory` tool is a router for read-only operations on the OpenCode database: + +| Operation | Purpose | +|-----------|---------| +| summary | Quick counts | +| sessions | List sessions | +| messages | Read messages | +| message | Read single message | +| search | Text search | +| compactions | View compaction checkpoints | +| context | Current context usage | +| plans | Read plan files | +| help | Tool reference | + +**The HUD complements -- not replaces -- these operations.** + +- `memory({tool: "context"})` shows the same data as the HUD's context status line, but in a more detailed format. The HUD's context line is the summary; the tool provides details. +- `memory({tool: "search"})` and `memory({tool: "messages"})` remain the way to drill into specific history. The HUD shows **pointers** to history (e.g., "3 compaction checkpoints available"), not the full content. +- `memory({tool: "compactions"})` provides full compaction summaries. 
The HUD shows only that compaction occurred and provides a brief excerpt. + +### 8.2 Memory_Compact Tool (Existing) + +The existing `memory_compact` tool triggers compaction. The HUD does **not** replace this: + +- `memory_compact` initiates compaction (a server-side action) +- The HUD **shows** when compaction has occurred and provides the summary +- After compaction, the HUD's `compactedContext` field is populated with the summary + +The recommended workflow becomes: +1. Agent sees HUD showing yellow/red context status +2. Agent calls `memory_compact` at a natural breakpoint +3. Compaction fires, conversation history is summarized +4. HUD persists through compaction (injected fresh on next call) +5. The `compactedContext` section shows the summary + +### 8.3 HUD vs. Compaction: Complementary Roles + +| Aspect | Compaction | HUD | +|--------|-----------|-----| +| Purpose | Free context window space | Maintain persistent state | +| Mechanism | LLM summarizes conversation | Plugin injects structured state | +| When | Triggered at 92% or manually | Every LLM call | +| Content | LLM-generated narrative | Agent-maintained structured state | +| Token cost | 0 (after compaction, summary replaces history) | 300-700 tokens (on every call) | +| Survives compaction | Yes (it is compaction) | Yes (injected from file/state, not messages) | +| Information loss | Significant (narrative approximation) | None (exact key decisions, notes, files) | + +The key insight: **the HUD reduces the need for compaction by keeping the most critical information in a small, always-visible token budget**. Instead of losing 200 messages and getting a 2000-token summary, the agent always has its key decisions, notes, and task state in 500 tokens. 
+ +### 8.4 Integration Architecture + +``` +┌──────────────────────────────────────────────────────────────┐ +│ Plugin Entry (index.ts) │ +│ │ +│ ┌────────────────┐ ┌────────────────┐ ┌──────────────┐ │ +│ │ ContextTracker │ │ HudManager │ │ event hook │ │ +│ │ (existing) │ │ (new) │ │ (extended) │ │ +│ │ │ │ │ │ │ │ +│ │ - Track tokens │ │ - Session state │ │ - tokens │ │ +│ │ - Per-session │ │ - File persist │ │ - compaction │ │ +│ │ map │ │ - Section CRUD │ │ - file.edited │ │ +│ └───────┬────────┘ └───────┬─────────┘ │ - todo.update │ │ +│ │ │ └───────┬───────┘ │ +│ │ │ │ │ +│ └───────────┬──────┘ │ │ +│ │ │ │ +│ ┌───────────────────▼─────────────────────────────▼──────┐ │ +│ │ system.transform hook │ │ +│ │ │ │ +│ │ 1. Get context info from ContextTracker │ │ +│ │ 2. Get HUD state from HudManager │ │ +│ │ 3. Render HUD with tiered disclosure │ │ +│ │ 4. output.system.push(hud) │ │ +│ └──────────────────────────────────────────────────────────┘ │ +│ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ Tool definitions (tools.ts) │ │ +│ │ │ │ +│ │ memory router (existing): │ │ +│ │ + hud, hud_update, hud_note, hud_decision, │ │ +│ │ hud_file, hud_blocker, hud_step, hud_clear │ │ +│ │ │ │ +│ │ memory_compact (existing, unchanged) │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└──────────────────────────────────────────────────────────────┘ +``` + +--- + +## 9. Implementation Plan + +### Phase 1: Core HUD with Static Sections (PR 1) + +**Scope**: HUD that auto-generates from existing data sources, no agent-maintained sections yet. 
+ +Files to create/modify: + +``` +src/ +├── hud/ +│ ├── manager.ts # HudManager class (in-memory + file state) +│ ├── renderer.ts # renderHud() function with tiered disclosure +│ └── sections.ts # Section rendering functions +├── index.ts # Extend system.transform hook + event hook +└── tools.ts # Add hud operations to memory router +``` + +**What works after Phase 1**: +- Context status line (existing, unchanged) +- Session metadata (title, session duration, compaction count) from DB +- Compacted context auto-populated on compaction events +- Tiered rendering based on context level +- `memory({tool: "hud"})` to read current HUD state + +### Phase 2: Agent-Maintained Sections (PR 2) + +**Scope**: Agent can write to HUD sections via tool calls. + +Files to create/modify: + +``` +src/ +├── hud/ +│ ├── manager.ts # Add update, append, clear methods +│ └── schema.ts # HUD state schema with validation +└── tools.ts # Add hud_update, hud_note, hud_decision, etc. +``` + +**What works after Phase 2**: +- `memory({tool: "hud_update"})`, `hud_note`, `hud_decision`, etc. 
+- Agent-maintained task state, decisions, notes +- File persistence of HUD state across plugin restarts + +### Phase 3: Advanced Features (PR 3+) + +- **Event-driven file tracking**: Auto-update `activeFiles` based on `file.edited` events +- **Todo integration**: Show todo status in HUD (from `todo.updated` events) +- **Project-level defaults**: Inherit HUD defaults from a project config file +- **Compaction integration**: Auto-capture compaction summary into `compactedContext` +- **Smart hints**: After compaction, suggest "use `memory({tool: 'hud_note'})` to preserve critical information before compacting again" + +### 9.1 Renderer Implementation + +```typescript +// src/hud/renderer.ts + +import type { ContextInfo } from "../context/tracker.js"; +import type { HudState } from "./manager.js"; +import { THRESHOLDS } from "../context/thresholds.js"; + +const MAX_NOTES_PREVIEW = 3; +const MAX_DECISIONS_PREVIEW = 5; +const MAX_FILES_PREVIEW = 5; +const MAX_STEPS_PREVIEW = 3; + +export function renderHud(state: HudState, contextInfo: ContextInfo | null): string { + const percentage = contextInfo?.percentage ?? 0; + const lines: string[] = []; + + // --- Status line (always shown) --- + lines.push("## State"); + + if (contextInfo) { + const emoji = percentage >= THRESHOLDS.critical ? "🔴" + : percentage >= THRESHOLDS.red ? "🟠" + : percentage >= THRESHOLDS.yellow ? "🟡" + : "🟢"; + lines.push(`${emoji} Context: ${percentage}% used (${contextInfo.usedTokens.toLocaleString()} / ${contextInfo.limitTokens.toLocaleString()} tokens, ${contextInfo.model})`); + } + + if (state.currentTask) { + lines.push(`**Task**: ${state.currentTask}`); + } + + // Summary counts (always shown -- very cheap) + const counts: string[] = []; + if (state.keyDecisions.length) counts.push(`${state.keyDecisions.length} decision${state.keyDecisions.length !== 1 ? "s" : ""}`); + if (state.activeFiles.length) counts.push(`${state.activeFiles.length} file${state.activeFiles.length !== 1 ? 
"s" : ""}`); + if (state.notes.length) counts.push(`${state.notes.length} note${state.notes.length !== 1 ? "s" : ""}`); + if (state.blockers.length) counts.push(`${state.blockers.length} blocker${state.blockers.length !== 1 ? "s" : ""}`); + if (counts.length) lines.push(counts.join(" | ")); + + // --- Blockers: always shown (critical regardless of context level) --- + if (state.blockers.length > 0) { + lines.push("### ⚠ Blockers"); + for (const b of state.blockers) lines.push(`- ${b}`); + } + + // --- Detailed sections: shown based on context level --- + + if (percentage < THRESHOLDS.yellow) { + // GREEN: full disclosure + renderFullSections(state, lines); + } else if (percentage < THRESHOLDS.red) { + // YELLOW: abbreviated disclosure + renderAbbreviatedSections(state, lines); + } else { + // RED/CRITICAL: minimal disclosure with hint + lines.push("⚠ Context is limited. Use memory({tool: \"hud\"}) to view full state."); + } + + // --- Compacted context: always shown if present --- + if (state.compactedContext) { + lines.push("### Previous Context"); + // Truncate if context is tight + const maxLen = percentage >= THRESHOLDS.red ? 200 : 500; + const ctx = state.compactedContext.length > maxLen + ? state.compactedContext.slice(0, maxLen) + "..." + : state.compactedContext; + lines.push(ctx); + } + + return lines.join("\n"); +} + +function renderFullSections(state: HudState, lines: string[]): void { + if (state.keyDecisions.length > 0) { + lines.push("### Key Decisions"); + for (const d of state.keyDecisions) lines.push(`- ${d}`); + } + if (state.activeFiles.length > 0) { + lines.push("### Active Files"); + for (const f of state.activeFiles) lines.push(`- \`${f}\``); + } + if (state.notes.length > 0) { + lines.push("### Notes"); + for (const n of state.notes) lines.push(`- ${n}`); + } + if (state.nextSteps.length > 0) { + lines.push("### Next Steps"); + for (let i = 0; i < state.nextSteps.length; i++) { + lines.push(`${i + 1}. 
${state.nextSteps[i]}`); + } + } +} + +function renderAbbreviatedSections(state: HudState, lines: string[]): void { + if (state.keyDecisions.length > 0) { + lines.push(`### Decisions (${state.keyDecisions.length})`); + const shown = state.keyDecisions.slice(0, MAX_DECISIONS_PREVIEW); + for (const d of shown) lines.push(`- ${d}`); + if (state.keyDecisions.length > MAX_DECISIONS_PREVIEW) { + lines.push(` _...and ${state.keyDecisions.length - MAX_DECISIONS_PREVIEW} more. Use memory({tool: "hud"}) for all._`); + } + } + if (state.activeFiles.length > 0) { + lines.push(`### Files (${state.activeFiles.length})`); + const shown = state.activeFiles.slice(0, MAX_FILES_PREVIEW); + lines.push(shown.map(f => `\`${f}\``).join(", ")); + } + if (state.notes.length > 0) { + lines.push(`### Notes (${state.notes.length})`); + const shown = state.notes.slice(0, MAX_NOTES_PREVIEW); + for (const n of shown) lines.push(`- ${n}`); + if (state.notes.length > MAX_NOTES_PREVIEW) { + lines.push(` _...and ${state.notes.length - MAX_NOTES_PREVIEW} more._`); + } + } + if (state.nextSteps.length > 0) { + lines.push("### Next Steps"); + const shown = state.nextSteps.slice(0, MAX_STEPS_PREVIEW); + for (let i = 0; i < shown.length; i++) lines.push(`${i + 1}. 
${shown[i]}`); +  } +} +``` + +### 9.2 Manager Implementation + +```typescript +// src/hud/manager.ts + +import type { PluginInput } from "@opencode-ai/plugin"; +import { mkdirSync } from "node:fs"; + +export interface HudState { +  sessionId: string; +  projectPath: string; +  lastUpdated: number; +  currentTask: string | null; +  keyDecisions: string[]; +  activeFiles: string[]; +  notes: string[]; +  blockers: string[]; +  nextSteps: string[]; +  compactedContext: string | null; +} + +const SECTION_LIMITS: Record<string, number> = { +  keyDecisions: 10, +  activeFiles: 15, +  notes: 20, +  blockers: 10, +  nextSteps: 10, +}; + +export class HudManager { +  private sessions = new Map<string, HudState>(); +  private ctx: PluginInput; +  private hudDir: string; + +  constructor(ctx: PluginInput) { +    this.ctx = ctx; +    this.hudDir = `${process.env.XDG_DATA_HOME || `${process.env.HOME}/.local/share`}/opencode/hud`; +    mkdirSync(`${this.hudDir}/sessions`, { recursive: true }); +  } + +  getState(sessionId: string): HudState { +    if (this.sessions.has(sessionId)) { +      return this.sessions.get(sessionId)!; +    } +    const state = this.loadFromDisk(sessionId); +    this.sessions.set(sessionId, state); +    return state; +  } + +  updateSection(sessionId: string, section: keyof HudState, value: unknown): HudState { +    const state = this.getState(sessionId); +    if (section === "keyDecisions" || section === "activeFiles" || section === "notes" || section === "blockers" || section === "nextSteps") { +      const arr = value as string[]; +      const limit = SECTION_LIMITS[section] ?? 
20; +      (state as any)[section] = arr.slice(0, limit); +    } else { +      (state as any)[section] = value; +    } +    state.lastUpdated = Date.now(); +    this.persistToDisk(sessionId, state); +    return state; +  } + +  appendToSection(sessionId: string, section: "keyDecisions" | "activeFiles" | "notes" | "blockers" | "nextSteps", item: string): HudState { +    const state = this.getState(sessionId); +    const arr = state[section] as string[]; +    if (!arr.includes(item)) { +      const limit = SECTION_LIMITS[section] ?? 20; +      if (arr.length >= limit) { +        arr.shift(); // FIFO: remove oldest +      } +      arr.push(item); +      state.lastUpdated = Date.now(); +      this.persistToDisk(sessionId, state); +    } +    return state; +  } + +  removeFromSection(sessionId: string, section: "activeFiles" | "blockers", item: string): HudState { +    const state = this.getState(sessionId); +    const arr = state[section] as string[]; +    const idx = arr.indexOf(item); +    if (idx !== -1) { +      arr.splice(idx, 1); +      state.lastUpdated = Date.now(); +      this.persistToDisk(sessionId, state); +    } +    return state; +  } + +  clearSection(sessionId: string, section?: keyof HudState): HudState { +    const state = this.getState(sessionId); +    if (section) { +      if (section === "currentTask" || section === "compactedContext") { +        (state as any)[section] = null; +      } else if (section === "keyDecisions" || section === "activeFiles" || section === "notes" || section === "blockers" || section === "nextSteps") { +        (state as any)[section] = []; +      } +    } else { +      state.currentTask = null; +      state.keyDecisions = []; +      state.activeFiles = []; +      state.notes = []; +      state.blockers = []; +      state.nextSteps = []; +    } +    state.lastUpdated = Date.now(); +    this.persistToDisk(sessionId, state); +    return state; +  } + +  private loadFromDisk(sessionId: string): HudState { +    try { +      // BunFile has no synchronous read API (.json() is async) -- use node:fs for the sync load path +      const data = JSON.parse(require("node:fs").readFileSync(`${this.hudDir}/sessions/${sessionId}.json`, "utf8")) as HudState; +      return data; +    } catch { +      return { +        sessionId, +        projectPath: this.ctx.project?.path ?? 
"", +        lastUpdated: Date.now(), +        currentTask: null, +        keyDecisions: [], +        activeFiles: [], +        notes: [], +        blockers: [], +        nextSteps: [], +        compactedContext: null, +      }; +    } +  } + +  private persistToDisk(sessionId: string, state: HudState): void { +    Bun.write( +      `${this.hudDir}/sessions/${sessionId}.json`, +      JSON.stringify(state, null, 2), +    ).catch(() => { +      // Silently fail -- in-memory state is still valid +    }); +  } +} +``` + +### 9.3 Extended Plugin Entry Point + +```typescript +// src/index.ts (extended) + +import type { Plugin } from "@opencode-ai/plugin"; +import { getCompactionPrompt } from "./compaction/prompt.js"; +import { startContextTracker } from "./context/tracker.js"; +import { createHudManager } from "./hud/manager.js"; +import { renderHud } from "./hud/renderer.js"; +import { createTools } from "./tools.js"; + +const OpenMemoryPlugin: Plugin = async (ctx) => { +  const contextTracker = startContextTracker(ctx); +  const hudManager = createHudManager(ctx); + +  return { +    tool: createTools(ctx, contextTracker, hudManager), + +    "experimental.session.compacting": async (_input, output) => { +      output.prompt = getCompactionPrompt(); +    }, + +    "experimental.chat.system.transform": async (input, output) => { +      if (!input.sessionID) return; + +      const contextInfo = contextTracker.getContextInfo(input.sessionID); +      const hudState = hudManager.getState(input.sessionID); +      const hud = renderHud(hudState, contextInfo); + +      output.system.push(hud); +    }, + +    event: async ({ event }) => { +      contextTracker.handleEvent(event); + +      const props = event.properties as Record<string, unknown>; +      if (!props) return; + +      // Handle compaction events -- capture summary for HUD +      if (event.type === "session.compacted") { +        const sessionId = props.sessionID as string; +        if (sessionId) { +          // TODO: query latest compaction summary and store in HUD state +          hudManager.markCompacted(sessionId); +        } +      } +    }, +  }; +}; + +export default OpenMemoryPlugin; +``` + +--- + +## 10. 
Risks and Open Questions + +### 10.1 Token Budget Risk + +**Risk**: The HUD adds 300-700 tokens to every system prompt. On a 200k context model, this is negligible (0.15-0.35%). On a 32k context model, it's more significant (0.9-2.2%). + +**Mitigation**: The tiered disclosure system (section 4.3) reduces HUD size as context fills. At red/critical levels, the HUD shrinks to ~100-200 tokens. + +**Open question**: Should the total HUD token budget be configurable? Some users may have very small context windows and want minimal HUD overhead. + +### 10.2 State Drift + +**Risk**: The agent forgets to update HUD sections (e.g., sets `currentTask` but never clears it when done). Stale state is worse than no state because it misleads the agent. + +**Mitigation**: +1. Include a `lastUpdated` timestamp in the HUD. If `lastUpdated` is more than N minutes ago (configurable), add a note: "HUD state may be stale (last updated X min ago)." +2. Include guidance in the HUD text: "Use hud_update to refresh stale sections." +3. On session start, show a brief "new session" message that encourages the agent to update the HUD. + +### 10.3 Conflicting with AGENTS.md Instructions + +**Risk**: The HUD tells the agent something that conflicts with AGENTS.md or other system instructions. For example, AGENTS.md says "never use global state" but the HUD maintains global state. + +**Mitigation**: The HUD should be presented as an **advisory** display, not as instructions. Use neutral language: "Current task: X" not "You must complete task X". The agent should treat HUD sections as information, not commands. + +### 10.4 File I/O in Hook + +**Risk**: The `system.transform` hook is called on every LLM call. If `HudManager.getState()` reads from disk, it adds latency. + +**Mitigation**: The manager uses in-memory cache (`Map`) with file fallback on cache miss. Disk reads only happen on: +- Cold start (first call for a session) +- Plugin reload + +Both are infrequent. 
Normal operation is pure in-memory. + +### 10.5 Race Conditions + +**Risk**: Tool calls update HUD state while `system.transform` is rendering. If both happen concurrently, the render might see partial state. + +**Mitigation**: In the Bun runtime, the event loop is single-threaded. Tool calls and hook invocations are both async, so they don't truly run in parallel -- one completes before the next starts. No race condition concern in practice. + +### 10.6 Privacy and Security + +**Risk**: HUD state files contain task descriptions, decisions, and notes. These are stored in plaintext JSON under `~/.local/share/opencode/hud/sessions/`. + +**Mitigation**: This is the same trust level as the OpenCode database (also plaintext SQLite). No special handling needed, but document that HUD data is unencrypted local storage. + +### 10.7 Interaction with Prompt Caching + +**Risk**: Dynamic HUD content breaks prompt caching for the second system message block. + +**Mitigation**: This is acceptable. The HUD is small (300-700 tokens) and changes are inevitable. The first system block (agent/provider prompt) continues to benefit from caching. Additionally, we can optimize by splitting the HUD into static and dynamic parts: + +```typescript +// Static part (cached across calls when unchanged): +output.system.push(renderHudStatic(hudState)); // task, decisions, notes, files + +// Dynamic part (changes every call): +output.system.push(renderHudDynamic(contextInfo)); // context percentage, status +``` + +This is an optimization for Phase 2 or later. + +### 10.8 Should HUD Replace the Existing Context Injection? + +**Open question**: The current context injection is ~50 tokens. The HUD includes context information and much more. Should we: + +A. **Remove the standalone context injection and rely solely on the HUD** (simpler, single source of truth) +B. **Keep both** (context injection is a separate concern, HUD is broader state) +C. 
**Merge context injection into HUD rendering** (the HUD renderer includes context line) + +**Recommendation**: **Option C**. The HUD renderer should include the context status line as its first element. This means we don't push a separate context string and a HUD string -- we push one combined string. This reduces the number of system messages and ensures context status is always at the top of the HUD. + +--- + +## Key File Reference + +### OpenCode Core + +| File | Relevance to HUD | +|------|-------------------| +| `/workspace/opencode/packages/opencode/src/session/llm.ts` | `system.transform` hook invocation; system prompt assembly and caching | +| `/workspace/opencode/packages/opencode/src/session/prompt.ts` | Session loop where LLM calls happen; reminders injection | +| `/workspace/opencode/packages/opencode/src/session/compaction.ts` | Compaction event (`session.compacted`) for HUD updates | +| `/workspace/opencode/packages/opencode/src/session/todo.ts` | `todo.updated` event for task tracking in HUD | +| `/workspace/opencode/packages/opencode/src/file/index.ts` | `file.edited` event for active file tracking | +| `/workspace/opencode/packages/opencode/src/session/message-v2.ts` | `message.updated` event (already used by ContextTracker) | +| `/workspace/opencode/packages/plugin/src/index.ts` | Plugin SDK type definitions; `Hooks` interface | + +### Open-Memory Plugin + +| File | Relevance to HUD | +|------|-------------------| +| `/workspace/@alkdev/open-memory/src/index.ts` | Plugin entry point; extend `system.transform` and `event` hooks | +| `/workspace/@alkdev/open-memory/src/tools.ts` | Tool definitions; add HUD operations to memory router | +| `/workspace/@alkdev/open-memory/src/context/tracker.ts` | `ContextTracker` pattern; model for `HudManager` | +| `/workspace/@alkdev/open-memory/src/context/thresholds.ts` | Threshold constants for tiered disclosure | +| `/workspace/@alkdev/open-memory/src/history/queries.ts` | `bun:sqlite` read-only query pattern 
for HUD data | + +### New Files (Proposed) + +| File | Purpose | +|------|---------| +| `src/hud/manager.ts` | `HudManager` class -- in-memory map + file persistence | +| `src/hud/renderer.ts` | `renderHud()` function -- tiered disclosure rendering | +| `src/hud/sections.ts` | Section rendering functions (status, decisions, files, notes) | + +### Related Research + +| Document | Relevance | +|----------|-----------| +| `/workspace/@alkdev/open-memory/docs/research/01-compaction-architecture.md` | System prompt injection mechanics, hook behavior, caching | +| `/workspace/@alkdev/open-memory/docs/research/02-agent-definitions-pattern.md` | Declarative definition pattern (YAML frontmatter + body), `.opencode/` directory conventions | +| `/workspace/@alkdev/open-memory/docs/research/03-handlebars-bun-compatibility.md` | Template rendering decision (use template literals, not Handlebars) | + +--- + +*Research conducted 2026-04-22. Architecture based on open-memory v1.0.0 and OpenCode plugin SDK v1.1.3.*