From 28f2edec3e67d3662deea2567c7a6275348094f3 Mon Sep 17 00:00:00 2001 From: "glm-5.1" Date: Sat, 6 Jun 2026 05:26:24 +0000 Subject: [PATCH] docs: add research specs for core, storage, and flowgraph crates --- research/core.md | 334 +++++++++++++++++++++++++++++++ research/flow.md | 469 ++++++++++++++++++++++++++++++++++++++++++++ research/storage.md | 348 ++++++++++++++++++++++++++++++++ 3 files changed, 1151 insertions(+) create mode 100644 research/core.md create mode 100644 research/flow.md create mode 100644 research/storage.md diff --git a/research/core.md b/research/core.md new file mode 100644 index 0000000..66c026c --- /dev/null +++ b/research/core.md @@ -0,0 +1,334 @@ +# Alknet Core: Transport, Call Protocol, Auth, and DNS + +> Status: Research / Draft +> Last updated: 2026-06-05 + +## Overview + +`alknet-core` is the foundational crate providing pluggable transports, the bidirectional call protocol, Ed25519 authentication, and (future) DNS transport + naming. Everything else (storage, flowgraph, relay) builds on top of this. + +## Transport Layer + +### Architecture + +The transport layer produces a duplex byte stream (`AsyncRead + AsyncWrite + Unpin + Send`) that the SSH layer consumes via `russh::client::connect_stream()` or `russh::server::run_stream()`. SSH is completely unaware of what transport it runs over. + +### Transport Trait + +```rust +#[async_trait] +pub trait Transport: Send + Sync + 'static { + type Stream: AsyncRead + AsyncWrite + Unpin + Send + 'static; + async fn connect(&self) -> Result; + fn describe(&self) -> String; +} + +#[async_trait] +pub trait TransportAcceptor: Send + Sync + 'static { + type Stream: AsyncRead + AsyncWrite + Unpin + Send + 'static; + async fn accept(&self) -> Result<(Self::Stream, TransportInfo)>; +} + +#[derive(Debug, Clone)] +pub struct TransportInfo { + pub remote_addr: Option, + pub transport_kind: TransportKind, +} + +#[derive(Debug, Clone)] +pub enum TransportKind { + Tcp, + Tls { server_name: Option }, + Iroh { endpoint_id: String }, + Dns { domain: String }, // NEW + WebTransport { host: String }, // NEW (planned) +} +``` + +### Existing Transports + +| Transport | Client | Server | Stream Type | +|-----------|--------|--------|-------------| +| TcpTransport | `TcpStream::connect(addr)` | `TcpListener::accept()` | `TcpStream` | +| TlsTransport | `TlsStream` | `TlsStream` | tokio_rustls | +| IrohTransport | `endpoint.connect(peer, alpn)` then `conn.open_bi()` then `join(recv, send)` | `endpoint.accept()` then `conn.accept_bi()` then `join(recv, send)` | `tokio::io::Join` | +| AcmeTlsAcceptor | Auto-provision via Let's Encrypt | ACME cert provision + TLS accept | TlsStream | + +### Transport Chaining + +```bash +alknet connect --transport iroh --proxy socks5://127.0.0.1:1080 +alknet connect --transport tls --proxy socks5://127.0.0.1:1080 +``` + +`--proxy` routes outbound connections. Client: routes transport connection. Server: routes data-channel TCP targets. + +### Stealth Mode + +When `--stealth` is enabled with TLS transport on port 443: after TLS handshake, peek first bytes. If `SSH-2.0-`, run SSH. Otherwise, return `HTTP/1.1 404 Not Found\r\nServer: nginx\r\n\r\n` and close. Makes the server indistinguishable from an HTTPS site. + +## Call Protocol + +### Wire Format + +Every message is a length-prefixed JSON `EventEnvelope`: + +```rust +pub struct EventEnvelope { + pub r#type: String, // "call.requested", "call.responded", etc. + pub id: String, // Correlation key (requestId, topic, or "" for broadcasts) + pub payload: Value, // JSON payload — schema depends on event type +} + +// Frame: 4-byte big-endian length prefix + UTF-8 JSON body +``` + +This is the same format used by `@alkdev/pubsub` adapters. The envelope is transport-agnostic — it runs over SSH channels, WebTransport streams, iroh bidirectional streams, WebSocket, Worker postMessage, or DNS queries. + +Binary payloads are base64-encoded in the `payload` field. The envelope itself stays JSON for cross-language compatibility. + +### Call Protocol Events + +| Event | Direction | Purpose | +|-------|-----------|---------| +| `call.requested` | Caller → Handler | Initiate a call or subscription | +| `call.responded` | Handler → Caller | Deliver a result (one for calls, many for subscriptions) | +| `call.completed` | Handler → Caller | Signal end of subscription stream | +| `call.aborted` | Either side | Cancel the call/subscription | +| `call.error` | Handler → Caller | Signal an error | + +A call is just a subscribe that resolves after one event. Both `call()` and `subscribe()` send the same `call.requested` event. + +### Operation Paths + +``` +/{spoke}/{service}/{op} +``` + +- **spoke** — identity prefix of the node that exposes the operation +- **service** — logical service namespace (e.g., `fs`, `bash`, `agent`) +- **op** — specific operation (e.g., `readFile`, `exec`, `chat`) + +Examples: + +| Path | Meaning | +|------|---------| +| `/dev1/fs/readFile` | Spoke `dev1`, service `fs`, op `readFile` | +| `/hub/agent/chat` | Hub's own `agent` service, op `chat` | +| `/hub/sessions/list` | Hub's `sessions` service, op `list` | + +### PendingRequestMap + +Manages in-flight calls and subscriptions. Correlates `call.responded` events back to the original `call.requested`: + +```rust +pub struct PendingRequestMap { + pending: HashMap, +} + +enum PendingEntry { + Call { tx: oneshot::Sender>, timeout: Instant }, + Subscribe { tx: mpsc::Sender>, timeout: Option }, +} +``` + +### Operation Registry + +```rust +pub struct OperationSpec { + pub name: String, // "/fs/readFile", "/agent/chat" + pub namespace: String, // "fs", "agent" + pub op_type: OperationType, // Query, Mutation, Subscription + pub input_schema: Value, // JSON Schema for input + pub output_schema: Value, // JSON Schema for output + pub access_control: AccessControl, // Required scopes/resources +} + +pub enum OperationType { + Query, // Read-only, idempotent + Mutation, // Side effects + Subscription, // Streaming +} + +pub struct AccessControl { + pub required_scopes: Vec, + pub required_scopes_any: Option>, + pub resource_type: Option, + pub resource_action: Option, +} +``` + +Specs and handlers are separated — downstream consumers register both without modifying core: + +```rust +registry.register(OperationSpec { name: "/services/list", ... }, list_services_handler); +registry.register(OperationSpec { name: "/fs/readFile", ... }, fs_read_handler); +``` + +### Protocol Adapter Layer + +| Transport | Channel mechanism | Direction | +|-----------|-------------------|-----------| +| SSH | Reserved `direct_tcpip` destination `alknet-control:0` | Bidirectional over SSH channel | +| WebTransport | Bidirectional stream after CONNECT | Bidirectional over WT stream | +| iroh QUIC | `open_bi()` / `accept_bi()` | Bidirectional over QUIC stream | +| WebSocket | Single WS connection | Bidirectional over WS frames | +| Worker | `postMessage` | Bidirectional over structured clone | +| DNS | Query TXT records (client) / serve TXT records (server) | Request/response over DNS | + +### Hub/Spoke Architecture + +``` + ┌─────────────────────────────────┐ + │ Hub │ + │ │ + │ Hub-local services: │ + │ /hub/agent/chat │ + │ /hub/agent/complete │ + │ /hub/sessions/list │ + │ │ + │ Spoke registry: │ + │ /dev1/fs/* → dev1 connection │ + │ /browser-1/notify/* → WT conn │ + └──────┬───────┬──────────────────┘ + │ │ + ┌─────────▼┐ ┌───▼────────────┐ + │ Spoke │ │Browser Spoke │ + │ "dev1" │ │"browser-1" │ + │ /fs/* │ │/notify/* │ + └───────────┘ └────────────────┘ +``` + +Spokes register operations on connect: + +```json +{ + "type": "call.requested", + "id": "uuid-123", + "payload": { + "operationId": "/hub/services/register", + "input": { + "spoke": "dev1", + "operations": ["/fs/readFile", "/bash/exec"] + } + } +} +``` + +## Authentication + +Ed25519 keys for SSH authentication. A separate authentication mechanism for browsers where they sign a token using the same Ed25519 keys. Hot key rotation without server restart (mechanism in core for programmatic key updates). + +Peer credentials are stored in `peer_credentials` table (fingerprint-based lookup). Account credentials via `api_keys` table (SHA-256 hash for high-entropy keys). + +## DNS Transport (Planned) + +### Two DNS Concepts + +1. **DNS as Transport** — Encode `EventEnvelope` frames as DNS queries/responses. Censorship resistance. Request/response maps to `call.requested`/`call.responded` naturally. + +2. **DNS as Naming/Discovery** — Publish/resolve endpoint information via DNS TXT records (iroh-dns style). Smart contract provides on-chain `name → namespaceId + relays`. DNS transport carries the data flow when other transports are blocked. + +### DNS as Call Protocol Transport + +The call protocol is transport-agnostic. DNS becomes another adapter: + +``` +Transport Layer: + SSH channel → EventEnvelope frames → CallHandler + WebTransport → EventEnvelope frames → CallHandler + iroh QUIC stream → EventEnvelope frames → CallHandler + DNS query/response → EventEnvelope frames → CallHandler ← NEW +``` + +**Upstream (client → server)**: Encode `EventEnvelope` JSON as base32 DNS query labels. +**Downstream (server → client)**: Return `EventEnvelope` JSON in TXT record responses. +**Polling**: For `call.responded` after `call.requested`, client polls `requestId.alk.dev TXT?`. + +The `DnsTransportAdapter` implements the same adapter pattern as `@alkdev/pubsub`'s event targets, making DNS a first-class transport for control channel operations. + +### DNS as Full Transport (SSH Tunneling) + +Full-duplex SSH tunneling over DNS requires a framing protocol: +- Chunk SSH data into fixed-size frames (e.g., 220-byte frames with 4-byte header for seq/ack) +- Encode upstream in base32 subdomain labels +- Encode downstream in TXT records or CNAME targets +- Handle resequencing and retransmission + +This is higher latency (~1-50 KB/s) but works when all other transports are blocked. Fine for interactive SSH. Log a warning at connect time. + +### iroh-dns Relationship + +iroh-dns publishes `EndpointInfo` via `_iroh.. TXT` records. alknet can extend this: + +- Add `tunnel=dnst.example.com` attribute to indicate DNS transport availability +- Use iroh-dns `DnsResolver` for endpoint discovery +- When a client sees the `tunnel` attribute and QUIC is blocked, fall back to DNS transport + +### DnsTransport Implementation Sketch + +```rust +#[cfg(feature = "dns")] +mod dns; + +pub struct DnsTransport { + domain: String, // e.g. "t.alk.dev" + resolver_addr: SocketAddr, + protocol: DnsProtocol, // Udp, Tcp, Tls, Https + auth_token: Option, +} + +pub struct DnsAcceptor { + domain: String, + listen_addr: SocketAddr, + protocol: DnsProtocol, +} + +// DnsStream: virtual duplex backed by DNS poll/push +// Uses tokio::io::duplex() internally with a background task that: +// - Chunks outgoing bytes into DNS queries (client) or response records (server) +// - Reassembles incoming DNS payloads into the read buffer +// - Handles ACK/NACK for reliability +``` + +### DnsProtocol in iroh-dns + +iroh-dns already supports multiple DNS protocols: + +```rust +pub enum DnsProtocol { + Udp, // Classic DNS + Tcp, // DNS over TCP + Tls, // DNS over TLS (DoT) — RFC 7858 + Https, // DNS over HTTPS (DoH) — RFC 8484 +} +``` + +alknet's DNS transport should support all of these. DoH (port 443, looks like HTTPS) is particularly valuable for censorship resistance since it's indistinguishable from normal web traffic. + +## Design Decisions + +| ADR | Decision | Summary | +|-----|----------|---------| +| 001 | Pluggable transport | Transport trait produces stream, SSH consumes it | +| 003 | iroh stream join | `tokio::io::join` combines QUIC halves | +| 004 | SSH over transport | SSH never touches TCP/iroh/TLS directly | +| 008 | ACME/Let's Encrypt | Auto-provision TLS certs | +| 009 | Default iroh relay | n0 relay by default, `--iroh-relay` override | +| 010 | Transport chaining | `--proxy` works with all transports natively | +| 017 | Stealth mode | Peek first bytes, return 404 for non-SSH on port 443 | +| 018 | Control channel for pubsub | Reserved destination for event bus | +| 019 | Proxy dual semantics | `--proxy` routes transport on client, data on server | +| 023 | Unified auth | Shared Ed25519 key material across auth mechanisms | +| 024 | Bidirectional call protocol | Both sides can call, generalized from ADR-018 | +| 025 | Handler/spec separation | Downstream registers operations without modifying core | + +## References + +- `@alkdev/pubsub` — TypeScript event target adapters and `EventEnvelope` +- `@alkdev/operations` — TypeScript call protocol, `OperationSpec`, registry +- `@alkdev/flowgraph` — TypeScript operation graph and call graph (planned Rust port) +- `@alkdev/storage` — TypeScript metagraph, identity, ACL (planned Rust port as `alknet-storage`) +- iroh-dns — DNS resolver and endpoint info (naming/discovery) +- iroh-live-relay — WebTransport relay (planned transport reference) +- irpc — iroh streaming RPC (postcard-only, Rust-to-Rust) \ No newline at end of file diff --git a/research/flow.md b/research/flow.md new file mode 100644 index 0000000..17ecb1a --- /dev/null +++ b/research/flow.md @@ -0,0 +1,469 @@ +# Alknet Flowgraph: Operation Graph, Call Graph, and Graph Operations + +> Status: Research / Draft +> Last updated: 2026-06-05 + +## Overview + +`alknet-flowgraph` is a Rust crate providing graph data structures and operations, mapping the TypeScript `@alkdev/flowgraph` package's call-graph and operation-graph concepts to `petgraph::DiGraph`. It works with `alknet-storage` for persistence and `alknet-core` for call protocol event processing. + +## Core Abstraction + +`petgraph::DiGraph` replaces graphology. The mapping is nearly 1:1 for the operations used: + +| TypeScript (graphology) | Rust (petgraph) | +|------------------------|-----------------| +| `graph.addNode(key, attrs)` | `graph.add_node(attrs)` returns `NodeIndex` | +| `graph.addEdge(source, target, attrs)` | `graph.add_edge(source, target, attrs)` returns `EdgeIndex` | +| `graph.getAttribute(key)` | `graph[node]` | +| `graph.forEachNode()` | `graph.node_indices().for_each()` | +| `graph.inNeighbors(node)` | `graph.neighbors_directed(node, Direction::Incoming)` | +| `graph.outNeighbors(node)` | `graph.neighbors_directed(node, Direction::Outgoing)` | +| `graph.hasCycle()` | `petgraph::algo::is_cyclic_directed(&graph)` | +| `graph.topologicalSort()` | `petgraph::algo::toposort(&graph)` returns `Result, Cycle>` | +| `graph.export()` | serde serialization | +| `FlowGraph.fromJSON(data)` | serde deserialization | + +### Key Difference: Node Keys + +graphology uses string node keys (`"call-001"`). petgraph uses `NodeIndex` (u32). We maintain a `HashMap` for node-key-to-index lookups, mirroring the `key` column in the `nodes` SQLite table. + +```rust +pub struct FlowGraph +where + N: NodeAttributes, + E: EdgeAttributes, +{ + graph: DiGraph, + key_to_index: HashMap, +} + +pub trait NodeAttributes: Clone + Serialize + DeserializeOwned + Debug + Send + Sync { + fn key(&self) -> &str; + fn set_key(&mut self, key: String); +} + +pub trait EdgeAttributes: Clone + Serialize + DeserializeOwned + Debug + Send + Sync { + fn edge_type(&self) -> &str; +} +``` + +## Operation Graph (Static) + +Built from `OperationSpec`s at startup. Answers structural questions about the operation space: type compatibility, cycle detection, reachability. + +### Construction + +```rust +impl FlowGraph { + pub fn from_specs(specs: &[OperationSpec]) -> Result { + let mut graph = Self::new(); + for spec in specs { + graph.add_operation(spec.clone()); + } + for (source, target) in graph.compute_type_edges(specs) { + graph.add_typed_edge(&source, &target, TypeCompat::compatible(/*...*/))?; + } + if graph.has_cycles() { + return Err(CycleError); + } + Ok(graph) + } +} +``` + +### Type Compatibility + +Compare `output_schema` (source) against `input_schema` (target) using `jsonschema`: + +```rust +pub fn type_compat( + output_schema: &Value, + input_schema: &Value, +) -> TypeCompatResult { + // 1. Exact match → compatible + // 2. Subtype match (output has extra fields) → compatible + // 3. Unknown on either side → skip (no edge) + // 4. Structural mismatch → incompatible edge (added with compatible: false) +} +``` + +### Node Attributes + +```rust +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct OperationNodeAttrs { + pub name: String, + pub namespace: String, + pub op_type: OperationType, + pub input_schema: Value, + pub output_schema: Value, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub enum OperationType { + Query, + Mutation, + Subscription, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct OperationEdgeAttrs { + pub edge_type: String, // "typed" + pub compatible: bool, + pub detail: Option, +} +``` + +### Queries + +```rust +// petgraph delegations +pub fn topological_order(&self) -> Result, CycleError> +pub fn has_cycles(&self) -> bool +pub fn find_cycles(&self) -> Vec> +pub fn ancestors(&self, node_key: &str) -> Vec +pub fn descendants(&self, node_key: &str) -> Vec +pub fn predecessors(&self, node_key: &str) -> Vec +pub fn successors(&self, node_key: &str) -> Vec +pub fn reachable_from(&self, node_keys: &[String]) -> HashSet +``` + +## Call Graph (Dynamic) + +Populated at runtime from call protocol events. Every `call.requested` adds a node, every `call.responded`/`call.error`/`call.aborted` updates its status. + +### Construction from Events + +```rust +impl FlowGraph { + pub fn from_call_events(events: &[CallEventMapValue]) -> Self { + let mut graph = Self::new(); + for event in events { + graph.update_from_event(event); + } + graph + } + + pub fn update_from_event(&mut self, event: &CallEventMapValue) { + match event { + CallEvent::Requested(e) => { + self.add_call(CallNodeAttrs { + request_id: e.request_id.clone(), + operation_id: e.operation_id.clone(), + status: CallStatus::Pending, + parent_request_id: e.parent_request_id.clone(), + input: e.input.clone(), + ..Default::default() + }); + } + CallEvent::Responded(e) => { + self.update_status(&e.request_id, CallStatus::Completed, None); + } + CallEvent::Error(e) => { + self.update_status(&e.request_id, CallStatus::Failed, Some(e.clone())); + } + CallEvent::Aborted(e) => { + self.update_status(&e.request_id, CallStatus::Aborted, None); + } + CallEvent::Completed(e) => { + self.update_status(&e.request_id, CallStatus::Completed, None); + } + } + } +} +``` + +### Real-time Population + +```rust +// Subscribe to call protocol events for live graph construction +let call_graph = FlowGraph::::new(); + +pubsub.subscribe("call.requested", |event| { + call_graph.update_from_event(&event); +}); +pubsub.subscribe("call.responded", |event| { + call_graph.update_from_event(&event); +}); +// ... etc for all call event types +``` + +### Node Attributes + +```rust +#[derive(Clone, Serialize, Deserialize, Debug, Default)] +pub struct CallNodeAttrs { + pub request_id: String, + pub operation_id: String, + pub status: CallStatus, + pub parent_request_id: Option, + pub input: Value, + pub output: Option, + pub error: Option, + pub identity: Option, + pub started_at: Option, + pub completed_at: Option, +} + +#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)] +pub enum CallStatus { + Pending, + Running, + Completed, + Failed, + Aborted, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub struct CallEdgeAttrs { + pub edge_type: EdgeType, +} + +#[derive(Clone, Serialize, Deserialize, Debug)] +pub enum EdgeType { + Triggered, + DependsOn, +} +``` + +### Status Lifecycle + +``` + call.requested + │ + ▼ + ┌─────────┐ + │ pending │ + └────┬────┘ + │ + handler starts + │ + ▼ + ┌─────────┐ + ┌────│ running │────┐ + │ └────┬────┘ │ + call.aborted │ call.aborted + │ │ │ + ▼ │ ▼ + ┌─────────┐ │ ┌─────────┐ + │ aborted │ │ │ aborted │ + └─────────┘ │ └─────────┘ + │ + ┌─────────┼─────────┐ + │ │ │ + call.responded │ call.error + │ │ │ + ▼ │ ▼ + ┌───────────┐ │ ┌────────┐ + │ completed │ │ │ failed │ + └───────────┘ │ └────────┘ + │ + call.completed + │ + ▼ + ┌───────────┐ + │ completed │ + └───────────┘ +``` + +### Abort Cascading + +```rust +// Abort cascade: get all descendants of a call +let descendants = call_graph.descendants(&request_id); +// The protocol handler aborts each descendant via PendingRequestMap::abort() +``` + +### Observability Queries + +| Query | Method | Returns | +|-------|--------|---------| +| Get running calls | `filter_by_status(CallStatus::Running)` | Node keys with running status | +| Get failed calls | `filter_by_status(CallStatus::Failed)` | Node keys with failed status | +| Get top-level calls | `get_roots()` | Nodes with no `parent_request_id` | +| Get children of call | `children(&request_id)` | Direct children via `triggered` edges | +| Get call duration | `duration(&request_id)` | `completed_at - started_at` | +| Get call lineage | `lineage(&request_id)` | Ancestor chain from root to this call | + +### Serialization and Persistence + +```rust +// Serialize via serde +let json = serde_json::to_value(&call_graph)?; +let restored: FlowGraph = serde_json::from_value(json)?; + +// Persist via alknet-storage +storage.insert_call_graph("session-abc", &call_graph)?; +storage.load_call_graph("session-abc")?; +``` + +## Graph Operations (petgraph mapping) + +All graph operations used in `@alkdev/flowgraph` map directly to petgraph: + +| Flowgraph method | petgraph function | +|------------------|-------------------| +| `addNode(key, attrs)` | `add_node(attrs)` + `key_to_index.insert(key, idx)` | +| `addEdge(source, target, attrs)` | `add_edge(source_idx, target_idx, attrs)` | +| `addDirectedEdge(source, target, attrs)` | `add_edge(source_idx, target_idx, attrs)` | +| `getNodeAttributes(key)` | `graph[NodeIndex]` | +| `getEdgeAttributes(key)` | `graph[EdgeIndex]` | +| `getSource(key)` / `getTarget(key)` | `graph.edge_endpoints(EdgeIndex)` | +| `inDegree(key)` | `graph.neighbors_directed(idx, Incoming).count()` | +| `outDegree(key)` | `graph.neighbors_directed(idx, Outgoing).count()` | +| `inNeighbors(key)` | `graph.neighbors_directed(idx, Incoming)` | +| `outNeighbors(key)` | `graph.neighbors_directed(idx, Outgoing)` | +| `hasEdge(source, target)` | `graph.contains_edge(source_idx, target_idx)` | +| `forEachNode(callback)` | `graph.node_indices().for_each()` | +| `forEachEdge(callback)` | `graph.edge_indices().for_each()` | +| `findCycle()` | `is_cyclic_directed(&graph)` | +| `topologicalSort()` | `toposort(&graph, None)` | +| `export()` / `toJSON()` | `serde_json::to_value(&graph)` | +| `fromJSON()` | `serde_json::from_value(json)` | + +### Cycle Detection + +The operation graph rejects cycles at construction time. The call graph allows cycles in the parent-child hierarchy only via `parentRequestId` (which should not create actual cycles — a call cannot be its own ancestor). + +```rust +pub fn add_call(&mut self, attrs: CallNodeAttrs) -> Result<(), CycleError> { + if let Some(parent) = &attrs.parent_request_id { + // Check if adding triggered edge would create a cycle + if self.would_create_cycle(parent, &attrs.request_id) { + return Err(CycleError); + } + } + // ... +} +``` + +### DAG Invariants + +- **Operation graph**: DAG-only enforced at construction. `add_typed_edge` throws `CycleError` if a cycle would result. +- **Call graph**: DAG-only by design (a call cannot be its own ancestor). `add_call` with a `parentRequestId` that would create a cycle throws `CycleError`. +- **No parallel edges**: `multi: false` — at most one edge per (source, target) pair. +- **No self-loops**: `allow_self_loops: false` — an operation cannot depend on its own output. + +## Integration with alknet-storage + +Call graphs and operation graphs are stored as metagraph instances: + +```rust +// Create call-graph type definition +let call_graph_type = GraphType { + name: "call-graph".to_string(), + config: GraphConfig { graph_type: Directed, multi: false, allow_self_loops: false }, + scope: Scope::System, + ..Default::default() +}; + +// Store a call graph instance +let graph = storage.create_graph("call-graph", "session-abc")?; + +// Add call nodes +storage.add_node(graph.id, "call-001", &call_attrs)?; + +// Query via petgraph +let pg: FlowGraph = storage.load_call_graph("session-abc")?; +let running = pg.filter_by_status(CallStatus::Running); +``` + +The `alknet-storage` crate handles persistence (SQLite via honker). The `alknet-flowgraph` crate handles in-memory graph operations (petgraph). The bridge is serialization: `FlowGraph` serializes to/from `serde_json::Value`, which `alknet-storage` stores in the `nodes.attributes` and `edges.attributes` columns. + +## Integration with alknet-core (Call Protocol) + +```rust +// The call protocol's EventEnvelope drives call graph construction +use alknet_core::call::PendingRequestMap; +use alknet_flowgraph::FlowGraph; + +let mut call_graph = FlowGraph::::new(); + +// Wire up call protocol events to graph updates +call_map.on_requested(|event| { + call_graph.update_from_event(&CallEvent::Requested(event)); +}); + +call_map.on_responded(|event| { + call_graph.update_from_event(&CallEvent::Responded(event)); + // Persist incrementally to storage + storage.update_node(event.request_id, &call_graph)?; +}); + +call_map.on_error(|event| { + call_graph.update_from_event(&CallEvent::Error(event)); +}); + +call_map.on_completed(|event| { + call_graph.update_from_event(&CallEvent::Completed(event)); +}); + +// Abort cascading +call_map.on_aborted(|event| { + let descendants = call_graph.descendants(&event.request_id); + for desc in descendants { + call_map.abort(&desc); + } + call_graph.update_from_event(&CallEvent::Aborted(event)); +}); +``` + +## Type Compatibility Between TS and Rust + +| TypeScript (flowgraph) | Rust (alknet-flowgraph) | +|------------------------|-------------------------| +| `graphology.DirectedGraph` | `petgraph::DiGraph` | +| `CallNodeAttrs` (TypeBox) | `CallNodeAttrs` (serde struct) | +| `CallEdgeAttrs` (TypeBox) | `CallEdgeAttrs` (serde struct) | +| `CallStatus` (enum) | `CallStatus` (Rust enum) | +| `EdgeType` (enum) | `EdgeType` (Rust enum) | +| `OperationNodeAttrs` | `OperationNodeAttrs` (serde struct) | +| `OperationEdgeAttrs` | `OperationEdgeAttrs` (serde struct) | +| `OperationType` (enum) | `OperationType` (Rust enum) | +| `Identity` | `Identity` (serde struct) | +| `AccessControl` | `AccessControl` (serde struct) | +| `typeCompat()` | `type_compat()` using jsonschema | +| `Value.Check()` (TypeBox) | `jsonschema::validate()` | +| `addCall()` | `add_call()` | +| `updateStatus()` with state machine | `update_status()` with `is_valid_transition()` | +| `addDependency()` | `add_dependency()` | +| `descendants()` | petgraph DFS | + +## Crate Dependency Map + +```toml +[dependencies] +petgraph = "0.x" # Core graph data structure +serde = { version = "1", features = ["derive"] } +serde_json = "1" +jsonschema = "0.x" # Type compatibility checks +thiserror = "1" +uuid = { version = "1", features = ["v4"] } +chrono = { version = "0.x", features = ["serde"] } + +[dev-dependencies] +tokio = { version = "1", features = ["full"] } +``` + +## Design Decisions + +| Decision | Rationale | +|----------|-----------| +| petgraph over custom graph | Nearly 1:1 mapping to graphology operations; well-maintained; fast | +| `HashMap` for key lookups | Matches SQLite `key` column pattern; O(1) lookup by string key | +| serde_json for attributes | Matches SQLite `attributes TEXT JSON` column; dynamic validation via jsonschema | +| Separate crates for flowgraph and storage | Flowgraph is pure in-memory graph ops; storage is SQLite persistence; different dependency sets | +| `NodeAttributes` / `EdgeAttributes` traits | Generic over attribute types, matching flowgraph's type parameter pattern | +| DAG enforcement at construction | Matches TypeScript flowgraph: `fromSpecs()` throws `CycleError` | +| `filter_by_status` is O(n) | Matches TypeScript: small graphs (tens to hundreds of nodes), no index needed | + +## References + +- `@alkdev/flowgraph` — TypeScript implementation (call-graph, operation-graph) +- `@alkdev/operations` — `OperationSpec`, `CallHandler`, `PendingRequestMap` +- `/workspace/petgraph` — Graph data structure crate +- `/workspace/jsonschema` — JSON Schema validation crate +- `/workspace/@alkdev/storage/docs/architecture/metagraph-module.md` — TypeBox Module pattern +- `/workspace/@alkdev/storage/docs/architecture/sqlite-host.md` — SQLite table definitions +- `/workspace/@alkdev/storage/docs/architecture/acl.md` — ACL as metagraph \ No newline at end of file diff --git a/research/storage.md b/research/storage.md new file mode 100644 index 0000000..06c365e --- /dev/null +++ b/research/storage.md @@ -0,0 +1,348 @@ +# Alknet Storage: Metagraph, Identity, ACL, and Honker Integration + +> Status: Research / Draft +> Last updated: 2026-06-05 + +## Overview + +`alknet-storage` is a Rust crate providing SQLite-backed graph storage, identity management, access control, and reactivity via honker. It mirrors the TypeScript `@alkdev/storage` package's design (`sqlite-host.md`, `metagraph-module.md`, `acl.md`) while leveraging Rust's type system and petgraph's performance. + +## Crate Decomposition + +``` +alknet-storage +├── metagraph/ — GraphType, NodeType, EdgeType definitions and persistence +├── identity/ — accounts, organizations, peer_credentials, api_keys, audit_logs +├── acl/ — PrincipalNode, DelegatesEdge, access control graph +├── honker/ — honker integration: notify, stream, queue, event bridge +├── graph/ — GraphInstance, Node, Edge CRUD with schema validation +└── schema/ — JSON Schema definitions (serde + jsonschema for runtime validation) +``` + +## Metagraph Data Model + +The metagraph is a three-level type system (mirrors `@alkdev/storage` exactly): + +1. **GraphType** — A class of graphs (e.g., "call-graph", "acl", "task-dependencies"). Defines structural constraints (directed/undirected/mixed, allows self-loops, multi-edges). +2. **NodeType** — A category of node within a graph type (e.g., "call", "account", "task"). Each node type has a JSON Schema that validates the `attributes` of nodes belonging to that type. +3. **EdgeType** — A category of edge within a graph type (e.g., "triggered", "can_read", "depends_on"). Each edge type has a JSON Schema for its attributes. Optionally constrains which source/target node types are valid. + +**Graph instances** belong to a graph type and contain **Nodes** and **Edges** conforming to those type definitions. + +### Rust Types + +```rust +pub struct GraphType { + pub id: String, + pub name: String, // "call-graph", "acl" + pub description: String, + pub config: GraphConfig, // directed/undirected/mixed, multi, self-loops + pub version: u32, + pub scope: Scope, // System, Tenant, User + pub metadata: serde_json::Value, +} + +pub struct GraphConfig { + pub graph_type: GraphDirection, // Directed, Undirected, Mixed + pub multi: bool, + pub allow_self_loops: bool, +} + +pub enum Scope { + System, + Tenant, + User, +} + +pub struct NodeType { + pub id: String, + pub graph_type_id: String, + pub name: String, // "call", "account" + pub description: String, + pub schema: serde_json::Value, // JSON Schema for node attributes +} + +pub struct EdgeType { + pub id: String, + pub graph_type_id: String, + pub name: String, // "triggered", "can_read" + pub description: String, + pub schema: serde_json::Value, // JSON Schema for edge attributes + pub allowed_source_types: Vec, // [] = no restriction + pub allowed_target_types: Vec, +} + +pub struct Graph { + pub id: String, + pub graph_type_id: String, + pub name: String, + pub description: String, + pub status: GraphStatus, // Active, Archived, Draft + pub owner_id: Option, + pub project_id: Option, + pub metadata: serde_json::Value, +} + +pub enum GraphStatus { + Active, + Archived, + Draft, +} + +pub struct Node { + pub id: String, + pub graph_id: String, + pub key: String, // Consumer-defined identity within the graph + pub attributes: serde_json::Value, // Validated by node type schema + pub metadata: serde_json::Value, +} + +pub struct Edge { + pub id: String, + pub graph_id: String, + pub key: Option, // Null for anonymous edges + pub source_node_key: String, + pub target_node_key: String, + pub attributes: serde_json::Value, // Validated by edge type schema + pub undirected: bool, + pub metadata: serde_json::Value, +} +``` + +### SQLite Tables (mirrors `sqlite-host.md`) + +Common columns on all tables: `id TEXT PK`, `metadata TEXT JSON DEFAULT '{}'`, `created_at INTEGER TIMESTAMP DEFAULT (strftime('%s','now'))`, `updated_at INTEGER TIMESTAMP DEFAULT (strftime('%s','now'))`. + +**graph_types**: `id`, `name TEXT UNIQUE`, `description TEXT DEFAULT ''`, `config TEXT JSON NOT NULL`, `version INTEGER NOT NULL DEFAULT 1`, `scope TEXT NOT NULL DEFAULT 'system'` + +**node_types**: `id`, `graph_type_id TEXT FK → graph_types.id CASCADE`, `name TEXT NOT NULL`, `description TEXT DEFAULT ''`, `schema TEXT JSON NOT NULL`. Unique constraint: `(graph_type_id, name)`. + +**edge_types**: `id`, `graph_type_id TEXT FK → graph_types.id CASCADE`, `name TEXT NOT NULL`, `description TEXT DEFAULT ''`, `schema TEXT JSON NOT NULL`, `allowed_source_types TEXT JSON DEFAULT '[]'`, `allowed_target_types TEXT JSON DEFAULT '[]'`. Unique constraint: `(graph_type_id, name)`. + +**graphs**: `id`, `graph_type_id TEXT FK → graph_types.id SET NULL`, `name TEXT NOT NULL`, `description TEXT DEFAULT ''`, `status TEXT NOT NULL DEFAULT 'draft'`, `owner_id TEXT`, `project_id TEXT`. Indexes on `(owner_id)`, `(project_id)`, `(owner_id, project_id)`. + +**nodes**: `id`, `graph_id TEXT FK → graphs.id CASCADE`, `key TEXT NOT NULL`, `attributes TEXT JSON NOT NULL DEFAULT '{}'`. Unique constraint: `(graph_id, key)`. No `node_type_id` column (ADR-020). + +**edges**: `id`, `graph_id TEXT FK → graphs.id CASCADE`, `key TEXT`, `source_node_key TEXT NOT NULL`, `target_node_key TEXT NOT NULL`, `attributes TEXT JSON NOT NULL DEFAULT '{}'`, `undirected INTEGER DEFAULT 0`. Unique constraint: `(graph_id, key)`. FK: `source_node_key`, `target_node_key` reference `(nodes.graph_id, nodes.key)` with CASCADE delete (ADR-022). + +### System DB vs Tenant DB (ADR-040) + +- **System DB** (`system.db`): Identity tables (accounts, organizations, peer_credentials, api_keys, audit_logs) + system-scoped graph types. +- **Tenant DB** (`tenant-{orgId}.db`): Metagraph tables (graph_types, node_types, edge_types, graphs, nodes, edges) + tenant-scoped graph types. + +No FK constraints across database files. Consumer enforces referential integrity at application layer. + +## Identity Tables + +Mirrors `sqlite-host.md` identity tables with the same column definitions and FK cascades: + +**accounts**: `email TEXT UNIQUE NOT NULL`, `display_name TEXT`, `access_level TEXT NOT NULL DEFAULT 'user'` (admin/user/service), `status TEXT NOT NULL DEFAULT 'active'` (active/suspended/deactivated). + +**organizations**: `name TEXT UNIQUE NOT NULL`, `slug TEXT UNIQUE NOT NULL`, `owner_id TEXT FK → accounts.id RESTRICT`. + +**organization_members**: `org_id TEXT FK → organizations.id CASCADE`, `account_id TEXT FK → accounts.id CASCADE`, `membership_level TEXT NOT NULL` (owner/admin/member). Unique constraint: `(org_id, account_id)`. + +**api_keys**: `owner_id TEXT FK → accounts.id CASCADE`, `key_hash TEXT UNIQUE NOT NULL`, `name TEXT`, `enabled INTEGER NOT NULL DEFAULT 1`, `expires_at INTEGER TIMESTAMP`, `revoked_at INTEGER TIMESTAMP`, `rotated_to_id TEXT`, `last_used_at INTEGER TIMESTAMP`. + +**peer_credentials**: `owner_id TEXT FK → accounts.id CASCADE`, `credential_type TEXT NOT NULL` (ssh_key/cert_authority), `fingerprint TEXT UNIQUE NOT NULL`, `public_key_data TEXT NOT NULL`, `name TEXT`, `enabled INTEGER NOT NULL DEFAULT 1`, `expires_at INTEGER TIMESTAMP`, `revoked_at INTEGER TIMESTAMP`. + +**audit_logs**: `action TEXT NOT NULL`, `owner_id TEXT FK → accounts.id RESTRICT`, `credential_id TEXT`, `credential_type TEXT`, `org_id TEXT FK → organizations.id SET NULL`, `details TEXT JSON`. + +## Access Control (ACL) as Metagraph + +Mirrors `@alkdev/storage acl.md`: + +### AclGraph Module + +```rust +// Graph config: directed, multi=false, allowSelfLoops=false +pub const ACL_GRAPH_CONFIG: GraphConfig = GraphConfig { + graph_type: GraphDirection::Directed, + multi: false, + allow_self_loops: false, +}; + +// Node types +pub const PRINCIPAL_NODE: &str = "principal"; +pub const RESOURCE_NODE: &str = "resource"; + +// Edge types +pub const CAN_READ_EDGE: &str = "can_read"; +pub const CAN_WRITE_EDGE: &str = "can_write"; +pub const CAN_EXECUTE_EDGE: &str = "can_execute"; +pub const BELONGS_TO_EDGE: &str = "belongs_to"; +pub const DELEGATES_EDGE: &str = "delegates"; + +// PrincipalNode attributes +pub struct PrincipalNodeAttrs { + pub identity_type: IdentityType, // Account, Org, Service, Role + pub identity_id: String, // FK to accounts.id or organizations.id + pub scopes: Vec, + pub resources: Option>>, +} + +pub enum IdentityType { + Account, + Org, + Service, + Role, +} + +// DelegatesEdge attributes +pub struct DelegatesEdgeAttrs { + pub narrowed_scopes: Vec, // Subset of delegator's scopes + pub narrowable: bool, // Can the delegate further narrow? +} +``` + +### Principal-Agent Hierarchy + +- **Account** nodes represent individual users +- **Org** nodes represent organizations +- **Service** nodes represent automated agents (LLM workers, spoke credentials) +- **Role** nodes represent named permission sets + +Delegation edges (`delegates`) carry `narrowed_scopes` — the delegate can only exercise scopes that are a subset of the delegator's. Liability flows upward; permissions flow downward with narrowing. + +### BelongsToEdge (Derived from org_members) + +ADR-045: The `organization_members` SQL table is the authoritative source. When membership changes, the consumer writes the SQL row first, then creates or removes the ACL `belongs_to` edge. The edge is derived, not the source of truth. + +### Operation-Level ACL + +`OperationSpec.access_control` maps to ACL graph traversal at runtime: + +```rust +pub fn check_access( + acl_graph: &Graph, + principal_key: &str, + operation_spec: &OperationSpec, +) -> bool { + // Traverse from PrincipalNode to ResourceNode + // Check if any path satisfies required_scopes (AND) and required_scopes_any (OR) + // Honor delegation chains with scope narrowing +} +``` + +## Honker Integration + +### Reactivity Pattern (ADR-047) + +Every mutation is atomic with a notification: + +```rust +// Insert a node and notify in one transaction +tx.execute( + "INSERT INTO nodes (id, graph_id, key, attributes) VALUES (?, ?, ?, ?)", + &[&node_id, &graph_id, &key, &attrs_json], +)?; +tx.stream_publish("nodes:created", &node_attrs_json)?; +``` + +This mirrors the TypeScript pattern from `sqlite-host.md` but in Rust, using honker's SQLite extension functions: + +```rust +use honker::Database; + +let db = Database::open("tenant.db")?; + +// Transactional: business write + event stream publish commit together +let mut tx = db.transaction()?; +tx.execute("INSERT INTO nodes (id, graph_id, key, attributes) VALUES (?, ?, ?, ?)", ...)?; +tx.stream_publish("nodes:created", &attrs)?; +tx.commit()?; + +// Subscribe to changes +let stream = db.stream("nodes:created"); +async for event in stream.subscribe("alknet-node-watcher") { + // event is a serde_json::Value +} +``` + +### Honker Features Used + +| Feature | Use case | +|---------|----------| +| `stream_publish` / `subscribe` | Durable pub/sub for node/edge/membership changes with per-consumer offsets | +| `notify` / `listen` | Ephemeral pub/sub for real-time control channel events | +| `queue` / `claim` / `ack` | Task queue for async operations (key rotation, ACL evaluation) | +| `scheduler` | Periodic tasks (session cleanup, audit log pruning) | + +### Database Concurrency + +- WAL mode (default) for concurrent reads during writes +- Single writer per `.db` file +- `busy_timeout=5000` default +- `PRAGMA data_version` polling for cross-process wake (honker pattern) +- `max_readers=4` concurrent read connections in the reader pool + +## JSON Schema Validation + +TypeBox from TypeScript maps to `serde_json::Value` + `jsonschema` in Rust: + +| TypeScript | Rust | +|-----------|------| +| `Type.Object({...})` | `serde_json::json!({...})` as JSON Schema | +| `Value.Check(schema, data)` | `jsonschema::validate(&schema, &data)` | +| `Type.Module({...})` | JSON Schema with `$defs` stored in DB | +| `Type.Composite([A, B])` | Merge + intersect via `serde_json` merge logic | + +The `jsonschema` crate provides runtime validation analogous to TypeBox's `Value.Check()`. Schema definitions are stored as `serde_json::Value` in the `schema` column of `node_types` and `edge_types` tables. + +## Crate Dependency Map + +```toml +[dependencies] +honker = "0.x" # SQLite extension with pub/sub/queue +serde = { version = "1", features = ["derive"] } +serde_json = "1" +jsonschema = "0.x" # JSON Schema validation (runtime) +petgraph = "0.x" # Graph data structure (shared with alknet-flowgraph) +rusqlite = { version = "0.x", features = ["bundled"] } # SQLite access (via honker) +uuid = { version = "1", features = ["v4"] } +chrono = "0.x" +thiserror = "1" +tokio = { version = "1", features = ["full"] } +``` + +## Multi-Tenant Replication Path + +For the private use case: single `.db` files, honker for reactivity, no cross-database FK constraints. + +For the distributed use case (later): + +1. **Smart contracts** (Base L2) own namespace identity → `ownerId` field on `graphs` table +2. **alknet-relay** gossips namespace availability via iroh-gossip or call protocol subscriptions +3. **ACL inference** — Contract `collaborators` → ACL graph `DelegatesEdge` entries +4. **Honker streams** — `stream_subscribe("nodes:modified")` carries mutations to relay subscribers + +Replication mindset from the start: **every write is atomic with a notification**. The honker stream event is the replication unit. A future replicator reads `_honker_stream_*` tables and propagates changes to subscribed relays. + +## Design Decisions (mapped from TypeScript ADRs) + +| Original ADR | Decision | Rust adaptation | +|-------------|----------|-----------------| +| 002 | Metagraph over domain tables | Same 6-table schema, same graph type/node type/edge type model | +| 008 | Common columns pattern | `id`, `metadata`, `created_at`, `updated_at` on all tables | +| 019 | JSON text for schema columns | `serde_json::Value` stored as TEXT in SQLite | +| 020 | No nodeTypeId on nodes | Node type enforced at application layer | +| 022 | Composite FKs for node refs | `source_node_key` + `target_node_key` with cascade | +| 034 | ACL as metagraph | AclGraph is a metagraph instance | +| 038 | SQLite-first, PG removed | SQLite only via honker | +| 040 | System DB + tenant DB | Two `.db` files | +| 041 | Identity tables in storage | Same tables, same constraints | +| 045 | org_members authoritative | SQL table is source of truth, BelongsToEdge is derived | +| 047 | Honker event target | honker stream/notify as pub/sub mechanism | +| 049 | Identity schema restructuring | Separate credential tables, no Gitea columns | +| 050 | SHA-256 for API key hashing | Fast hash for high-entropy machine keys | + +## References + +- `@alkdev/storage` — TypeScript metagraph, identity, ACL implementation +- `@alkdev/flowgraph` — TypeScript call-graph and operation-graph (maps to petgraph in Rust) +- `@alkdev/operations` — TypeScript OperationSpec, CallHandler, registry +- `/workspace/honker` — SQLite extension with pub/sub, streams, queues +- `/workspace/polyglot` — SQL transpiler (future: schema migration validation) +- `/workspace/petgraph` — Graph data structure library (used in alknet-flowgraph) +- `/workspace/jsonschema` — JSON Schema validation (Rust, replaces TypeBox at runtime) +- `/workspace/iroh/iroh-dns` — DNS resolver and endpoint info \ No newline at end of file