From d149932e2aa1cde15cba50d1ca37c78bcc16253d Mon Sep 17 00:00:00 2001 From: "glm-5.2" Date: Wed, 24 Jun 2026 10:02:03 +0000 Subject: [PATCH] tasks: decompose review #004 findings into 4 fix tasks + review gate W1 (call/protocol/abort-cascade-wiring): wire AbortCascade into CallAdapter handle_stream for EVENT_ABORTED. W2 (core/endpoint-client-fingerprint): extract TLS client cert fingerprint in dispatch_quinn/dispatch_iroh. W3 (vault/mnemonic-debug-redaction): replace Mnemonic derive(Debug) with redacting impl. W4 (core/auth-apikey-resources, level: research): decide whether ApiKeyEntry should carry resources, then implement or drop from spec. review-post-impl-fixes gates on all four. Graph: 33 tasks, 12 gens. --- tasks/call/protocol/abort-cascade-wiring.md | 130 ++++++++++++++++++++ tasks/core/auth-apikey-resources.md | 117 ++++++++++++++++++ tasks/core/endpoint-client-fingerprint.md | 118 ++++++++++++++++++ tasks/review-post-impl-fixes.md | 96 +++++++++++++++ tasks/vault/mnemonic-debug-redaction.md | 110 +++++++++++++++++ 5 files changed, 571 insertions(+) create mode 100644 tasks/call/protocol/abort-cascade-wiring.md create mode 100644 tasks/core/auth-apikey-resources.md create mode 100644 tasks/core/endpoint-client-fingerprint.md create mode 100644 tasks/review-post-impl-fixes.md create mode 100644 tasks/vault/mnemonic-debug-redaction.md diff --git a/tasks/call/protocol/abort-cascade-wiring.md b/tasks/call/protocol/abort-cascade-wiring.md new file mode 100644 index 0000000..8af80a2 --- /dev/null +++ b/tasks/call/protocol/abort-cascade-wiring.md @@ -0,0 +1,130 @@ +--- +id: call/protocol/abort-cascade-wiring +name: Wire AbortCascade into CallAdapter inbound event path (ADR-016) +status: pending +depends_on: [call/protocol/abort-cascade] +scope: narrow +risk: medium +impact: component +level: implementation +--- + +## Description + +`AbortCascade::cascade_abort` is implemented and unit-tested in +`crates/alknet-call/src/protocol/abort.rs` (20 tests cover depth-3 cascades, +mixed Call/Subscribe entries, both `AbortPolicy` variants, and the +"unknown root silently discarded" rule), but no caller invokes it. +`CallAdapter::handle_stream` (adapter.rs:210–213) only dispatches +`EVENT_REQUESTED`; an inbound `call.aborted` event is logged at `debug!` +and dropped. As a result, ADR-016's cascade is a documented property +that does not hold at runtime — when a wire client aborts a parent +request, the parent's composed descendants keep running to completion, +which is exactly the wasted-work / unwanted-side-effects case ADR-016 +was written to prevent. + +This task adds the missing bolt between the two halves. + +### Wire visibility (read before implementing) + +ADR-016 Decision 2 (decisions/016-...:220–224) and the abort-cascade +task (tasks/call/protocol/abort-cascade.md:68–74) are explicit: +**composed child request_ids are internal**. The client only sees +`call.aborted` for the root ID it sent; the server cascades internally. +So the wiring should: + +1. Receive the inbound `call.aborted` for `root_request_id`. +2. Call `AbortCascade::cascade_abort(root_request_id, AbortPolicy::AbortDependents)` + — the wire caller does not choose the policy (ADR-016 Decision 6: + the root gets the default `AbortDependents`; `ContinueRunning` is a + handler-level opt-in for children, not a wire field). +3. Drop each descendant's pending entry (which cancels its future via + Rust's async drop semantics — no `call.aborted` is sent on the wire + for descendants). +4. Drop the root's pending entry (the trigger event already came from + the wire; the server mirrors the abort locally). + +Do **not** send `call.aborted` frames back to the client for descendant +IDs — that would leak internal composition structure to the wire. + +### Implementation sketch + +In `CallAdapter::handle_stream` (adapter.rs:200–228), replace the +`continue` branch with a match on event type: + +```rust +match envelope.r#type.as_str() { + EVENT_REQUESTED => { /* existing dispatch path */ } + EVENT_ABORTED => { + let request_id = envelope.id.clone(); + let mut pending = connection.pending().lock(); + let mut cascade = AbortCascade::new(&mut pending); + let aborted = cascade.cascade_abort(&request_id, AbortPolicy::AbortDependents); + // also abort the root itself (cascade_abort does not touch the root) + pending.handle_aborted(&request_id); + if !aborted.is_empty() { + tracing::debug!(count = aborted.len(), "abort cascade evicted descendants"); + } + } + other => { + debug!(event_type = %other, id = %envelope.id, "ignoring non-requested/non-aborted event on inbound stream"); + } +} +``` + +`AbortCascade` already takes `&mut PendingRequestMap`; `CallConnection::pending()` +returns `&Arc>` (connection.rs:53–55). Import +`AbortCascade` and `AbortPolicy` from `super::abort` and +`crate::registry::context` respectively. + +### Integration test (the test that would have caught the gap) + +Add an integration test in `adapter.rs`'s `tests` module that exercises +the full path: + +1. Build a `CallAdapter` with a registry containing one parent op + (`parent/run`) whose handler calls `env.invoke("child", "run", ...)`. +2. Register `child/run` in the same registry. +3. Open a stub `Connection`, construct a `CallConnection`, manually + register a pending entry for the parent's request_id, and simulate + a composed child by registering a second pending entry with + `parent_request_id: Some(parent_id)`. +4. Send an `EventEnvelope::aborted(parent_id)` frame through + `handle_stream`. +5. Assert both the parent and child entries are gone from + `PendingRequestMap`. + +The existing unit tests on `AbortCascade` cover the tree-walking logic; +this test only needs to confirm the wiring — that an inbound abort +frame actually reaches `cascade_abort`. + +## Acceptance Criteria + +- [ ] `CallAdapter::handle_stream` handles `EVENT_ABORTED` (not just `EVENT_REQUESTED`) +- [ ] Inbound `call.aborted` triggers `AbortCascade::cascade_abort` with `AbortPolicy::AbortDependents` +- [ ] Root request's pending entry is also removed (cascade_abort skips the root) +- [ ] No `call.aborted` frames are sent on the wire for descendant IDs (internal-only cascade) +- [ ] Non-requested, non-aborted events still log at `debug!` and continue +- [ ] Integration test: parent abort removes parent + child from `PendingRequestMap` +- [ ] Integration test: abort for unknown request_id is a no-op (no panic, no removal) +- [ ] `cargo test -p alknet-call` succeeds +- [ ] `cargo clippy -p alknet-call --all-targets` succeeds with no warnings + +## References + +- docs/reviews/004-post-implementation-sanity-check.md — W1 (full finding) +- docs/architecture/decisions/016-abort-cascade-for-nested-calls.md — ADR-016 (Decision 2: server-side cascade; Decision 6: policy is handler-set, not wire-set) +- docs/architecture/crates/call/call-protocol.md:497 — spec requiring the CallAdapter to walk the tree +- tasks/call/protocol/abort-cascade.md — completed task that built `AbortCascade` in isolation +- crates/alknet-call/src/protocol/abort.rs — existing `AbortCascade` impl +- crates/alknet-call/src/protocol/adapter.rs:200–228 — `handle_stream` (the site to modify) + +## Notes + +> The abort-cascade task (call/protocol/abort-cascade) built and tested +> `AbortCascade` but its acceptance criteria did not include wiring it +> into `CallAdapter::handle_stream`. The call-adapter task's acceptance +> criteria likewise omitted "inbound `call.aborted` triggers cascade." +> This task closes that integration gap — all the hard logic already +> exists and is tested; this task adds the ~30-line bolt and the one +> integration test that would have caught the gap. \ No newline at end of file diff --git a/tasks/core/auth-apikey-resources.md b/tasks/core/auth-apikey-resources.md new file mode 100644 index 0000000..b6a6ddf --- /dev/null +++ b/tasks/core/auth-apikey-resources.md @@ -0,0 +1,117 @@ +--- +id: core/auth-apikey-resources +name: Reconcile ApiKeyEntry.resources — add field to type and populate in resolve_api_key, or drop from spec +status: pending +depends_on: [] +scope: narrow +risk: low +impact: component +level: research +--- + +## Description + +Three-way mismatch between spec, type, and implementation for +resource-scoped ACLs on API-key-authenticated identities: + +- **Spec** (`docs/architecture/crates/core/auth.md:153`): + > "Token: ... return `Identity { id: prefix, scopes: entry.scopes, + > resources: entry.resources }`." + The spec references `entry.resources`. + +- **Type** (`crates/alknet-core/src/config.rs:55–62`): `ApiKeyEntry` has + fields `prefix, hash, scopes, description, expires_at` — there is no + `resources` field. So `entry.resources` in the spec cannot be + implemented as written. + +- **Implementation** (`config.rs:113–117`): `resolve_api_key` constructs + the resolved `Identity` with `resources: std::collections::HashMap::new()` + — resources are always empty, regardless of what the API key grants. + +The same gap exists in `resolve_identity_from_fingerprint` +(config.rs:69–79), which also returns `resources: HashMap::new()`. + +### Impact + +Latent today: no operation in the workspace uses resource-based ACLs +against a token- or fingerprint-resolved identity. The +`AccessControl::resource_type` / `resource_action` fields exist in +`OperationSpec` (spec.rs:32–37) and are tested (spec.rs:284–303), but +those tests always hand-construct `Identity.resources` directly — +never via the resolver path. The moment an operation declares a +resource-scoped ACL and a caller authenticates via API key, the ACL +check will fail with "missing resource" even if the key was granted +that resource in config — because `resources` is always empty. + +### This is a research/decision task, not an implementation task + +The decomposer rule applies: **the architecture is ambiguous** on +whether API keys should grant resource-scoped access. Two valid designs +exist; pick one and document it before implementing. Do not implement +until the decision is made. + +**Option A — add `resources` to `ApiKeyEntry`** (matches current spec): +- Add `pub resources: HashMap>` to `ApiKeyEntry`. +- Update `resolve_api_key` to populate `Identity.resources` from + `entry.resources`. +- Update `resolve_identity_from_fingerprint` similarly — either add a + `resources` field to the fingerprint config path, or document that + fingerprint auth grants scopes only (resources empty). +- Update `auth.md`'s token resolution example to match the new field. +- Define the TOML schema for `resources` in `AuthPolicy` (when a TOML + schema is added — currently config is built in code, not parsed). +- Resource-scoped ACLs then work for both auth paths. + +**Option B — drop `resources` from the spec for API keys**: +- Remove `entry.resources` from `auth.md:153`. +- Document that API keys grant scopes only; resource-scoped access + requires a different identity source (e.g., a future OAuth/JWT + provider that carries resource claims). +- `Identity.resources` stays in the type (it's used by hand-constructed + identities in tests and by `CompositionAuthority::as_identity` for + internal calls) but token/fingerprint resolvers always return empty. +- Resource-scoped ACLs against token identities return `Forbidden` — + this becomes a documented limitation, not a bug. + +### Deliverable + +Produce a short decision note (a paragraph in `auth.md` under +"Identity Resolution" — or a new ADR if the decision feels +consequential enough) that picks A or B and justifies it. Then either +implement the chosen option in the same task (if small) or split a +follow-up `level: implementation` task gated on this one. + +The decision should consider: do any planned operations (in the +upcoming alknet-ssh, alknet-fs, alknet-git crates) need resource-scoped +ACLs on API-key identities? If yes, A. If resource ACLs are only ever +applied to handler-internal composition identities +(`CompositionAuthority`), B is fine and simpler. + +## Acceptance Criteria + +- [ ] Decision made: Option A or Option B +- [ ] Decision documented in `auth.md` (or a new ADR if consequential) +- [ ] If Option A: `ApiKeyEntry.resources` added, `resolve_api_key` populates `Identity.resources`, `resolve_identity_from_fingerprint` handling decided and documented, `auth.md:153` matches the new shape +- [ ] If Option B: `auth.md:153` corrected to drop `entry.resources`, limitation documented +- [ ] Either way: a test covering the chosen behavior (token resolves with resources, or token resolves with empty resources + documented limitation) +- [ ] `cargo test -p alknet-core` succeeds +- [ ] `cargo clippy -p alknet-core --all-targets` succeeds with no warnings + +## References + +- docs/reviews/004-post-implementation-sanity-check.md — W4 (full finding) +- docs/architecture/crates/core/auth.md:152–153 — spec text referencing `entry.resources` +- crates/alknet-core/src/config.rs:55–62 — `ApiKeyEntry` (missing `resources`) +- crates/alknet-core/src/config.rs:69–118 — both resolvers returning empty `resources` +- crates/alknet-call/src/registry/spec.rs:77–103 — `AccessControl::check` resource path (the consumer that would fail) +- crates/alknet-call/src/registry/context.rs:58–65 — `CompositionAuthority::as_identity` (the internal-call path that does populate `resources`) + +## Notes + +> This is a `level: research` task because the fix is small but the +> decision is not. The decomposer principle: if architecture is +> ambiguous, do not proceed with implementation — escalate. Make the +> decision first, then implement. If the decision is A and the +> implementation is more than ~30 lines, split a follow-up +> `level: implementation` task (`core/auth-apikey-resources-impl`) +> depending on this one. \ No newline at end of file diff --git a/tasks/core/endpoint-client-fingerprint.md b/tasks/core/endpoint-client-fingerprint.md new file mode 100644 index 0000000..1bd7202 --- /dev/null +++ b/tasks/core/endpoint-client-fingerprint.md @@ -0,0 +1,118 @@ +--- +id: core/endpoint-client-fingerprint +name: Extract TLS client certificate fingerprint in endpoint dispatch (ADR-004) +status: pending +depends_on: [] +scope: narrow +risk: medium +impact: component +level: implementation +--- + +## Description + +Both dispatch functions in `crates/alknet-core/src/endpoint.rs` hard-code +`tls_client_fingerprint: None` when calling `build_auth_context` +(endpoint.rs:306 and 396). As a result, `AuthContext.identity` (the +endpoint-resolved identity) is always `None` at the endpoint layer, and +all identity resolution is deferred to handler-level code. The +endpoint-level auth resolution path described in +`docs/architecture/crates/core/auth.md:159–171` is non-functional: + +> "QUIC connection arrives → TLS handshake → Extract TLS client +> certificate fingerprint (if presented) → If fingerprint present: +> `IdentityProvider::resolve_from_fingerprint()` → `auth.identity = +> Some(identity)` → Construct `AuthContext { identity, alpn, remote_addr, +> tls_client_fingerprint }`" + +This matters most for P2P nodes using RFC 7250 raw Ed25519 keys (the +"default for most alknet nodes" per OQ-12), where the connection-level +identity *is* the TLS client cert — there is no separate protocol-level +credential to extract. Without endpoint-level fingerprint extraction, +a raw-key peer connecting to an `alknet/call` endpoint cannot be +identified by fingerprint at the endpoint layer. + +### Quinn path + +`extract_quinn_alpn` (endpoint.rs:316–326) already downcasts +`connection.handshake_data()` to `quinn::crypto::rustls::HandshakeData`. +The same `HandshakeData` struct exposes the peer's client certificate +chain when one was presented. Extract the chain, hash the leaf cert's +DER to a `SHA256:`-prefixed fingerprint string (matching the format +`AuthPolicy::resolve_identity_from_fingerprint` expects — see +auth.md:152), and pass it to `build_auth_context` in place of `None`. + +Note: `rustls::ServerConfig` is currently built with +`with_no_client_auth()` (endpoint.rs:450, 463, 473), so the server does +not *request* client certs. To actually receive a client cert, the +server config must use `with_client_auth()` or an equivalent that +requests but does not require client certs (raw-public-key peers +present their Ed25519 key as the "client cert" in RFC 7250 mode). This +is the one design decision to make in this task: whether to switch from +`with_no_client_auth()` to a "request-but-don't-require" mode, or to +leave `with_no_client_auth()` and accept that fingerprints only flow +when the client opts to present a cert unbidden. The RFC 7250 raw-key +path (the `RawKeyCertResolver` at endpoint.rs:565–595) already +advertises `only_raw_public_keys() -> true`, which is the server-side +half of RFC 7250; the client-side presentation is set by the client's +`rustls::ClientConfig`, not by the server. Read ADR-004 and OQ-12 +before deciding. + +### Iroh path + +iroh's `Connection` exposes the peer's `NodeId` (the raw Ed25519 +public key) via the connection's TLS session metadata. In iroh's model +the `NodeId` *is* the fingerprint — it's the raw-public-key identity. +Extract it and format as a `NodeId:`-prefixed string (or `SHA256:` of +the public key bytes — match whatever `AuthPolicy`'s fingerprint set +is expected to contain). Look at `iroh::endpoint::Connection` methods +and the `iroh::tls::Lts` / peer-certificate accessor for the exact API. + +### Fingerprint format + +`AuthPolicy::resolve_identity_from_fingerprint` (config.rs:69–79) does +a literal `HashSet::contains()` check — it does not normalize. So +whatever format the extractor produces must be the same format the +operator configures in `authorized_fingerprints`. The existing +fingerprint test (auth.rs:145–153) uses `"SHA256:abc123"` as a +placeholder. Pick a concrete format and document it in `auth.md` (the +spec is currently silent on the exact string format). Suggested: +`SHA256:` for X.509, `ed25519:` +for raw keys — but confirm against any existing fingerprint producer +in the codebase before committing. + +## Acceptance Criteria + +- [ ] `dispatch_quinn` extracts client cert fingerprint from `HandshakeData` when present +- [ ] `dispatch_iroh` extracts peer `NodeId` (or equivalent raw-key fingerprint) when present +- [ ] `build_auth_context` receives `Some(fingerprint)` when a client cert was presented, `None` otherwise +- [ ] `AuthContext.identity` is `Some(identity)` when the fingerprint resolves via `IdentityProvider`, `None` otherwise (no regression for the no-cert case) +- [ ] Server config decision (request-but-don't-require vs. no-client-auth) is made and documented +- [ ] Fingerprint string format is chosen, documented in `auth.md`, and consistent between extractor and `AuthPolicy::authorized_fingerprints` config +- [ ] Unit test: quinn path with a presented client cert → `auth.tls_client_fingerprint` is `Some(...)` +- [ ] Unit test: quinn path with no client cert → `auth.tls_client_fingerprint` is `None` (existing behavior preserved) +- [ ] Unit test: iroh path → `auth.tls_client_fingerprint` is `Some(NodeId-format)` when peer identity is available +- [ ] `cargo test -p alknet-core --all-features` succeeds +- [ ] `cargo clippy -p alknet-core --all-features --all-targets` succeeds with no warnings + +## References + +- docs/reviews/004-post-implementation-sanity-check.md — W2 (full finding) +- docs/architecture/crates/core/auth.md:159–171 — endpoint-level resolution flow spec +- docs/architecture/crates/core/auth.md:152 — fingerprint format used by `resolve_identity_from_fingerprint` +- docs/architecture/decisions/004-auth-as-shared-core.md — ADR-004 (hybrid resolution) +- docs/architecture/open-questions.md — OQ-12 (TLS identity provisioning) +- crates/alknet-core/src/endpoint.rs:306, 396 — the two `None` sites to fix +- crates/alknet-core/src/endpoint.rs:316–326 — `extract_quinn_alpn` (pattern to follow for `HandshakeData` downcast) +- crates/alknet-core/src/endpoint.rs:565–595 — `RawKeyCertResolver` (RFC 7250 server-side half) + +## Notes + +> If the server-config decision (request-but-don't-require client auth) +> is too large for this task's scope, split it: implement extraction +> first (this task, gated on the cert being presented *if* one arrives), +> then a follow-up task switches the server config to actually request +> client certs. The extraction code is correct either way — it returns +> `None` when no cert was presented, which is the current behavior, so +> landing extraction first is a safe no-op until the server config +> changes. \ No newline at end of file diff --git a/tasks/review-post-impl-fixes.md b/tasks/review-post-impl-fixes.md new file mode 100644 index 0000000..a3bdcba --- /dev/null +++ b/tasks/review-post-impl-fixes.md @@ -0,0 +1,96 @@ +--- +id: review-post-impl-fixes +name: Review the four post-implementation sanity-check #004 fixes for spec conformance +status: pending +depends_on: [call/protocol/abort-cascade-wiring, core/endpoint-client-fingerprint, vault/mnemonic-debug-redaction, core/auth-apikey-resources] +scope: moderate +risk: low +impact: phase +level: review +--- + +## Description + +Review the four fixes produced from review #004's findings (W1–W4) +before they are considered closed. Confirm each fix matches the +resolution described in `docs/reviews/004-post-implementation-sanity- +check.md`, does not introduce new spec drift, and is adequately tested. + +### Per-fix review checklist + +**W1 — `call/protocol/abort-cascade-wiring`**: +- `CallAdapter::handle_stream` handles `EVENT_ABORTED` (not just + `EVENT_REQUESTED`). +- Cascade uses `AbortPolicy::AbortDependents` (the wire caller does not + choose the policy — ADR-016 Decision 6). +- No `call.aborted` frames are sent back to the wire for descendant IDs + (ADR-016 Decision 2: server-side cascade; composed child request_ids + are internal). +- Root entry is also removed (cascade_abort skips the root by design). +- Integration test exercises the full path: inbound abort frame → + `PendingRequestMap` entries gone for parent + child. + +**W2 — `core/endpoint-client-fingerprint`**: +- `dispatch_quinn` and `dispatch_iroh` extract a fingerprint when one + is presented (not hard-coded `None`). +- `AuthContext.identity` is populated via `resolve_from_fingerprint` + when the fingerprint resolves. +- Fingerprint string format is documented in `auth.md` and consistent + with `AuthPolicy::authorized_fingerprints`. +- No regression: no-client-cert case still produces + `tls_client_fingerprint: None` and `identity: None`. +- Server-config decision (request-but-don't-require vs. no-client-auth) + is documented. + +**W3 — `vault/mnemonic-debug-redaction`**: +- `Mnemonic` has a manual redacting `Debug` impl; `#[derive(Debug)]` + is gone. +- `format!("{:?}", mnemonic)` does not contain any phrase word. +- `Seed` checked — no `Debug` impl leaks `bytes`. + +**W4 — `core/auth-apikey-resources`**: +- Decision (Option A or B) is documented in `auth.md` or a new ADR. +- Implementation (if any) matches the decision. +- `auth.md:153` no longer references `entry.resources` if Option B was + chosen; or `ApiKeyEntry.resources` exists and is populated if Option + A was chosen. +- Test covers the chosen behavior. + +### Cross-cutting checks + +- `cargo build --workspace --all-features` succeeds. +- `cargo test --workspace --all-features` succeeds (no regressions). +- `cargo clippy --workspace --all-features --all-targets` clean. +- No new spec/code drift introduced (reconcile any spec text touched + against the implementation). +- Update `docs/reviews/004-post-implementation-sanity-check.md`'s + status from `open` to `resolved` once all four findings are confirmed + fixed. + +## Acceptance Criteria + +- [ ] W1 fix confirmed: inbound `call.aborted` cascades to descendants +- [ ] W2 fix confirmed: endpoint extracts TLS client fingerprint +- [ ] W3 fix confirmed: `Mnemonic` `Debug` redacts the phrase +- [ ] W4 fix confirmed: `ApiKeyEntry.resources` reconciled with spec (or spec corrected) +- [ ] `cargo build --workspace --all-features` succeeds +- [ ] `cargo test --workspace --all-features` succeeds +- [ ] `cargo clippy --workspace --all-features --all-targets` succeeds with no warnings +- [ ] Review #004 status updated to `resolved` in its frontmatter + +## References + +- docs/reviews/004-post-implementation-sanity-check.md — the review being closed +- tasks/call/protocol/abort-cascade-wiring.md — W1 fix task +- tasks/core/endpoint-client-fingerprint.md — W2 fix task +- tasks/vault/mnemonic-debug-redaction.md — W3 fix task +- tasks/core/auth-apikey-resources.md — W4 fix task + +## Notes + +> This review task mirrors the pattern of `vault/review-vault-sync`, +> `core/review-core`, and `call/review-call`: a `level: review` gate +> at the end of a fix batch, with `scope: moderate`, `risk: low`, +> `impact: phase`. It does not need to re-derive the findings — review +> #004 already did that work. It only needs to confirm the fixes land +> correctly and the workspace stays green. \ No newline at end of file diff --git a/tasks/vault/mnemonic-debug-redaction.md b/tasks/vault/mnemonic-debug-redaction.md new file mode 100644 index 0000000..ba63b1e --- /dev/null +++ b/tasks/vault/mnemonic-debug-redaction.md @@ -0,0 +1,110 @@ +--- +id: vault/mnemonic-debug-redaction +name: Replace Mnemonic derive(Debug) with redacting impl to prevent seed phrase leak +status: pending +depends_on: [] +scope: single +risk: low +impact: isolated +level: implementation +--- + +## Description + +`Mnemonic` (crates/alknet-vault/src/mnemonic.rs:35) currently derives +`Debug`: + +```rust +#[derive(Debug)] +pub struct Mnemonic { + inner: Bip39Mnemonic, + phrase: String, +} +``` + +The derived `Debug` prints both fields, including `phrase: "abandon +abandon abandon ..."`. The crate's own module doc (mnemonic.rs:8) says +"Seed material is protected with `Zeroize`" and the `Mnemonic::phrase()` +accessor (line 82) warns "Handle with care — this is the root of trust +for all derived keys." The `Zeroize + Drop` impls (lines 88–99) wipe +the phrase from memory on drop — but `Debug` will happily hand the +phrase to any `tracing::debug!`, `format!("{:?}")`, panic backtrace, +or error-context printer that touches a `Mnemonic` value. + +This is inconsistent with the rest of the crate's secret handling: +- `DerivedKey` has a custom redacting `Debug` (protocol.rs:48–56) + printing `private_key: "[REDACTED]"`. +- `EncryptionKey` has a custom redacting `Debug` (encryption.rs:132–139). +- `Capabilities` has a redacting `Debug` (types.rs:112–118). +- `Secret` has a redacting `Debug` (types.rs:51–55). +- `Mnemonic` ... derives `Debug` and prints the phrase. + +The root of trust should never have a `Debug` impl that can print it. +A future debugging session that adds `tracing::debug!(?mnemonic, ...)` +would land the seed phrase in a log file. + +### Resolution + +Replace `#[derive(Debug)]` with a manual impl that redacts, matching +the pattern used for `DerivedKey`: + +```rust +impl std::fmt::Debug for Mnemonic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Mnemonic") + .field("phrase", &"[REDACTED]") + .finish() + } +} +``` + +Also check `Seed` (mnemonic.rs:107–129) — it derives +`#[derive(Clone, Zeroize)]` with `#[zeroize(drop)]` but does not derive +`Debug`. Verify it has no `Debug` impl that prints `bytes`; if it does +(or if a future `derive(Debug)` would), add a redacting impl there too. + +### Test + +Add a test asserting `format!("{:?}", mnemonic)` does not contain any +word from the generated phrase. Pattern after +`test_derived_key_debug_redacts_private_key` (protocol.rs:106–118): + +```rust +#[test] +fn test_mnemonic_debug_redacts_phrase() { + let mnemonic = Mnemonic::generate(24).unwrap(); + let debug_output = format!("{:?}", mnemonic); + assert!( + debug_output.contains("[REDACTED]"), + "Debug must show [REDACTED] for phrase, got: {debug_output}" + ); + for word in mnemonic.phrase().split_whitespace() { + assert!( + !debug_output.contains(word), + "Debug must not leak phrase word '{word}', got: {debug_output}" + ); + } +} +``` + +## Acceptance Criteria + +- [ ] `Mnemonic` no longer derives `Debug`; has a manual redacting `Debug` impl +- [ ] `format!("{:?}", mnemonic)` contains `"[REDACTED]"` and no phrase word +- [ ] `Seed` checked — either has no `Debug` impl, or has a redacting one +- [ ] Unit test: `test_mnemonic_debug_redacts_phrase` passes +- [ ] Existing tests still pass (`cargo test -p alknet-vault`) +- [ ] `cargo clippy -p alknet-vault --all-targets` succeeds with no warnings + +## References + +- docs/reviews/004-post-implementation-sanity-check.md — W3 (full finding) +- crates/alknet-vault/src/protocol.rs:48–56 — `DerivedKey` redacting `Debug` (pattern to follow) +- crates/alknet-vault/src/encryption.rs:132–139 — `EncryptionKey` redacting `Debug` +- crates/alknet-core/src/types.rs:51–55 — `Secret` redacting `Debug` + +## Notes + +> Small fix, but eliminates a latent root-of-trust leak. The same +> pattern (custom redacting `Debug`) is already established in three +> other places in this codebase — this task brings `Mnemonic` in line. \ No newline at end of file