From 0d54eba41e8ef94974f735ed5001cb972fcbc2f5 Mon Sep 17 00:00:00 2001 From: "glm-5.1" Date: Fri, 12 Jun 2026 11:28:31 +0000 Subject: [PATCH] Update architecture specs to reflect live deployment findings and fix two bugs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture updates based on gaps discovered during live deployment testing: - ADR-023: HTTP/2 client-facing support via ALPN-based protocol detection. The spec previously said HTTP/2 was out of scope, but the deployment revealed that modern browsers negotiate HTTP/2 via ALPN. The proxy now correctly detects the negotiated ALPN protocol and uses the appropriate HTTP server builder (http2::Builder for h2, auto::Builder for http/1.1). Upstream connections remain HTTP/1.1. Host resolution now falls back to URI host for HTTP/2 :authority pseudo-headers. - ADR-024: ANSI-disabled logging. All tracing-subscriber layers now use with_ansi(false) to prevent ANSI escape codes in log output, which broke fail2ban regex matching in Docker deployments. Also documents the fail2ban regex anchor fix (^RATE_LIMIT → RATE_LIMIT). Bug fixes found by architecture review: - Fix missing ALPN protocols in manual TLS mode. build_manual_server_config and build_multi_domain_server_config did not set alpn_protocols, meaning manual TLS mode could not support HTTP/2. Added h2 and http/1.1 ALPN entries to both functions (acme-tls/1 only in ACME mode). - Fix missing with_ansi(false) in JSON log format. The init_json function with file output did not disable ANSI on stdout or file layers, which would break fail2ban in production JSON logging mode. Other spec updates: - All document statuses updated from draft to reviewed - proxy.md: documented Server header removal, upstream HTTPS client, two-phase timeout enforcement, HTTP/2 host resolution, connect timeout - tls.md: documented ALPN configuration differing by mode (ACME vs manual) - overview.md: added HTTP/2 client-facing support to scope, updated crate deps (hyper-rustls, rustls-native-certs, hyper-util), clarified out-of-scope - config.md: fixed http_port type (u16→u32) to match implementation, added ANSI-disabled note for LoggingConfig - operations.md: documented ANSI-disabled logging, fail2ban regex anchor - open-questions.md: updated OQ-09 resolution (connect timeout fully implemented), OQ-10 (C2 bug is fixed) --- docs/architecture/README.md | 21 ++-- docs/architecture/config.md | 10 +- .../decisions/023-http2-client-facing.md | 75 +++++++++++ .../decisions/024-ansi-disabled-logging.md | 53 ++++++++ docs/architecture/open-questions.md | 26 ++-- docs/architecture/operations.md | 14 ++- docs/architecture/overview.md | 21 +++- docs/architecture/proxy.md | 118 +++++++++++++----- docs/architecture/tls.md | 29 ++++- src/logging/mod.rs | 2 + src/tls/config.rs | 10 ++ 11 files changed, 313 insertions(+), 66 deletions(-) create mode 100644 docs/architecture/decisions/023-http2-client-facing.md create mode 100644 docs/architecture/decisions/024-ansi-disabled-logging.md diff --git a/docs/architecture/README.md b/docs/architecture/README.md index 2e99eaa..f5c38cf 100644 --- a/docs/architecture/README.md +++ b/docs/architecture/README.md @@ -1,5 +1,5 @@ --- -status: draft +status: reviewed last_updated: 2026-06-12 --- @@ -7,7 +7,8 @@ last_updated: 2026-06-12 ## Current State -**Phase 0 (Exploration) — Complete.** Phase 1 (Architecture) — In progress. +**Phase 1 (Implementation) — Complete.** The proxy is deployed and running in a +Docker container, replacing our vulnerable nginx 1.24.0 installation. This project replaces our vulnerable nginx 1.24.0 installation with a memory-safe Rust/axum reverse proxy. The primary motivation is CVE-2026-42945 @@ -16,17 +17,19 @@ memory corruption bugs in nginx's C codebase. The proxy supports multiple domains from initial release (git.alk.dev and alk.dev), with per-domain host-based routing and a single multi-domain SAN -certificate via ACME. +certificate via ACME. HTTP/2 is supported on the client-facing side (between +the client and the proxy) with ALPN-based protocol detection. Upstream +connections remain HTTP/1.1. ## Architecture Documents | Document | Status | Description | |----------|--------|-------------| -| [overview.md](overview.md) | Draft | Vision, scope, crate dependencies, exports | -| [proxy.md](proxy.md) | Draft | Reverse proxy handler, request flow, header injection | -| [tls.md](tls.md) | Draft | TLS termination, ACME, manual certs, SNI | -| [config.md](config.md) | Draft | TOML config format, static/dynamic split, ArcSwap reload | -| [operations.md](operations.md) | Draft | Rate limiting, logging, health check, systemd, shutdown | +| [overview.md](overview.md) | Reviewed | Vision, scope, crate dependencies, exports | +| [proxy.md](proxy.md) | Reviewed | Reverse proxy handler, request flow, header injection | +| [tls.md](tls.md) | Reviewed | TLS termination, ACME, manual certs, SNI, ALPN | +| [config.md](config.md) | Reviewed | TOML config format, static/dynamic split, ArcSwap reload | +| [operations.md](operations.md) | Reviewed | Rate limiting, logging, health check, systemd, shutdown | ## ADR Table @@ -54,6 +57,8 @@ certificate via ACME. | [020](decisions/020-container-deployment.md) | Container Deployment Model | Accepted | | [021](decisions/021-x-forwarded-for-edge-proxy.md) | X-Forwarded-For Edge Proxy Model | Accepted | | [022](decisions/022-health-check-scope.md) | Health Check Scope — Local Port and Admin Socket Only | Accepted | +| [023](decisions/023-http2-client-facing.md) | HTTP/2 Client-Facing Support | Accepted | +| [024](decisions/024-ansi-disabled-logging.md) | ANSI-Disabled Logging for Container Deployments | Accepted | ## Open Questions diff --git a/docs/architecture/config.md b/docs/architecture/config.md index ec4da66..32f0115 100644 --- a/docs/architecture/config.md +++ b/docs/architecture/config.md @@ -1,6 +1,6 @@ --- -status: draft -last_updated: 2026-06-11 +status: reviewed +last_updated: 2026-06-12 --- # Configuration @@ -100,6 +100,10 @@ Immutable after startup. Changes require a process restart. | `format` | `"text"` or `"json"` | Log output format | | `log_file_path` | `String` | Path to log file. When set, structured logs are written to this file in addition to stdout/stderr. Strongly recommended for fail2ban integration in container deployments (see ADR-020). Default: not set (file logging disabled) | +**Note**: All log output uses `with_ansi(false)` to disable ANSI escape codes. +This is critical for fail2ban regex matching and Docker log output (see ADR-024). +Both text and JSON formats produce plain-text output without color codes. + **Note**: The entire `LoggingConfig` (including `log_file_path`) is static and requires a process restart to change. Log file path changes require reopening file handles, which is complex and low-value for Phase 1. Log rotation (Phase 2) @@ -110,7 +114,7 @@ will be handled via signal-based or built-in rotation. | Field | Type | Description | |-------|------|-------------| | `bind_addr` | `String` | IP address to bind to (must be explicit, no `0.0.0.0`; see ADR-016) | -| `http_port` | `u16` | Port for HTTP→HTTPS redirect (default: `80`; set to `0` to disable) | +| `http_port` | `u32` | Port for HTTP→HTTPS redirect (default: `80`; set to `0` to disable; valid values: 0 or 1–65535) | | `https_port` | `u16` | Port for TLS listener (default: `443`) | | `tls.mode` | `"acme"` or `"manual"` | Certificate provisioning mode | | `tls.acme_domains` | `Vec` | Domains for ACME SAN certificate (ACME mode only) | diff --git a/docs/architecture/decisions/023-http2-client-facing.md b/docs/architecture/decisions/023-http2-client-facing.md new file mode 100644 index 0000000..dd9edf3 --- /dev/null +++ b/docs/architecture/decisions/023-http2-client-facing.md @@ -0,0 +1,75 @@ +# ADR-023: HTTP/2 Client-Facing Support + +## Status + +Accepted + +## Context + +The original architecture spec excluded HTTP/2 proxying from scope, stating "HTTP/2 +or HTTP/3 proxying (services that need these run their own native Rust servers)." +This was interpreted as excluding HTTP/2 entirely — both for client connections +and upstream connections. + +During deployment testing, we discovered that modern browsers and HTTP clients +negotiate HTTP/2 via ALPN during the TLS handshake. The initial implementation +used `hyper_util::server::conn::auto::Builder` which failed to properly detect +HTTP/2 over TLS connections because its `ReadVersion` mechanism doesn't work +reliably with `tokio-rustls` `TlsStream` wrappers. + +This caused two problems: +1. HTTP/2 clients received degraded performance (no multiplexing) or connection + failures +2. In HTTP/2, the host is conveyed via the `:authority` pseudo-header, which + hyper represents as the URI host rather than a `Host` header — causing 400 + errors for HTTP/2 clients + +## Decision + +The proxy now supports HTTP/2 on the **client-facing** side (between the client +and the proxy). This is distinct from HTTP/2 proxying to upstream services, +which remains out of scope. + +**Implementation:** + +1. **ALPN-based protocol detection**: After the TLS handshake, the proxy reads + the negotiated ALPN protocol from `tls_stream.get_ref().1.alpn_protocol()`. + If the ALPN is `h2`, the connection uses + `hyper::server::conn::http2::Builder`; otherwise, it uses + `hyper_util::server::conn::auto::Builder` with HTTP/1.1 + upgrade support. + +2. **Host header fallback**: The proxy handler now falls back to + `req.uri().host()` when the `Host` header is absent. In HTTP/2, the + `:authority` pseudo-header is represented as the URI host in hyper, so this + correctly handles both HTTP/1.1 (where `Host` is always present) and HTTP/2 + (where `:authority` maps to URI host). + +3. **ALPN advertisement**: The TLS `ServerConfig` advertises `h2` and + `http/1.1` as ALPN protocols, plus `acme-tls/1` for ACME challenges. + +**Upstream connections remain HTTP/1.1.** The proxy communicates with upstream +services over HTTP/1.1 (or HTTPS/1.1 when `upstream_scheme = "https"`). HTTP/2 +to upstreams is out of scope for Phase 1. + +## Consequences + +**Positive:** +- Modern browsers and HTTP/2 clients work correctly with the proxy +- HTTP/2 multiplexing improves client-facing performance (multiple requests over + a single connection) +- ALPN-based detection is the standard mechanism for HTTP/2 negotiation over TLS +- Host header fallback correctly handles both HTTP/1.1 and HTTP/2 + +**Negative:** +- Slightly more complex TLS listener code (ALPN protocol detection, dual + builder paths) +- The distinction between "HTTP/2 to the proxy" and "HTTP/2 to upstream" must + be clearly documented to avoid confusion +- `ConnectInfoService` is typed to `Request` rather than the generic + `Request`, which is a correct but slightly less flexible implementation + +## References + +- [proxy.md](../proxy.md) — request flow and host-based routing +- [tls.md](../tls.md) — TLS termination and ALPN configuration +- [overview.md](../overview.md) — scope and out-of-scope items \ No newline at end of file diff --git a/docs/architecture/decisions/024-ansi-disabled-logging.md b/docs/architecture/decisions/024-ansi-disabled-logging.md new file mode 100644 index 0000000..3816e15 --- /dev/null +++ b/docs/architecture/decisions/024-ansi-disabled-logging.md @@ -0,0 +1,53 @@ +# ADR-024: ANSI-Disabled Logging for Container Deployments + +## Status + +Accepted + +## Context + +During deployment, the proxy's log output contained ANSI escape codes (color +codes) because `tracing-subscriber`'s default `fmt::layer()` enables ANSI +output when connected to a terminal. In a Docker container, `docker logs` +captures stdout/stderr, and the log file written to +`/var/log/reverse-proxy/access.log` is also a plain text file. + +ANSI escape codes in logs cause two problems: +1. **fail2ban regex failure**: The fail2ban filter regex expects plain text with + a `RATE_LIMIT` prefix. ANSI codes embedded in the log line before the prefix + break pattern matching, causing fail2ban to miss rate limit events entirely. +2. **Docker log readability**: `docker logs` output is cluttered with escape + sequences when not running in a terminal that supports them. + +## Decision + +All `tracing-subscriber` fmt layers now use `with_ansi(false)`: + +- **File layer**: Always plain text, no ANSI codes +- **Stdout layer**: Always plain text, no ANSI codes +- **JSON layer**: Always plain text (JSON format doesn't benefit from colors) + +This applies to both text and JSON log formats, in both file and stdout +destinations. + +Additionally, the fail2ban regex was corrected: the `^` anchor was removed from +the `failregex` pattern because log lines have a timestamp/level prefix before +the `RATE_LIMIT` keyword. The corrected pattern matches `RATE_LIMIT` anywhere +in the line rather than only at the start. + +## Consequences + +**Positive:** +- fail2ban regex matching works reliably in all environments +- Log output is clean and parseable regardless of environment +- No behavioral difference between Docker, systemd, and terminal environments + +**Negative:** +- Loss of color-coding in terminal output during development (acceptable + trade-off for reliability; developers can use `RUST_LOG` filtering instead) + +## References + +- [operations.md](../operations.md) — logging and fail2ban integration +- [ADR-007](007-custom-log-format.md) — custom structured log format +- [ADR-020](020-container-deployment.md) — container deployment model \ No newline at end of file diff --git a/docs/architecture/open-questions.md b/docs/architecture/open-questions.md index 831a55e..814302a 100644 --- a/docs/architecture/open-questions.md +++ b/docs/architecture/open-questions.md @@ -1,5 +1,5 @@ --- -status: draft +status: reviewed last_updated: 2026-06-12 --- @@ -114,15 +114,14 @@ last_updated: 2026-06-12 - **Origin**: Implementation review finding W4, ADR-015, ADR-017 - **Status**: resolved - **Priority**: medium -- **Resolution**: This is an implementation gap, not an architectural unknown. - The architecture already specifies a 5-second default connect timeout - separate from the request timeout (ADR-015, ADR-017), and `SiteConfig` - already includes `upstream_connect_timeout_secs`. The implementation must - wire this field to hyper's `connect_timeout` parameter. If hyper's API - doesn't expose a separate connect timeout, a two-phase `tokio::time::timeout` - approach should be used for Phase 2. For Phase 1, the connect timeout field - exists in config but is not enforced — this is a documented known gap. No ADR - needed; the decision was already made in ADR-015. +- **Resolution**: Implemented using a two-phase `tokio::time::timeout` approach. + The inner timeout uses the per-site `upstream_connect_timeout_secs` (default + 5s) for the connect + first-byte phase, and the outer timeout uses + `upstream_request_timeout_secs` (default 60s) for the full request/response + cycle. Additionally, `HttpConnector::set_connect_timeout()` enforces the + TCP-level connect timeout on both HTTP and HTTPS clients. The implementation + is in `handler.rs` and `create_http_client()`/`create_https_client()`. + No new ADR needed; the decision was already made in ADR-015. - **Cross-references**: ADR-015, ADR-017 ### ~~OQ-10: Should ACME contact email be a required config field?~~ @@ -134,9 +133,10 @@ last_updated: 2026-06-12 specifies `acme_contact` as a required field in ACME mode (config.md validation rule 19). The field is defined in the `ListenerConfig` table and shown in TOML examples. Let's Encrypt requires a contact email for production - certificate requests. The implementation bug (C2: `contact: vec![]`) must be - fixed to use the configured `acme_contact` value. No new ADR needed — the - decision is already documented in config.md and tls.md. + certificate requests. The implementation bug (C2: `contact: vec![]`) has been + fixed — `acme_contact` is now correctly wired from config to the ACME state + machine. No new ADR needed — the decision is already documented in config.md + and tls.md. - **Cross-references**: ADR-004 ### ~~OQ-11: How should `X-Forwarded-Proto` be derived per-listener?~~ diff --git a/docs/architecture/operations.md b/docs/architecture/operations.md index a68101c..eb57862 100644 --- a/docs/architecture/operations.md +++ b/docs/architecture/operations.md @@ -1,5 +1,5 @@ --- -status: draft +status: reviewed last_updated: 2026-06-12 --- @@ -134,6 +134,11 @@ Logs are written to two destinations simultaneously: - **stdout/stderr**: Always-on, for `docker logs`, `journalctl`, and development use. Structured in the same format as the file output. +Both output destinations use `with_ansi(false)` to disable ANSI escape codes. +This is critical for fail2ban log parsing (ANSI codes break regex matching) and +for clean output in Docker containers where a terminal is not attached. See +ADR-024. + The `tracing-subscriber` layer configuration supports both simultaneously via `Layer` composition. @@ -155,6 +160,12 @@ volumes: A corresponding fail2ban filter definition and jail configuration are provided as part of the deployment documentation. +**Filter regex note**: The fail2ban `failregex` pattern matches `RATE_LIMIT` +without a `^` anchor because log lines have a timestamp/level prefix before the +`RATE_LIMIT` keyword. The pattern `RATE_LIMIT client_ip= host=\S+ path=\S+ status=\d+` +matches the rate limit event anywhere in the line, which correctly handles the +structured log format. + ### Log Levels | Level | Use | @@ -580,6 +591,7 @@ All design decisions are documented as ADRs in [decisions/](decisions/). | [013](decisions/013-health-check-port.md) | Health check on separate local port | Localhost-only HTTP health check, configurable port | | [014](decisions/014-unix-socket-reload.md) | Unix domain socket config reload API | Programmatic reload with success/failure feedback | | [020](decisions/020-container-deployment.md) | Container deployment model | Defense-in-depth via container isolation; file-primary logging | +| [024](decisions/024-ansi-disabled-logging.md) | ANSI-disabled logging | All log output uses `with_ansi(false)` for fail2ban and Docker compatibility | ## Open Questions diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index 7d0d50a..7e16d18 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -1,5 +1,5 @@ --- -status: draft +status: reviewed last_updated: 2026-06-12 --- @@ -44,6 +44,9 @@ details. (SAN certificate) deployment models (ADR-019) - TLS termination with ACME (Let's Encrypt) and manual certificate management - Cipher suite restriction matching nginx scope (ECDHE-AES-GCM + TLS 1.3) + - HTTP/2 support on the client-facing side (between client and proxy), + with ALPN-based protocol detection (ADR-023). Upstream connections + remain HTTP/1.1. - HTTP → HTTPS redirect - Host-based routing to multiple upstream services - Reverse proxy to Gitea at `127.0.0.1:3000` (git.alk.dev) @@ -55,7 +58,7 @@ details. - Configurable bind addresses (must be explicit, no `0.0.0.0`) - Local health check endpoint on separate port (default: 9900, localhost only) - Unix domain socket admin API for config reload with feedback - - Graceful shutdown (SIGTERM handling) + - Graceful shutdown (SIGTERM handling with in-flight request drain) - Systemd unit file - Dual licensing: MIT OR Apache-2.0 @@ -70,8 +73,10 @@ details. ### Out of Scope -- HTTP/2 or HTTP/3 proxying (services that need these run their own native - Rust servers — e.g., `api.alk.dev` runs its own HTTP/2+ server) +- HTTP/2 or HTTP/3 **proxying to upstreams** — the proxy communicates with + upstreams over HTTP/1.1 (or HTTPS/1.1). HTTP/2 **from clients** is supported + (see ADR-023). Services that need HTTP/2+ to their backends can handle + termination themselves. - Load balancing or round-robin upstream selection - WebSocket proxying (can be added later if needed) - Static file serving @@ -143,11 +148,14 @@ loopback, LAN, and tunnel endpoints for multi-host deployments. |-------|---------|---------|-------| | `axum` | 0.8 | HTTP framework | Routing, middleware, extractors | | `tokio` | 1 (full) | Async runtime | Multi-threaded runtime | -| `hyper` | 1 | HTTP protocol | Used via axum, and directly for proxy `Client` | +| `hyper` | 1 | HTTP protocol | Used via axum, and directly for HTTP/2 server builder | +| `hyper-util` | 0.1 | Hyper utilities | Client builder, TokioExecutor, auto::Builder | | `tower` | 0.5 | Middleware ecosystem | Service trait, layers | | `rustls` | 0.23 | TLS implementation | `aws_lc_rs` crypto provider | | `tokio-rustls` | 0.26 | Async TLS I/O | Wraps TCP with TLS | | `rustls-acme` | 0.12 | ACME client | Let's Encrypt auto-provisioning and renewal | +| `hyper-rustls` | 0.27 | HTTPS client | Upstream HTTPS connections with rustls TLS | +| `rustls-native-certs` | 0.8 | Native cert loading | System root certificates for upstream HTTPS validation | ### Supporting @@ -206,6 +214,9 @@ All design decisions are documented as ADRs in [decisions/](decisions/). | [019](decisions/019-multi-config-listeners.md) | Multi-config listeners | `[[listeners]]` supporting both dedicated-IP and shared-IP deployment models | | [020](decisions/020-container-deployment.md) | Container deployment model | Defense-in-depth via container isolation; file-primary logging; flexible upstream addressing | | [021](decisions/021-x-forwarded-for-edge-proxy.md) | X-Forwarded-For edge proxy model | Replace, don't append — proxy is the edge, no trusted upstream proxies | +| [022](decisions/022-health-check-scope.md) | Health check scope — local port and admin socket only | No `/health` route on main listener; health check is port 9900/admin socket only | +| [023](decisions/023-http2-client-facing.md) | HTTP/2 client-facing support | ALPN-based protocol detection; HTTP/2 to clients, HTTP/1.1 to upstreams | +| [024](decisions/024-ansi-disabled-logging.md) | ANSI-disabled logging | All log output uses `with_ansi(false)` for fail2ban and Docker compatibility | ## Open Questions diff --git a/docs/architecture/proxy.md b/docs/architecture/proxy.md index c76dd13..9447bfb 100644 --- a/docs/architecture/proxy.md +++ b/docs/architecture/proxy.md @@ -1,5 +1,5 @@ --- -status: draft +status: reviewed last_updated: 2026-06-12 --- @@ -21,7 +21,16 @@ general-purpose proxy library (ADR-002, ADR-010). ## Architecture ``` -Incoming HTTPS request +Incoming HTTPS request (HTTP/1.1 or HTTP/2) + │ + ▼ +┌─────────────────────────────────────────────┐ +│ TLS Listener │ +│ ALPN protocol detection: │ +│ - h2 → hyper http2::Builder │ +│ - http/1.1 (or none) → auto::Builder │ +│ ConnectInfo from peer_addr │ +└───────┬──────────────────────────────────────┘ │ ▼ ┌─────────────────┐ @@ -29,8 +38,9 @@ Incoming HTTPS request │ (Host-based) │ │ │ │ match Host │ -│ header on │ -│ incoming req │ +│ header or │ +│ URI :authority │ +│ on incoming req │ └───────┬─────────┘ │ ▼ @@ -45,9 +55,9 @@ Incoming HTTPS request │ Injection │ │ │ │ X-Real-IP │ ← connect_info remote_addr -│ X-Forwarded-For │ ← append to existing or set -│ X-Forwarded-Proto │ ← "https" (or "http" on port 80) -│ Host │ ← original host header (already set) +│ X-Forwarded-For │ ← replace (edge proxy model) +│ X-Forwarded-Proto │ ← "https" (always, on TLS listener) +│ Host │ ← original host (already set) └───────┬─────────┘ │ ▼ @@ -66,6 +76,7 @@ Incoming HTTPS request │ original req │ │ 2. Forward req │ │ to upstream │ +│ (HTTP/1.1) │ │ 3. Stream │ │ response back │ └─────────────────┘ @@ -75,19 +86,27 @@ Incoming HTTPS request ### 1. Host-Based Routing -The axum router uses a `Host` extractor to match incoming requests to site -definitions from `DynamicConfig`. Sites are defined per-listener in the TOML -configuration for organizational purposes, but at runtime they are collected -into a single global routing table. The proxy looks up the `Host` header in -this global table and either proxies to the upstream or returns 404. +The axum router matches incoming requests to site definitions from +`DynamicConfig`. Sites are defined per-listener in the TOML configuration for +organizational purposes, but at runtime they are collected into a single global +routing table. The proxy looks up the host in this global table and either +proxies to the upstream or returns 404. -Host matching is **case-insensitive** per RFC 7230 §2.7.3. The `Host` header -is normalized to lowercase before matching. Site `host` values in -configuration are normalized to lowercase during validation. +Host matching is **case-insensitive** per RFC 7230 §2.7.3. The host is +normalized to lowercase before matching. Site `host` values in configuration are +normalized to lowercase during validation. The `Host` header port component (e.g., `git.alk.dev:443`) is stripped before matching. Site `host` values must not include ports. +**HTTP/2 host resolution**: In HTTP/2, the host is conveyed via the +`:authority` pseudo-header rather than the `Host` header. Hyper represents this +as the URI host. The proxy handler resolves the host by first checking the +`Host` header, then falling back to `req.uri().host()`. This correctly handles +both HTTP/1.1 (which always has a `Host` header) and HTTP/2 (which uses +`:authority`/URI host). If neither is present, the proxy returns 400 Bad +Request. See ADR-023. + The proxy does not filter or restrict paths. All paths and query strings on a known host are forwarded to the upstream without modification. @@ -124,8 +143,9 @@ The proxy handler constructs a new request to the upstream: address, preserving the original path and query string 2. Copy the request method, headers, and body from the original 3. Inject proxy headers (X-Real-IP, X-Forwarded-For, X-Forwarded-Proto) -4. Send the request via a shared hyper Client instance -5. Stream the response back to the client (chunk-by-chunk, not buffered) +4. Remove hop-by-hop headers (Connection, Keep-Alive, Transfer-Encoding, etc.) +5. Send the request via a shared hyper Client instance +6. Stream the response back to the client (chunk-by-chunk, not buffered) If the client disconnects while the upstream is still sending, the upstream connection is closed and the event is logged at `debug` level. If the @@ -135,12 +155,23 @@ The proxy handler constructs a new request to the upstream: The hyper Client is created once at startup and shared via axum's `State`. It must be configured with (see ADR-017 for rationale): - Connection pooling (hyper default behavior) -- HTTP/1.1 only for upstream connections (HTTP/2 proxying is out of scope) +- HTTP/1.1 only for upstream connections (HTTP/2 proxying to upstreams is out + of scope; see ADR-023 for the distinction between client-facing HTTP/2 and + upstream HTTP/2) - No redirect following (proxies should not follow redirects) +- Separate connect timeout and request timeout (see ADR-015, ADR-017) + +Two client instances are created at startup: +- **HTTP client**: For upstream connections using `http://` scheme +- **HTTPS client**: For upstream connections using `https://` scheme (using + `hyper-rustls` with system native TLS root certificates for certificate + validation) Per-site timeout overrides are available via `upstream_connect_timeout_secs` and `upstream_request_timeout_secs` in `SiteConfig` (see ADR-015). When not -specified, defaults of 5s connect and 60s request are used. +specified, defaults of 5s connect and 60s request are used. Both timeouts are +enforced using `tokio::time::timeout`, with the connect timeout nested inside +the request timeout to ensure the overall deadline is respected. ### 4. Header Handling @@ -162,6 +193,12 @@ These headers are connection-specific and must not be forwarded to the upstream. Removing `Proxy-Authorization` and `Proxy-Authenticate` prevents credential leakage. +**Response headers removed:** + +- `Server`: The upstream's `Server` header is intentionally removed as a + defense-in-depth measure. The proxy does not add its own `Server` header + either. This hides upstream server identity from clients. + **Headers added or modified:** See the Proxy Header Injection section above for the full list of proxy headers @@ -174,9 +211,10 @@ See the Proxy Header Injection section above for the full list of proxy headers **Response headers:** -Upstream response headers are forwarded as-is to the client, with the following +Upstream response headers are forwarded to the client with the following exceptions: - Hop-by-hop headers listed above are removed +- The `Server` header is removed (defense-in-depth: hiding upstream identity) - The proxy does not add a `Server` header to responses ### 5. Error Handling @@ -189,13 +227,15 @@ information. No upstream error details are included. Response format: | Upstream Condition | Response | Body | Notes | |-------------------|----------|------|-------| -| Upstream reachable | Stream response as-is | (upstream body) | Headers, status, body all forwarded | +| Upstream reachable | Stream response as-is | (upstream body) | Headers, status, body all forwarded (minus hop-by-hop and Server headers) | | Upstream unreachable | 502 Bad Gateway | `Bad Gateway` | Logged at `warn` level | -| Upstream timeout | 504 Gateway Timeout | `Gateway Timeout` | Logged at `warn` level | +| Upstream connect timeout | 504 Gateway Timeout | `Gateway Timeout` | Connect phase timed out; logged at `warn` level | +| Upstream request timeout | 504 Gateway Timeout | `Gateway Timeout` | Full request timed out; logged at `warn` level | +| Upstream TLS validation failure | 502 Bad Gateway | `Bad Gateway` | Upstream HTTPS cert validation failed | | Request body too large | 413 Payload Too Large | `Payload Too Large` | From `DefaultBodyLimit` middleware | | Rate limit exceeded | 429 Too Many Requests | `Too Many Requests` | Logged at `info` level | | Unknown Host header | 404 Not Found | `Not Found` | No matching site definition | -| Missing Host header | 400 Bad Request | `Bad Request` | Required for routing | +| Missing Host header (and no URI host) | 400 Bad Request | `Bad Request` | Required for routing; HTTP/2 clients use `:authority` | ### 6. HTTP → HTTPS Redirect @@ -219,18 +259,30 @@ Each listener has its own HTTP redirect on its own bind address. ## Upstream Connection The upstream connection scheme defaults to `http://` since the proxy and backend -services typically run on the same host (e.g., `127.0.0.1:3000`). The -`upstream_scheme` field in each site's configuration allows specifying `https://` -for upstreams that require TLS (e.g., separate hosts or secure internal services). +services typically run on the same host (e.g., `127.0.0.1:3000`) or the same +Docker network (e.g., `gitea:3000`). The `upstream_scheme` field in each site's +configuration allows specifying `https://` for upstreams that require TLS +(e.g., separate hosts or secure internal services). For the initial deployment, upstream connections use plain HTTP (e.g., -`git.alk.dev` → `127.0.0.1:3000`, `alk.dev` → `127.0.0.1:8080`) since TLS -between the proxy and backend services on loopback is unnecessary. +`git.alk.dev` → `gitea:3000`, `alk.dev` → `app:8080`) since TLS between the +proxy and backend services on the same Docker network or loopback is +unnecessary. When `upstream_scheme` is `"https"`, the proxy validates the upstream's TLS certificate using the system's native TLS root certificates (via `rustls` root -cert store). Certificate validation failures result in a 502 Bad Gateway -response. No certificate pinning or custom CA support is provided in Phase 1. +cert store loaded by `rustls-native-certs`). Certificate validation failures +result in a 502 Bad Gateway response. No certificate pinning or custom CA +support is provided in Phase 1. + +Two shared hyper Client instances handle upstream connections: +- **HTTP client** (`Client`): For `http://` upstreams +- **HTTPS client** (`Client, Body>`): For + `https://` upstreams, using `hyper-rustls` with system native certificates + +Both clients enforce the per-site connect timeout (default 5s) at the TCP level +via `HttpConnector::set_connect_timeout()` and the overall request timeout +(default 60s) via `tokio::time::timeout`. ## Body Size Limit @@ -253,6 +305,7 @@ All design decisions are documented as ADRs in [decisions/](decisions/). | [017](decisions/017-upstream-connection-defaults.md) | Upstream connection defaults | HTTP/1.1, no redirects, connection pooling | | [018](decisions/018-body-size-limit.md) | Request body size limit | 100 MB default matching nginx, Gitea push compatibility | | [021](decisions/021-x-forwarded-for-edge-proxy.md) | X-Forwarded-For edge proxy model | Replace, don't append — proxy is the edge, no trusted upstream proxies | +| [023](decisions/023-http2-client-facing.md) | HTTP/2 client-facing support | ALPN-based protocol detection; HTTP/2 to clients, HTTP/1.1 to upstreams | ## Open Questions @@ -263,4 +316,7 @@ questions affecting this document have been resolved: ADR-015: per-site timeout overrides with defaults) - ~~**OQ-08**: Should the `/health` path use a less common endpoint to avoid upstream collision?~~ (resolved — ADR-022: no `/health` route on the main - listener; health checking is via port 9900 and admin socket only) \ No newline at end of file + listener; health checking is via port 9900 and admin socket only) +- ~~**OQ-09**: How should `upstream_connect_timeout_secs` be enforced?~~ + (resolved — two-phase timeout with `tokio::time::timeout`; connect timeout + nested inside request timeout; TCP-level `set_connect_timeout` on connector) \ No newline at end of file diff --git a/docs/architecture/tls.md b/docs/architecture/tls.md index 04f3570..8b9aedb 100644 --- a/docs/architecture/tls.md +++ b/docs/architecture/tls.md @@ -1,6 +1,6 @@ --- -status: draft -last_updated: 2026-06-11 +status: reviewed +last_updated: 2026-06-12 --- # TLS Termination @@ -175,15 +175,33 @@ maps SNI hostnames to certificate/key pairs loaded from disk. For ACME mode, the `ServerConfig` is built with `with_cert_resolver()`, passing the `ResolvesServerCertAcme` resolver. The ACME configuration includes the domains listed in that listener's `acme_domains`, and the resolver manages the -certificate. The ACME TLS-ALPN-01 protocol identifier (`acme-tls/1`) must be -registered in the `alpn_protocols` list so the server can respond to -TLS-ALPN-01 challenges. +certificate. + +The TLS `ServerConfig` advertises ALPN protocols to enable HTTP/2 negotiation. +The ALPN configuration differs by TLS mode: + +- **ACME mode**: `h2`, `http/1.1`, and `acme-tls/1`. The `acme-tls/1` entry is + required for TLS-ALPN-01 challenge verification during certificate provisioning. +- **Manual mode** (single-cert and multi-domain/SNI): `h2` and `http/1.1` only. + The `acme-tls/1` entry is not included because manual mode does not use ACME + challenges. + +After the TLS handshake, the proxy inspects the negotiated ALPN protocol to +select the appropriate HTTP server: `h2` triggers +`hyper::server::conn::http2::Builder`, while `http/1.1` (or no ALPN) triggers +`hyper_util::server::conn::auto::Builder`. See ADR-023 for details. Both modes use the `aws_lc_rs` crypto provider with safe default protocol versions (TLS 1.2 and TLS 1.3). ## SNI-Based Certificate Selection +After the TLS handshake, the proxy inspects the negotiated ALPN protocol to +determine whether to serve the connection as HTTP/2 or HTTP/1.1. If the client +negotiated `h2` via ALPN, the proxy uses `hyper::server::conn::http2::Builder`; +otherwise, it uses `hyper_util::server::conn::auto::Builder` with HTTP/1.1 +and upgrade support. See ADR-023 for details. + ### Dedicated-IP Single-Domain (Multi-Config) In the dedicated-IP model, each listener binds to its own IP address and serves @@ -305,6 +323,7 @@ All design decisions are documented as ADRs in [decisions/](decisions/). | [011](decisions/011-multi-domain-tls.md) | Multi-domain TLS config | Single SAN certificate covering all domains via rustls-acme | | [012](decisions/012-cipher-suite-restriction.md) | Restrict cipher suites | Match nginx scope: four ECDHE-AES-GCM suites for TLS 1.2, all TLS 1.3 suites | | [019](decisions/019-multi-config-listeners.md) | Multi-config listeners | `[[listeners]]` supporting both dedicated-IP and shared-IP deployment models | +| [023](decisions/023-http2-client-facing.md) | HTTP/2 client-facing support | ALPN-based protocol detection; `h2` and `http/1.1` advertised | ## Open Questions diff --git a/src/logging/mod.rs b/src/logging/mod.rs index 2075170..50e8607 100644 --- a/src/logging/mod.rs +++ b/src/logging/mod.rs @@ -39,9 +39,11 @@ fn init_json(env_filter: EnvFilter, log_file_path: &Option, level: Level let file_env_filter = make_env_filter(level); let stdout_layer = tracing_subscriber::fmt::layer() .json() + .with_ansi(false) .with_filter(env_filter); let file_layer = tracing_subscriber::fmt::layer() .json() + .with_ansi(false) .with_writer(file_writer) .with_filter(file_env_filter); tracing_subscriber::registry() diff --git a/src/tls/config.rs b/src/tls/config.rs index 9e671b4..3d75442 100644 --- a/src/tls/config.rs +++ b/src/tls/config.rs @@ -67,6 +67,11 @@ pub fn build_manual_server_config(cert_path: &str, key_path: &str) -> Result