Compare commits
60 Commits
fix/acme-c
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 6400c90cb3 | |||
| 75d9c263cb | |||
| ccb574c259 | |||
| 4ee9486561 | |||
| 9730d155d2 | |||
| 64a651242c | |||
| cf3f00fc53 | |||
| a8155d92f9 | |||
| 717ee8e6cd | |||
| dbedb8846c | |||
| f6e6e15ebf | |||
| d9b3a436f1 | |||
| 855c0f1d67 | |||
| 8ff8c71783 | |||
| c2201707bb | |||
| 603d722ad0 | |||
| 21186b8265 | |||
| 1ae06b0478 | |||
| 8ac39d9cd8 | |||
| d338dcab38 | |||
| 77117c29eb | |||
| 9a3b8831c7 | |||
| 245d2a69ff | |||
| 01e3b1cd9a | |||
| 5ea0bee73f | |||
| d224d7b409 | |||
| 16ec84eea2 | |||
| b0f83669c0 | |||
| eb46d9825a | |||
| 075624368b | |||
| 80b90b5716 | |||
| 636807d26e | |||
| 159eeda266 | |||
| 66cd116d54 | |||
| 42b74f92af | |||
| e2440f2edb | |||
| 4c6b55a780 | |||
| db982e9c4d | |||
| e6d22bdcb8 | |||
| ad9b9b9b78 | |||
| 77ea1160de | |||
| 1ba1d2a4de | |||
| 05fea1a8e2 | |||
| 54f1725173 | |||
| 80d1fd0fb3 | |||
| 4f537c80d2 | |||
| c8ab794ef3 | |||
| 0d54eba41e | |||
| c2eefddb4f | |||
| 9ebb8ee7a8 | |||
| da28ea749d | |||
| cfba7491ae | |||
| cbcd746c9f | |||
| 8f3c56e6bc | |||
| 9b3fe23499 | |||
| 516efb0403 | |||
| 0c769e682e | |||
| 1da01a2336 | |||
| 6cb0f8e6fe | |||
| 280fe782a1 |
156
AGENTS.md
Normal file
156
AGENTS.md
Normal file
@@ -0,0 +1,156 @@
|
||||
# AGENTS.md
|
||||
|
||||
Guidance for LLM agents (and humans) working on this project.
|
||||
|
||||
## Project Overview
|
||||
|
||||
A memory-safe reverse proxy built with Rust/axum that replaces vulnerable nginx
|
||||
installations. Terminates TLS, routes requests by Host header to upstream
|
||||
services, enforces rate limits, and injects proxy headers. See `README.md` and
|
||||
`docs/architecture/` for full details.
|
||||
|
||||
## Build & Run
|
||||
|
||||
```bash
|
||||
cargo build # debug build
|
||||
cargo build --release # release build
|
||||
cargo test # run all tests (unit + integration)
|
||||
cargo test -- --nocapture # run tests with stdout visible
|
||||
cargo clippy # lint
|
||||
reverse-proxy --config config.toml # run (defaults to /etc/reverse-proxy/config.toml)
|
||||
reverse-proxy --validate --config config.toml # validate config only
|
||||
```
|
||||
|
||||
For a static binary with no libc dependency:
|
||||
|
||||
```bash
|
||||
cargo build --release --target x86_64-unknown-linux-musl
|
||||
```
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── main.rs # Entry point, server startup, listener binding
|
||||
├── cli.rs # CLI parsing (clap), config loading, validation
|
||||
├── lib.rs # Library root, module declarations
|
||||
├── config/
|
||||
│ ├── static_config.rs # StaticConfig — immutable, requires restart
|
||||
│ ├── dynamic_config.rs# DynamicConfig — hot-reloadable via ArcSwap
|
||||
│ ├── validation.rs # Config validation rules (called at startup and reload)
|
||||
│ └── test_fixtures.rs # Test config generation helpers
|
||||
├── proxy/
|
||||
│ ├── handler.rs # Core reverse proxy handler (forward requests to upstream)
|
||||
│ ├── headers.rs # Proxy header injection (X-Real-IP, X-Forwarded-For, etc.)
|
||||
│ ├── body_limit.rs # Request body size limiting middleware
|
||||
│ ├── error.rs # Error response types (502, 504, 429, etc.)
|
||||
│ └── mod.rs # Router construction, client creation
|
||||
├── tls/
|
||||
│ ├── acceptor.rs # TLS acceptor setup (manual + ACME)
|
||||
│ ├── acme.rs # ACME certificate provisioning via rustls-acme
|
||||
│ ├── config.rs # TLS ServerConfig construction, cipher suites
|
||||
│ └── redirect.rs # HTTP → HTTPS 301 redirect listener
|
||||
├── rate_limit/
|
||||
│ ├── mod.rs # Rate limiting middleware, eviction task
|
||||
│ └── bucket.rs # Token bucket implementation (IPv4 /32, IPv6 /64)
|
||||
├── admin/
|
||||
│ ├── socket.rs # Unix domain socket admin API (reload, status)
|
||||
│ └── mod.rs
|
||||
├── health.rs # Health check endpoint on localhost:9900
|
||||
├── logging/
|
||||
│ ├── mod.rs # Logging init (file + stdout, ANSI disabled)
|
||||
│ └── format.rs # Structured log format (REQUEST, RATE_LIMIT, etc.)
|
||||
├── server.rs # HTTPS listener serving with ALPN detection
|
||||
├── shutdown.rs # Graceful shutdown (SIGTERM, SIGINT) + SIGHUP reload
|
||||
└── utils.rs # Shared utilities
|
||||
```
|
||||
|
||||
## Key Architecture Concepts
|
||||
|
||||
- **StaticConfig vs DynamicConfig**: Static config (bind addresses, TLS,
|
||||
ports) requires a restart. Dynamic config (sites, rate limits, body limits)
|
||||
can be reloaded at runtime via SIGHUP or admin socket, using `ArcSwap` for
|
||||
lock-free reads.
|
||||
- **Multi-listener**: `[[listeners]]` in TOML — each listener has its own bind
|
||||
address, TLS config, and site routing. Sites are collected into a global
|
||||
routing table at runtime.
|
||||
- **Edge proxy model**: The proxy is the edge — X-Forwarded-For is replaced
|
||||
(not appended), X-Real-IP is set from the connection's remote address.
|
||||
- **No `/health` on public listener**: Health checking is localhost:9900 only.
|
||||
The main listener does not intercept any paths.
|
||||
- **HTTP/2 client-facing only**: ALPN detects h2 vs http/1.1. Upstream
|
||||
connections are always HTTP/1.1.
|
||||
- **IPv6 rate limiting**: IPv6 addresses are normalized to /64 prefixes so
|
||||
addresses within the same /64 share a token bucket.
|
||||
|
||||
## Config Format
|
||||
|
||||
TOML. See `docs/architecture/config.md` for full schema. Key validation rules:
|
||||
|
||||
- `bind_addr` must be explicit (no `0.0.0.0`) unless `allow_wildcard_bind` is
|
||||
enabled via config or `--allow-wildcard-bind` CLI flag (OR logic)
|
||||
- Site `host` values must be unique across all listeners
|
||||
- `upstream` must be in `host:port` format (e.g., `gitea:3000`, `127.0.0.1:3000`)
|
||||
- ACME mode requires `acme_domains` (non-empty) and `acme_contact` (valid
|
||||
`mailto:` URI)
|
||||
- Manual mode requires `cert_path` and `key_path` pointing to readable files
|
||||
- `rate_limit.requests_per_second` and `rate_limit.burst` must be > 0
|
||||
- `body.limit_bytes` must be > 0
|
||||
- `http_port` must be 0 (disabled) or 1–65535; `https_port` must be 1–65535
|
||||
- `health_check_port` must not conflict with any listener's http_port or
|
||||
https_port on the same bind address
|
||||
|
||||
## Testing
|
||||
|
||||
Tests use `rcgen` for self-signed certificate generation and `reqwest` for
|
||||
HTTP client requests. Integration tests are in `tests/integration_test.rs`
|
||||
with helpers in `tests/helpers/`.
|
||||
|
||||
```bash
|
||||
cargo test # all tests
|
||||
cargo test --test integration # integration tests only
|
||||
cargo test --lib # unit tests only
|
||||
```
|
||||
|
||||
## Code Style
|
||||
|
||||
- No comments unless explicitly requested
|
||||
- Error handling uses `anyhow` for application code and `thiserror` for
|
||||
library error types
|
||||
- Structured logging with `tracing` — always `with_ansi(false)`
|
||||
- Config types implement `serde::Deserialize` for TOML parsing
|
||||
- All network operations use `tokio` async runtime
|
||||
|
||||
## Deployment Files
|
||||
|
||||
`deploy/` contains production-ready deployment configs:
|
||||
|
||||
- `Dockerfile` — multi-stage build (rust:alpine → alpine)
|
||||
- `docker-compose.yml` — complete setup with Gitea example
|
||||
- `reverse-proxy.service` — systemd unit file with security hardening
|
||||
- `fail2ban/` — filter and jail config for rate limit log parsing
|
||||
|
||||
See `deploy/README.md` for step-by-step setup instructions.
|
||||
|
||||
## Common Modifications
|
||||
|
||||
### Adding a new site
|
||||
|
||||
Add a `[[listeners.sites]]` entry to config and reload:
|
||||
|
||||
```bash
|
||||
echo "reload" | socat - UNIX-CONNECT:/run/reverse-proxy/admin.sock
|
||||
```
|
||||
|
||||
### Changing rate limits
|
||||
|
||||
Update `[rate_limit]` in config and reload (no restart needed).
|
||||
|
||||
### Changing bind address or TLS config
|
||||
|
||||
These are in StaticConfig — require a full process restart.
|
||||
|
||||
### Adding per-site timeouts
|
||||
|
||||
Set `upstream_connect_timeout_secs` and `upstream_request_timeout_secs` on a
|
||||
site definition. Defaults are 5s connect, 60s request.
|
||||
219
LICENSE
Normal file
219
LICENSE
Normal file
@@ -0,0 +1,219 @@
|
||||
Dual Licensing: MIT OR Apache-2.0
|
||||
|
||||
You may use this software under either of the following licenses:
|
||||
|
||||
=== MIT License ===
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 alkdev
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
=== Apache License, Version 2.0 ===
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to the Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by the Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement You may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work on an "AS IS"
|
||||
BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2026 alkdev
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
414
README.md
Normal file
414
README.md
Normal file
@@ -0,0 +1,414 @@
|
||||
# reverse-proxy
|
||||
|
||||
A memory-safe reverse proxy built with Rust and axum, designed to replace
|
||||
vulnerable nginx installations for TLS-terminated host-based routing.
|
||||
|
||||
## Why
|
||||
|
||||
nginx's C codebase has a long history of memory corruption vulnerabilities, and
|
||||
the discovery rate is accelerating. CVE-2026-42945 ("NGINX Rift") is an
|
||||
unauthenticated RCE via the `rewrite` module with a public PoC and active
|
||||
exploitation — and 6 of 7 recent nginx CVEs are memory corruption bugs that
|
||||
Rust eliminates by construction.
|
||||
|
||||
This proxy targets a specific use case: TLS termination, host-based routing,
|
||||
and request forwarding to upstream services. It is not a general-purpose web
|
||||
server or load balancer.
|
||||
|
||||
## Features
|
||||
|
||||
- **TLS termination** — ACME (Let's Encrypt) with automatic provisioning and
|
||||
renewal, or manual certificates
|
||||
- **HTTP/2** — ALPN-based protocol detection on the client-facing side; upstream
|
||||
connections use HTTP/1.1
|
||||
- **Multi-site routing** — host-based routing to multiple upstream services from
|
||||
a single process
|
||||
- **Multiple listeners** — dedicated-IP (one IP per domain) or shared-IP
|
||||
(SAN certificate) deployment models
|
||||
- **Rate limiting** — per-IP token bucket with fail2ban-compatible structured
|
||||
logging (IPv6 rate limited per /64 prefix)
|
||||
- **Proxy headers** — X-Real-IP, X-Forwarded-For (edge proxy model), X-Forwarded-Proto
|
||||
- **Hot config reload** — SIGHUP or admin Unix domain socket with success/failure
|
||||
feedback
|
||||
- **Health check** — localhost-only endpoint on a separate port (default: 9900)
|
||||
- **HTTP → HTTPS redirect** — per-listener redirect on port 80
|
||||
- **Graceful shutdown** — SIGTERM with in-flight request drain
|
||||
- **systemd integration** — `Type=notify` with `sd_notify`
|
||||
- **Container-ready** — Docker deployment with health check and fail2ban volume
|
||||
mount
|
||||
- **Restricted cipher suites** — ECDHE-AES-GCM for TLS 1.2, all TLS 1.3 suites
|
||||
(matching nginx scope)
|
||||
|
||||
## Quick Start
|
||||
|
||||
### Build
|
||||
|
||||
```bash
|
||||
cargo build --release
|
||||
```
|
||||
|
||||
Produces a static binary at `target/release/reverse-proxy`. For a fully static
|
||||
binary (no libc dependency), build with the `x86_64-unknown-linux-musl` target.
|
||||
|
||||
### Minimal Config
|
||||
|
||||
Create `/etc/reverse-proxy/config.toml`:
|
||||
|
||||
```toml
|
||||
health_check_port = 9900
|
||||
|
||||
[logging]
|
||||
level = "info"
|
||||
format = "text"
|
||||
|
||||
[rate_limit]
|
||||
requests_per_second = 10
|
||||
burst = 20
|
||||
|
||||
[body]
|
||||
limit_bytes = 104857600
|
||||
|
||||
[[listeners]]
|
||||
bind_addr = "0.0.0.0"
|
||||
|
||||
[listeners.tls]
|
||||
mode = "acme"
|
||||
acme_domains = ["example.com"]
|
||||
acme_cache_dir = "/var/lib/reverse-proxy/acme-cache"
|
||||
acme_directory = "staging"
|
||||
acme_contact = "mailto:admin@example.com"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "example.com"
|
||||
upstream = "127.0.0.1:8080"
|
||||
```
|
||||
|
||||
> **Note:** `bind_addr = "0.0.0.0"` requires the `--allow-wildcard-bind` flag or
|
||||
> `allow_wildcard_bind = true` in config. This is intentional — see
|
||||
> [Explicit bind address](#explicit-bind-address).
|
||||
|
||||
### Run
|
||||
|
||||
```bash
|
||||
reverse-proxy --config /etc/reverse-proxy/config.toml
|
||||
```
|
||||
|
||||
Or with Docker (see [Deployment](#deployment)).
|
||||
|
||||
### Validate Config
|
||||
|
||||
```bash
|
||||
reverse-proxy --config /etc/reverse-proxy/config.toml --validate
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
Configuration uses TOML and is split into **static** (requires restart) and
|
||||
**dynamic** (hot-reloadable) sections.
|
||||
|
||||
### Static Config (requires restart)
|
||||
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `listeners` | (required) | TLS listener definitions |
|
||||
| `allow_wildcard_bind` | `false` | Allow `0.0.0.0` bind addresses |
|
||||
| `health_check_port` | `9900` | Local health check port (`0` to disable) |
|
||||
| `admin_socket_path` | `/run/reverse-proxy/admin.sock` | Admin Unix socket (empty string to disable) |
|
||||
| `shutdown_timeout_secs` | `30` | Graceful shutdown timeout |
|
||||
| `logging.level` | `"info"` | Log level |
|
||||
| `logging.format` | `"text"` | Log format (`"text"` or `"json"`) |
|
||||
| `logging.log_file_path` | (not set) | Path to log file for fail2ban |
|
||||
|
||||
### Dynamic Config (hot-reloadable via SIGHUP or admin socket)
|
||||
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `sites[].host` | (required) | Hostname to match |
|
||||
| `sites[].upstream` | (required) | Upstream `host:port` address |
|
||||
| `sites[].upstream_scheme` | `"http"` | Upstream protocol (`"http"` or `"https"`) |
|
||||
| `sites[].upstream_connect_timeout_secs` | `5` | TCP connect timeout |
|
||||
| `sites[].upstream_request_timeout_secs` | `60` | Full request timeout |
|
||||
| `rate_limit.requests_per_second` | (required) | Per-IP request rate |
|
||||
| `rate_limit.burst` | (required) | Burst capacity |
|
||||
| `body.limit_bytes` | (required) | Max request body size |
|
||||
|
||||
### TLS Modes
|
||||
|
||||
**ACME** (automatic Let's Encrypt certificates):
|
||||
|
||||
```toml
|
||||
[[listeners]]
|
||||
bind_addr = "203.0.113.10"
|
||||
|
||||
[listeners.tls]
|
||||
mode = "acme"
|
||||
acme_domains = ["git.example.com", "example.com"]
|
||||
acme_cache_dir = "/var/lib/reverse-proxy/acme-cache"
|
||||
acme_directory = "production"
|
||||
acme_contact = "mailto:admin@example.com"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "git.example.com"
|
||||
upstream = "gitea:3000"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "example.com"
|
||||
upstream = "app:8080"
|
||||
```
|
||||
|
||||
**Manual** (bring your own certificates):
|
||||
|
||||
```toml
|
||||
[[listeners]]
|
||||
bind_addr = "203.0.113.11"
|
||||
|
||||
[listeners.tls]
|
||||
mode = "manual"
|
||||
cert_path = "/etc/ssl/example.com/fullchain.pem"
|
||||
key_path = "/etc/ssl/example.com/privkey.pem"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "example.com"
|
||||
upstream = "127.0.0.1:8080"
|
||||
```
|
||||
|
||||
### Explicit Bind Address
|
||||
|
||||
By default, `bind_addr` must be an explicit IP address. `0.0.0.0` is rejected
|
||||
to prevent accidental exposure. For container deployments where the proxy binds
|
||||
inside the container and Docker handles port publishing, enable wildcard binding
|
||||
with either:
|
||||
|
||||
- Config: `allow_wildcard_bind = true`
|
||||
- CLI: `--allow-wildcard-bind`
|
||||
|
||||
Either source enables it (OR logic, not AND).
|
||||
|
||||
## Deployment
|
||||
|
||||
### Docker
|
||||
|
||||
```yaml
|
||||
services:
|
||||
reverse-proxy:
|
||||
build: .
|
||||
container_name: reverse-proxy
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "203.0.113.10:80:80"
|
||||
- "203.0.113.10:443:443"
|
||||
volumes:
|
||||
- /etc/reverse-proxy/config.toml:/etc/reverse-proxy/config.toml:ro
|
||||
- /var/lib/reverse-proxy/acme-cache:/var/lib/reverse-proxy/acme-cache
|
||||
- /var/log/reverse-proxy:/var/log/reverse-proxy
|
||||
- /run/reverse-proxy:/run/reverse-proxy
|
||||
networks:
|
||||
- proxy-net
|
||||
healthcheck:
|
||||
test: ["CMD", "wget", "-q", "--spider", "http://127.0.0.1:9900/health"]
|
||||
interval: 30s
|
||||
timeout: 5s
|
||||
retries: 3
|
||||
```
|
||||
|
||||
Container config must set `allow_wildcard_bind = true` and bind to `0.0.0.0`.
|
||||
|
||||
See [`deploy/docker-compose.yml`](deploy/docker-compose.yml) for a complete
|
||||
example including Gitea and PostgreSQL.
|
||||
|
||||
### systemd
|
||||
|
||||
Install the binary and service file:
|
||||
|
||||
```bash
|
||||
cp target/release/reverse-proxy /usr/local/bin/
|
||||
cp deploy/reverse-proxy.service /etc/systemd/system/
|
||||
```
|
||||
|
||||
Create config at `/etc/reverse-proxy/config.toml`, then:
|
||||
|
||||
```bash
|
||||
systemctl enable --now reverse-proxy
|
||||
```
|
||||
|
||||
See [`deploy/reverse-proxy.service`](deploy/reverse-proxy.service) for the
|
||||
unit file with security hardening options.
|
||||
|
||||
### fail2ban
|
||||
|
||||
Install the filter and jail config:
|
||||
|
||||
```bash
|
||||
cp deploy/fail2ban/filter.d/reverse-proxy.conf /etc/fail2ban/filter.d/
|
||||
cp deploy/fail2ban/jail.d/reverse-proxy.conf /etc/fail2ban/jail.d/
|
||||
systemctl restart fail2ban
|
||||
```
|
||||
|
||||
The filter matches `RATE_LIMIT` log lines from the proxy's structured log
|
||||
output. The jail bans IPs after 10 rate-limited requests within 60 seconds
|
||||
(adjust `maxretry` and `findtime` to taste).
|
||||
|
||||
Rate-limited requests produce log lines like:
|
||||
|
||||
```
|
||||
RATE_LIMIT client_ip=203.0.113.50 host=git.example.com path=/login status=429
|
||||
```
|
||||
|
||||
For Docker deployments, mount the log directory so fail2ban on the host can
|
||||
read it:
|
||||
|
||||
```yaml
|
||||
volumes:
|
||||
- /var/log/reverse-proxy:/var/log/reverse-proxy
|
||||
```
|
||||
|
||||
Enable file logging in config:
|
||||
|
||||
```toml
|
||||
[logging]
|
||||
log_file_path = "/var/log/reverse-proxy/access.log"
|
||||
```
|
||||
|
||||
## Admin Socket
|
||||
|
||||
The admin Unix domain socket supports two commands:
|
||||
|
||||
```bash
|
||||
# Reload config
|
||||
echo "reload" | socat - UNIX-CONNECT:/run/reverse-proxy/admin.sock
|
||||
|
||||
# Check status
|
||||
echo "status" | socat - UNIX-CONNECT:/run/reverse-proxy/admin.sock
|
||||
```
|
||||
|
||||
Responses are newline-terminated JSON:
|
||||
|
||||
```json
|
||||
{"status":"ok"}
|
||||
{"status":"ok","uptime_secs":1234,"sites":2}
|
||||
{"status":"error","message":"config validation failed: ..."}
|
||||
```
|
||||
|
||||
Config can also be reloaded with `kill -SIGHUP $(pidof reverse-proxy)`, but
|
||||
SIGHUP provides no feedback on success or failure.
|
||||
|
||||
## Health Check
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:9900/health
|
||||
```
|
||||
|
||||
Returns `200 OK` with an empty body. Bound to localhost only — not exposed on
|
||||
public ports.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌────────────────────────────────────┐
|
||||
│ reverse-proxy (Rust/axum) │
|
||||
config.toml ──────► │ StaticConfig + DynamicConfig │
|
||||
│ (ArcSwap for hot-reload) │
|
||||
│ │
|
||||
│ ┌─ Listener 1 ─────────────────┐ │
|
||||
bind_addr:80 ───► │ │ HTTP → 301 redirect │ │
|
||||
│ └────────────────────────────────┘ │
|
||||
│ │
|
||||
bind_addr:443 ───► │ │ TLS listener (tokio-rustls) │ │
|
||||
│ │ ├─ ACME or Manual TLS config │ │
|
||||
│ │ └─ axum router (per-listener) │ │
|
||||
│ │ ├─ Host → global site lookup │ │
|
||||
│ │ ├─ Rate limiting, headers │ │
|
||||
│ │ └─ Proxy to upstream │ │
|
||||
│ └────────────────────────────────┘ │
|
||||
│ │
|
||||
│ /health → 200 OK (port 9900) │
|
||||
│ Admin socket (Unix domain) │
|
||||
└────────────────────────────────────┘
|
||||
```
|
||||
|
||||
For full architecture documentation, see [`docs/architecture/`](docs/architecture/).
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── main.rs # Entry point, server startup
|
||||
├── cli.rs # CLI argument parsing
|
||||
├── lib.rs # Library root
|
||||
├── config/
|
||||
│ ├── static_config.rs # Immutable startup configuration
|
||||
│ ├── dynamic_config.rs# Hot-reloadable runtime configuration
|
||||
│ └── validation.rs # Config validation rules
|
||||
├── proxy/
|
||||
│ ├── handler.rs # Core reverse proxy handler
|
||||
│ ├── headers.rs # Proxy header injection
|
||||
│ ├── body_limit.rs # Request body size limiting
|
||||
│ └── error.rs # Error response types
|
||||
├── tls/
|
||||
│ ├── acceptor.rs # TLS acceptor setup
|
||||
│ ├── acme.rs # ACME certificate provisioning
|
||||
│ ├── config.rs # TLS configuration
|
||||
│ └── redirect.rs # HTTP → HTTPS redirect
|
||||
├── rate_limit/
|
||||
│ ├── mod.rs # Rate limiting middleware
|
||||
│ └── bucket.rs # Token bucket implementation
|
||||
├── admin/
|
||||
│ ├── socket.rs # Unix domain socket admin API
|
||||
│ └── mod.rs
|
||||
├── health.rs # Health check endpoint
|
||||
├── logging/
|
||||
│ ├── mod.rs # Logging initialization
|
||||
│ └── format.rs # Structured log formatting
|
||||
├── server.rs # HTTPS listener serving
|
||||
├── shutdown.rs # Graceful shutdown handling
|
||||
└── utils.rs # Shared utilities
|
||||
|
||||
deploy/
|
||||
├── Dockerfile
|
||||
├── docker-compose.yml
|
||||
├── reverse-proxy.service
|
||||
└── fail2ban/
|
||||
├── filter.d/reverse-proxy.conf
|
||||
└── jail.d/reverse-proxy.conf
|
||||
|
||||
docs/
|
||||
├── architecture/ # Full architecture documentation
|
||||
│ ├── overview.md
|
||||
│ ├── proxy.md
|
||||
│ ├── tls.md
|
||||
│ ├── config.md
|
||||
│ ├── operations.md
|
||||
│ └── decisions/ # Architecture Decision Records (ADRs)
|
||||
└── research/
|
||||
└── threat-landscape.md
|
||||
```
|
||||
|
||||
## Why Rust
|
||||
|
||||
6 of 7 recent nginx CVEs are memory corruption bugs (buffer overflows,
|
||||
use-after-free, out-of-bounds reads) — the exact class of bugs Rust eliminates
|
||||
by construction. Combined with rustls (pure Rust TLS, no OpenSSL dependency),
|
||||
this proxy provides a fundamentally safer baseline than nginx.
|
||||
|
||||
Rust does not eliminate logic bugs. Rate limiting, header injection prevention,
|
||||
and access control still require careful implementation. But it eliminates the
|
||||
entire category of vulnerabilities that make nginx's C codebase a persistent
|
||||
attack surface.
|
||||
|
||||
See [`docs/research/threat-landscape.md`](docs/research/threat-landscape.md)
|
||||
for the full vulnerability analysis that motivated this project.
|
||||
|
||||
## License
|
||||
|
||||
Licensed under either of
|
||||
|
||||
- Apache License, Version 2.0
|
||||
([http://www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0))
|
||||
- MIT License
|
||||
([http://opensource.org/licenses/MIT](http://opensource.org/licenses/MIT))
|
||||
|
||||
at your option.
|
||||
|
||||
Unless you explicitly state otherwise, any contribution intentionally submitted
|
||||
for inclusion in this project by you, as defined in the Apache-2.0 license, shall
|
||||
be dual licensed as above, without any additional terms or conditions.
|
||||
305
deploy/README.md
Normal file
305
deploy/README.md
Normal file
@@ -0,0 +1,305 @@
|
||||
# Deployment
|
||||
|
||||
Step-by-step setup guides for running reverse-proxy.
|
||||
|
||||
## Docker Deployment (Recommended)
|
||||
|
||||
This is the easiest way to get started and provides container-level isolation
|
||||
as a defense-in-depth measure.
|
||||
|
||||
### 1. Build the image
|
||||
|
||||
```bash
|
||||
cd /path/to/reverse-proxy
|
||||
docker build -t reverse-proxy .
|
||||
```
|
||||
|
||||
### 2. Create directories on the host
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /etc/reverse-proxy
|
||||
sudo mkdir -p /var/lib/reverse-proxy/acme-cache
|
||||
sudo mkdir -p /var/log/reverse-proxy
|
||||
sudo mkdir -p /run/reverse-proxy
|
||||
```
|
||||
|
||||
### 3. Create the config file
|
||||
|
||||
Create `/etc/reverse-proxy/config.toml`. For a basic single-domain setup with
|
||||
Let's Encrypt:
|
||||
|
||||
```toml
|
||||
allow_wildcard_bind = true
|
||||
health_check_port = 9900
|
||||
|
||||
[logging]
|
||||
level = "info"
|
||||
format = "text"
|
||||
log_file_path = "/var/log/reverse-proxy/access.log"
|
||||
|
||||
[rate_limit]
|
||||
requests_per_second = 10
|
||||
burst = 20
|
||||
|
||||
[body]
|
||||
limit_bytes = 104857600
|
||||
|
||||
[[listeners]]
|
||||
bind_addr = "0.0.0.0"
|
||||
http_port = 80
|
||||
https_port = 443
|
||||
|
||||
[listeners.tls]
|
||||
mode = "acme"
|
||||
acme_domains = ["yourdomain.example.com"]
|
||||
acme_cache_dir = "/var/lib/reverse-proxy/acme-cache"
|
||||
acme_directory = "production"
|
||||
acme_contact = "mailto:admin@yourdomain.example.com"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "yourdomain.example.com"
|
||||
upstream = "your-backend:8080"
|
||||
```
|
||||
|
||||
**Important:** Replace `yourdomain.example.com` with your actual domain and
|
||||
`your-backend:8080` with your upstream service address. For initial testing,
|
||||
use `acme_directory = "staging"` to avoid Let's Encrypt rate limits.
|
||||
|
||||
### 4. Set up Docker Compose
|
||||
|
||||
Copy and customize `docker-compose.yml`:
|
||||
|
||||
```bash
|
||||
cp deploy/docker-compose.yml /opt/reverse-proxy/docker-compose.yml
|
||||
```
|
||||
|
||||
Edit the compose file to:
|
||||
- Replace `203.0.113.10` with your server's public IP
|
||||
- Update upstream service definitions to match your infrastructure
|
||||
- Adjust the `DB_PASSWORD` environment variable (use Docker secrets or `.env`
|
||||
file, never commit real passwords)
|
||||
|
||||
### 5. Start the proxy
|
||||
|
||||
```bash
|
||||
cd /opt/reverse-proxy
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
### 6. Verify
|
||||
|
||||
```bash
|
||||
# Check container health
|
||||
docker compose ps
|
||||
|
||||
# Test health endpoint (from the host)
|
||||
curl -s http://127.0.0.1:9900/health
|
||||
|
||||
# Check logs
|
||||
docker compose logs reverse-proxy
|
||||
|
||||
# Test TLS
|
||||
curl -v https://yourdomain.example.com/
|
||||
```
|
||||
|
||||
### 7. Set up fail2ban
|
||||
|
||||
If you want automated IP banning for rate limit violations:
|
||||
|
||||
```bash
|
||||
sudo cp deploy/fail2ban/filter.d/reverse-proxy.conf /etc/fail2ban/filter.d/
|
||||
sudo cp deploy/fail2ban/jail.d/reverse-proxy.conf /etc/fail2ban/jail.d/
|
||||
sudo systemctl restart fail2ban
|
||||
```
|
||||
|
||||
Verify fail2ban is watching the logs:
|
||||
|
||||
```bash
|
||||
sudo fail2ban-client status reverse-proxy
|
||||
```
|
||||
|
||||
## Bare Metal / systemd Deployment
|
||||
|
||||
For running directly on a host without Docker.
|
||||
|
||||
### 1. Build
|
||||
|
||||
```bash
|
||||
cargo build --release
|
||||
# For a fully static binary (no libc dependency):
|
||||
# cargo build --release --target x86_64-unknown-linux-musl
|
||||
```
|
||||
|
||||
### 2. Install
|
||||
|
||||
```bash
|
||||
sudo cp target/release/reverse-proxy /usr/local/bin/
|
||||
sudo cp deploy/reverse-proxy.service /etc/systemd/system/
|
||||
```
|
||||
|
||||
### 3. Create config and directories
|
||||
|
||||
```bash
|
||||
sudo mkdir -p /etc/reverse-proxy
|
||||
sudo mkdir -p /var/lib/reverse-proxy/acme-cache
|
||||
sudo mkdir -p /var/log/reverse-proxy
|
||||
sudo mkdir -p /run/reverse-proxy
|
||||
```
|
||||
|
||||
Create `/etc/reverse-proxy/config.toml` (see example configs in the main
|
||||
README). With a bare metal deployment, use the server's actual IP as
|
||||
`bind_addr` instead of `0.0.0.0`:
|
||||
|
||||
```toml
|
||||
# Single-domain bare metal example
|
||||
health_check_port = 9900
|
||||
|
||||
[logging]
|
||||
level = "info"
|
||||
format = "text"
|
||||
log_file_path = "/var/log/reverse-proxy/access.log"
|
||||
|
||||
[rate_limit]
|
||||
requests_per_second = 10
|
||||
burst = 20
|
||||
|
||||
[body]
|
||||
limit_bytes = 104857600
|
||||
|
||||
[[listeners]]
|
||||
bind_addr = "203.0.113.10"
|
||||
http_port = 80
|
||||
https_port = 443
|
||||
|
||||
[listeners.tls]
|
||||
mode = "acme"
|
||||
acme_domains = ["yourdomain.example.com"]
|
||||
acme_cache_dir = "/var/lib/reverse-proxy/acme-cache"
|
||||
acme_directory = "production"
|
||||
acme_contact = "mailto:admin@yourdomain.example.com"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "yourdomain.example.com"
|
||||
upstream = "127.0.0.1:3000"
|
||||
```
|
||||
|
||||
### 4. Start
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable --now reverse-proxy
|
||||
```
|
||||
|
||||
### 5. Verify
|
||||
|
||||
```bash
|
||||
# Check service status
|
||||
systemctl status reverse-proxy
|
||||
|
||||
# Test health endpoint
|
||||
curl -s http://127.0.0.1:9900/health
|
||||
|
||||
# Check logs
|
||||
journalctl -u reverse-proxy -f
|
||||
```
|
||||
|
||||
### 6. Reload config
|
||||
|
||||
```bash
|
||||
# Via SIGHUP (no feedback)
|
||||
sudo kill -SIGHUP $(pidof reverse-proxy)
|
||||
|
||||
# Via admin socket (returns success/failure JSON)
|
||||
echo "reload" | socat - UNIX-CONNECT:/run/reverse-proxy/admin.sock
|
||||
|
||||
# Check status
|
||||
echo "status" | socat - UNIX-CONNECT:/run/reverse-proxy/admin.sock
|
||||
```
|
||||
|
||||
## Multi-Domain Setup
|
||||
|
||||
### Shared IP with SAN certificate
|
||||
|
||||
One IP, one listener, multiple domains on a single Let's Encrypt SAN certificate:
|
||||
|
||||
```toml
|
||||
[[listeners]]
|
||||
bind_addr = "203.0.113.10"
|
||||
|
||||
[listeners.tls]
|
||||
mode = "acme"
|
||||
acme_domains = ["git.example.com", "www.example.com"]
|
||||
acme_cache_dir = "/var/lib/reverse-proxy/acme-cache"
|
||||
acme_directory = "production"
|
||||
acme_contact = "mailto:admin@example.com"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "git.example.com"
|
||||
upstream = "127.0.0.1:3000"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "www.example.com"
|
||||
upstream = "127.0.0.1:8080"
|
||||
```
|
||||
|
||||
### Dedicated IP per domain
|
||||
|
||||
Multiple listeners, each with its own IP and certificate:
|
||||
|
||||
```toml
|
||||
[[listeners]]
|
||||
bind_addr = "203.0.113.10"
|
||||
|
||||
[listeners.tls]
|
||||
mode = "acme"
|
||||
acme_domains = ["git.example.com"]
|
||||
acme_cache_dir = "/var/lib/reverse-proxy/acme-cache-git"
|
||||
acme_directory = "production"
|
||||
acme_contact = "mailto:admin@example.com"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "git.example.com"
|
||||
upstream = "127.0.0.1:3000"
|
||||
|
||||
[[listeners]]
|
||||
bind_addr = "203.0.113.11"
|
||||
|
||||
[listeners.tls]
|
||||
mode = "manual"
|
||||
cert_path = "/etc/ssl/www.example.com/fullchain.pem"
|
||||
key_path = "/etc/ssl/www.example.com/privkey.pem"
|
||||
|
||||
[[listeners.sites]]
|
||||
host = "www.example.com"
|
||||
upstream = "127.0.0.1:8080"
|
||||
```
|
||||
|
||||
## HTTPS Upstream
|
||||
|
||||
If your upstream service uses TLS, set `upstream_scheme = "https"`:
|
||||
|
||||
```toml
|
||||
[[listeners.sites]]
|
||||
host = "secure.example.com"
|
||||
upstream = "10.0.0.5:8443"
|
||||
upstream_scheme = "https"
|
||||
```
|
||||
|
||||
The proxy validates upstream TLS certificates using the system's native
|
||||
certificate store.
|
||||
|
||||
## Security Notes
|
||||
|
||||
- The proxy binds to explicit IP addresses by default. `0.0.0.0` is rejected
|
||||
unless `--allow-wildcard-bind` or `allow_wildcard_bind = true` is set.
|
||||
This prevents accidental exposure on unintended interfaces.
|
||||
- The health check endpoint binds to `127.0.0.1` only and is never exposed on
|
||||
public ports.
|
||||
- The admin socket should be protected by file permissions. It defaults to
|
||||
`/run/reverse-proxy/admin.sock`.
|
||||
- Rate limiting is global per-IP (IPv4: /32, IPv6: /64) in the current
|
||||
version. Per-site rate limits may be added later.
|
||||
- All log output disables ANSI escape codes for fail2ban and container
|
||||
compatibility.
|
||||
- The `Server` header is stripped from upstream responses and not added by the
|
||||
proxy, reducing server fingerprinting.
|
||||
@@ -1,3 +1,3 @@
|
||||
[Definition]
|
||||
failregex = ^RATE_LIMIT client_ip=<HOST> host=\S+ path=\S+ status=\d+$
|
||||
failregex = RATE_LIMIT client_ip=<HOST> host=\S+ path=\S+ status=\d+
|
||||
ignoreregex =
|
||||
@@ -7,7 +7,8 @@ last_updated: 2026-06-12
|
||||
|
||||
## Current State
|
||||
|
||||
**Phase 0 (Exploration) — Complete.** Phase 1 (Architecture) — In progress.
|
||||
**Phase 1 (Implementation) — Complete.** The proxy is deployed and running in a
|
||||
Docker container, replacing our vulnerable nginx 1.24.0 installation.
|
||||
|
||||
This project replaces our vulnerable nginx 1.24.0 installation with a
|
||||
memory-safe Rust/axum reverse proxy. The primary motivation is CVE-2026-42945
|
||||
@@ -16,7 +17,9 @@ memory corruption bugs in nginx's C codebase.
|
||||
|
||||
The proxy supports multiple domains from initial release (git.alk.dev and
|
||||
alk.dev), with per-domain host-based routing and a single multi-domain SAN
|
||||
certificate via ACME.
|
||||
certificate via ACME. HTTP/2 is supported on the client-facing side (between
|
||||
the client and the proxy) with ALPN-based protocol detection. Upstream
|
||||
connections remain HTTP/1.1.
|
||||
|
||||
## Architecture Documents
|
||||
|
||||
@@ -24,7 +27,7 @@ certificate via ACME.
|
||||
|----------|--------|-------------|
|
||||
| [overview.md](overview.md) | Draft | Vision, scope, crate dependencies, exports |
|
||||
| [proxy.md](proxy.md) | Draft | Reverse proxy handler, request flow, header injection |
|
||||
| [tls.md](tls.md) | Draft | TLS termination, ACME, manual certs, SNI |
|
||||
| [tls.md](tls.md) | Reviewed | TLS termination, ACME, manual certs, SNI, ALPN |
|
||||
| [config.md](config.md) | Draft | TOML config format, static/dynamic split, ArcSwap reload |
|
||||
| [operations.md](operations.md) | Draft | Rate limiting, logging, health check, systemd, shutdown |
|
||||
|
||||
@@ -54,6 +57,11 @@ certificate via ACME.
|
||||
| [020](decisions/020-container-deployment.md) | Container Deployment Model | Accepted |
|
||||
| [021](decisions/021-x-forwarded-for-edge-proxy.md) | X-Forwarded-For Edge Proxy Model | Accepted |
|
||||
| [022](decisions/022-health-check-scope.md) | Health Check Scope — Local Port and Admin Socket Only | Accepted |
|
||||
| [023](decisions/023-http2-client-facing.md) | HTTP/2 Client-Facing Support | Accepted |
|
||||
| [024](decisions/024-ansi-disabled-logging.md) | ANSI-Disabled Logging for Container Deployments | Accepted |
|
||||
| [025](decisions/025-rate-limiter-ip-source.md) | Rate Limiter IP Source — ConnectInfo Only | Accepted |
|
||||
| [026](decisions/026-connector-timeout-ceiling.md) | Connector Timeout Ceiling for Per-Site Timeouts | Accepted |
|
||||
| [027](decisions/027-admin-socket-resource-limits.md) | Admin Socket Resource Limits | Accepted |
|
||||
|
||||
## Open Questions
|
||||
|
||||
@@ -69,10 +77,12 @@ See [open-questions.md](open-questions.md) for the full tracker.
|
||||
| ~~OQ-06~~ | ~~Should upstream timeouts be configurable per-site?~~ | ~~low~~ | **resolved** (ADR-015) |
|
||||
| ~~OQ-07~~ | ~~Should per-site TLS overrides be supported for mixed ACME/manual domains?~~ | ~~low~~ | **resolved** (ADR-019) |
|
||||
| ~~OQ-08~~ | ~~Should `/health` use a less common path to avoid upstream collision?~~ | ~~medium~~ | **resolved** (ADR-022: no `/health` route on main listener) |
|
||||
| ~~OQ-09~~ | ~~How should `upstream_connect_timeout_secs` be enforced?~~ | ~~medium~~ | **resolved** (implementation gap — ADR-015 already decides this) |
|
||||
| ~~OQ-09~~ | ~~How should `upstream_connect_timeout_secs` be enforced?~~ | ~~medium~~ | **resolved** (ADR-026: 30s connector ceiling) |
|
||||
| ~~OQ-10~~ | ~~Should ACME contact email be a required config field?~~ | ~~high~~ | **resolved** (already specified in config.md; implementation bug C2) |
|
||||
| ~~OQ-11~~ | ~~How should `X-Forwarded-Proto` be derived per-listener?~~ | ~~medium~~ | **resolved** (hardcoded `https` is correct for TLS-terminating proxy) |
|
||||
| ~~OQ-12~~ | ~~Should request access logging be mandatory or optional?~~ | ~~high~~ | **resolved** (mandatory, always-on per operations.md) |
|
||||
| OQ-13 | Should `acme_contact` support multiple email addresses? | low | open |
|
||||
| OQ-14 | Should rate limiter eviction interval and max age be configurable? | low | open |
|
||||
|
||||
## Document Lifecycle
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
status: draft
|
||||
last_updated: 2026-06-11
|
||||
last_updated: 2026-06-12
|
||||
---
|
||||
|
||||
# Configuration
|
||||
@@ -75,9 +75,9 @@ config.toml
|
||||
|
||||
## Static vs Dynamic Configuration
|
||||
|
||||
This split follows the pattern established in alknet (ADR-030) and adapted
|
||||
for our simpler use case. See ADR-019 for the rationale behind the
|
||||
`[[listeners]]` configuration format.
|
||||
This split follows the pattern established in alknet (alknet ADR-030, not
|
||||
this project) and adapted for our simpler use case. See ADR-019 for the
|
||||
rationale behind the `[[listeners]]` configuration format.
|
||||
|
||||
### StaticConfig
|
||||
|
||||
@@ -100,6 +100,10 @@ Immutable after startup. Changes require a process restart.
|
||||
| `format` | `"text"` or `"json"` | Log output format |
|
||||
| `log_file_path` | `String` | Path to log file. When set, structured logs are written to this file in addition to stdout/stderr. Strongly recommended for fail2ban integration in container deployments (see ADR-020). Default: not set (file logging disabled) |
|
||||
|
||||
**Note**: All log output uses `with_ansi(false)` to disable ANSI escape codes.
|
||||
This is critical for fail2ban regex matching and Docker log output (see ADR-024).
|
||||
Both text and JSON formats produce plain-text output without color codes.
|
||||
|
||||
**Note**: The entire `LoggingConfig` (including `log_file_path`) is static and
|
||||
requires a process restart to change. Log file path changes require reopening
|
||||
file handles, which is complex and low-value for Phase 1. Log rotation (Phase 2)
|
||||
@@ -110,16 +114,23 @@ will be handled via signal-based or built-in rotation.
|
||||
| Field | Type | Description |
|
||||
|-------|------|-------------|
|
||||
| `bind_addr` | `String` | IP address to bind to (must be explicit, no `0.0.0.0`; see ADR-016) |
|
||||
| `http_port` | `u16` | Port for HTTP→HTTPS redirect (default: `80`; set to `0` to disable) |
|
||||
| `http_port` | `u16` | Port for HTTP→HTTPS redirect (default: `80`; set to `0` to disable; valid values: 0 or 1–65535). Note: the implementation currently uses `u32`; this must be changed to `u16` to match the architecture spec (see Security Review W12). |
|
||||
| `https_port` | `u16` | Port for TLS listener (default: `443`) |
|
||||
| `tls.mode` | `"acme"` or `"manual"` | Certificate provisioning mode |
|
||||
| `tls.acme_domains` | `Vec<String>` | Domains for ACME SAN certificate (ACME mode only) |
|
||||
| `tls.acme_cache_dir` | `String` | ACME state cache directory |
|
||||
| `tls.acme_directory` | `"production"` or `"staging"` | Let's Encrypt directory |
|
||||
| `tls.acme_contact` | `String` | Contact email for ACME registration (e.g., `"mailto:admin@example.com"`). Required for production; Let's Encrypt rejects registrations without a contact email. See OQ-10. |
|
||||
| `tls.acme_contact` | `String` | Contact email for ACME registration (e.g., `"mailto:admin@example.com"`). Required for production; Let's Encrypt rejects registrations without a contact email. Must contain a non-empty email after `mailto:` with an `@` sign. See OQ-10, OQ-13. |
|
||||
| `tls.cert_path` | `String` | Certificate file path (manual mode only) |
|
||||
| `tls.key_path` | `String` | Private key file path (manual mode only) |
|
||||
|
||||
**Note on `X-Forwarded-Proto`**: The `X-Forwarded-Proto` header is derived
|
||||
from which listener port received the request: `https` for requests on the
|
||||
listener's `https_port`, `http` for requests on the `http_port`. In practice,
|
||||
since the HTTP listener sends a 301 redirect rather than proxying,
|
||||
`X-Forwarded-Proto` is always `"https"` for proxied requests. See proxy.md and
|
||||
OQ-11.
|
||||
|
||||
**Why listeners are static:** Each listener requires binding a TCP socket and
|
||||
constructing a TLS acceptor — operations that fundamentally require a restart.
|
||||
Changing a listener's bind address, TLS mode, or certificate configuration
|
||||
@@ -397,14 +408,23 @@ On startup, the config is validated:
|
||||
16. Site `host` values must be valid hostnames (not IP addresses, not
|
||||
including ports). Hostnames are normalized to lowercase during validation.
|
||||
17. `upstream` must be in `host:port` format where `port` is a required integer
|
||||
1–65535. Examples: `gitea:3000`, `127.0.0.1:3000`, `[::1]:3000`. Invalid
|
||||
examples: `gitea` (missing port), `http://gitea:3000` (includes scheme),
|
||||
`10.0.0.5` (missing port). The `upstream_scheme` field handles the protocol.
|
||||
1–65535 and the host part must be a valid DNS hostname or IP address.
|
||||
IPv6 addresses must use bracket notation (e.g., `[::1]:3000`). Values
|
||||
like `!!!bad!!!:3000` or `@#$%:8080` are rejected. The host part is
|
||||
validated as follows: bracket-enclosed values are parsed as IPv6
|
||||
addresses; otherwise the host part must parse as a valid `IpAddr` or
|
||||
pass `is_valid_hostname` validation (same rules as site `host` values).
|
||||
Examples: `gitea:3000`, `127.0.0.1:3000`, `[::1]:3000`. Invalid examples:
|
||||
`gitea` (missing port), `http://gitea:3000` (includes scheme), `10.0.0.5`
|
||||
(missing port), `!!!bad!!!:3000` (invalid host part). The
|
||||
`upstream_scheme` field handles the protocol.
|
||||
18. `upstream_scheme` values are case-sensitive: only `"http"` or `"https"`
|
||||
(lowercase). Default is `"http"`.
|
||||
19. In ACME mode, `tls.acme_contact` must be a valid `mailto:` URI
|
||||
(e.g., `"mailto:admin@example.com"`). Let's Encrypt requires a contact
|
||||
email for production certificate requests.
|
||||
19. In ACME mode, `tls.acme_contact` must be a valid `mailto:` URI with a
|
||||
non-empty email address containing an `@` sign
|
||||
(e.g., `"mailto:admin@example.com"`). Values like `"mailto:"` (empty
|
||||
email) or `"mailto:user"` (no `@`) are rejected. Let's Encrypt requires
|
||||
a contact email for production certificate requests.
|
||||
|
||||
On SIGHUP reload, the same validation applies. If the new config fails
|
||||
validation, the reload is rejected and the old config remains active. An error
|
||||
@@ -430,6 +450,8 @@ All design decisions are documented as ADRs in [decisions/](decisions/).
|
||||
| [016](decisions/016-explicit-bind-address.md) | Explicit bind address required | Rejects `0.0.0.0` to prevent accidental exposure |
|
||||
| [019](decisions/019-multi-config-listeners.md) | Multi-config listeners | `[[listeners]]` supporting both dedicated-IP and shared-IP deployment models |
|
||||
| [020](decisions/020-container-deployment.md) | Container deployment model | Flexible upstream addressing; `allow_wildcard_bind` override for containers |
|
||||
| [026](decisions/026-connector-timeout-ceiling.md) | Connector timeout ceiling | 30s ceiling on connector, per-site timeout via tokio::time::timeout |
|
||||
| [027](decisions/027-admin-socket-resource-limits.md) | Admin socket resource limits | 5s read timeout, 4096 byte line length limit |
|
||||
|
||||
## Open Questions
|
||||
|
||||
@@ -439,4 +461,8 @@ questions affecting this document:
|
||||
- ~~**OQ-04**: Should config reload support a Unix domain socket API in addition
|
||||
to SIGHUP?~~ (resolved — ADR-014: Unix domain socket admin API added)
|
||||
- ~~**OQ-07**: Should per-site TLS overrides be supported for mixed ACME/manual
|
||||
domains?~~ (resolved — ADR-019: `[[listeners]]` with per-listener TLS config)
|
||||
domains?~~ (resolved — ADR-019: `[[listeners]]` with per-listener TLS config)
|
||||
- **OQ-13**: Should `acme_contact` support multiple email addresses? (see
|
||||
[open-questions.md](open-questions.md))
|
||||
- **OQ-14**: Should rate limiter eviction interval and max age be configurable?
|
||||
(see [open-questions.md](open-questions.md))
|
||||
75
docs/architecture/decisions/023-http2-client-facing.md
Normal file
75
docs/architecture/decisions/023-http2-client-facing.md
Normal file
@@ -0,0 +1,75 @@
|
||||
# ADR-023: HTTP/2 Client-Facing Support
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
The original architecture spec excluded HTTP/2 proxying from scope, stating "HTTP/2
|
||||
or HTTP/3 proxying (services that need these run their own native Rust servers)."
|
||||
This was interpreted as excluding HTTP/2 entirely — both for client connections
|
||||
and upstream connections.
|
||||
|
||||
During deployment testing, we discovered that modern browsers and HTTP clients
|
||||
negotiate HTTP/2 via ALPN during the TLS handshake. The initial implementation
|
||||
used `hyper_util::server::conn::auto::Builder` which failed to properly detect
|
||||
HTTP/2 over TLS connections because its `ReadVersion` mechanism doesn't work
|
||||
reliably with `tokio-rustls` `TlsStream` wrappers.
|
||||
|
||||
This caused two problems:
|
||||
1. HTTP/2 clients received degraded performance (no multiplexing) or connection
|
||||
failures
|
||||
2. In HTTP/2, the host is conveyed via the `:authority` pseudo-header, which
|
||||
hyper represents as the URI host rather than a `Host` header — causing 400
|
||||
errors for HTTP/2 clients
|
||||
|
||||
## Decision
|
||||
|
||||
The proxy now supports HTTP/2 on the **client-facing** side (between the client
|
||||
and the proxy). This is distinct from HTTP/2 proxying to upstream services,
|
||||
which remains out of scope.
|
||||
|
||||
**Implementation:**
|
||||
|
||||
1. **ALPN-based protocol detection**: After the TLS handshake, the proxy reads
|
||||
the negotiated ALPN protocol from `tls_stream.get_ref().1.alpn_protocol()`.
|
||||
If the ALPN is `h2`, the connection uses
|
||||
`hyper::server::conn::http2::Builder`; otherwise, it uses
|
||||
`hyper_util::server::conn::auto::Builder` with HTTP/1.1 + upgrade support.
|
||||
|
||||
2. **Host header fallback**: The proxy handler now falls back to
|
||||
`req.uri().host()` when the `Host` header is absent. In HTTP/2, the
|
||||
`:authority` pseudo-header is represented as the URI host in hyper, so this
|
||||
correctly handles both HTTP/1.1 (where `Host` is always present) and HTTP/2
|
||||
(where `:authority` maps to URI host).
|
||||
|
||||
3. **ALPN advertisement**: The TLS `ServerConfig` advertises `h2` and
|
||||
`http/1.1` as ALPN protocols, plus `acme-tls/1` for ACME challenges.
|
||||
|
||||
**Upstream connections remain HTTP/1.1.** The proxy communicates with upstream
|
||||
services over HTTP/1.1 (or HTTPS/1.1 when `upstream_scheme = "https"`). HTTP/2
|
||||
to upstreams is out of scope for Phase 1.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
- Modern browsers and HTTP/2 clients work correctly with the proxy
|
||||
- HTTP/2 multiplexing improves client-facing performance (multiple requests over
|
||||
a single connection)
|
||||
- ALPN-based detection is the standard mechanism for HTTP/2 negotiation over TLS
|
||||
- Host header fallback correctly handles both HTTP/1.1 and HTTP/2
|
||||
|
||||
**Negative:**
|
||||
- Slightly more complex TLS listener code (ALPN protocol detection, dual
|
||||
builder paths)
|
||||
- The distinction between "HTTP/2 to the proxy" and "HTTP/2 to upstream" must
|
||||
be clearly documented to avoid confusion
|
||||
- `ConnectInfoService` is typed to `Request<Incoming>` rather than the generic
|
||||
`Request<B>`, which is a correct but slightly less flexible implementation
|
||||
|
||||
## References
|
||||
|
||||
- [proxy.md](../proxy.md) — request flow and host-based routing
|
||||
- [tls.md](../tls.md) — TLS termination and ALPN configuration
|
||||
- [overview.md](../overview.md) — scope and out-of-scope items
|
||||
53
docs/architecture/decisions/024-ansi-disabled-logging.md
Normal file
53
docs/architecture/decisions/024-ansi-disabled-logging.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# ADR-024: ANSI-Disabled Logging for Container Deployments
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
During deployment, the proxy's log output contained ANSI escape codes (color
|
||||
codes) because `tracing-subscriber`'s default `fmt::layer()` enables ANSI
|
||||
output when connected to a terminal. In a Docker container, `docker logs`
|
||||
captures stdout/stderr, and the log file written to
|
||||
`/var/log/reverse-proxy/access.log` is also a plain text file.
|
||||
|
||||
ANSI escape codes in logs cause two problems:
|
||||
1. **fail2ban regex failure**: The fail2ban filter regex expects plain text with
|
||||
a `RATE_LIMIT` prefix. ANSI codes embedded in the log line before the prefix
|
||||
break pattern matching, causing fail2ban to miss rate limit events entirely.
|
||||
2. **Docker log readability**: `docker logs` output is cluttered with escape
|
||||
sequences when not running in a terminal that supports them.
|
||||
|
||||
## Decision
|
||||
|
||||
All `tracing-subscriber` fmt layers now use `with_ansi(false)`:
|
||||
|
||||
- **File layer**: Always plain text, no ANSI codes
|
||||
- **Stdout layer**: Always plain text, no ANSI codes
|
||||
- **JSON layer**: Always plain text (JSON format doesn't benefit from colors)
|
||||
|
||||
This applies to both text and JSON log formats, in both file and stdout
|
||||
destinations.
|
||||
|
||||
Additionally, the fail2ban regex was corrected: the `^` anchor was removed from
|
||||
the `failregex` pattern because log lines have a timestamp/level prefix before
|
||||
the `RATE_LIMIT` keyword. The corrected pattern matches `RATE_LIMIT` anywhere
|
||||
in the line rather than only at the start.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
- fail2ban regex matching works reliably in all environments
|
||||
- Log output is clean and parseable regardless of environment
|
||||
- No behavioral difference between Docker, systemd, and terminal environments
|
||||
|
||||
**Negative:**
|
||||
- Loss of color-coding in terminal output during development (acceptable
|
||||
trade-off for reliability; developers can use `RUST_LOG` filtering instead)
|
||||
|
||||
## References
|
||||
|
||||
- [operations.md](../operations.md) — logging and fail2ban integration
|
||||
- [ADR-007](007-custom-log-format.md) — custom structured log format
|
||||
- [ADR-020](020-container-deployment.md) — container deployment model
|
||||
93
docs/architecture/decisions/025-rate-limiter-ip-source.md
Normal file
93
docs/architecture/decisions/025-rate-limiter-ip-source.md
Normal file
@@ -0,0 +1,93 @@
|
||||
# ADR-025: Rate Limiter IP Source Must Be ConnectInfo Only
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
The rate limiter identifies clients by IP address to enforce per-IP token bucket
|
||||
limits. The question is: what is the authoritative source of the client IP for
|
||||
rate limiting?
|
||||
|
||||
Two potential sources exist:
|
||||
|
||||
1. **`ConnectInfo<SocketAddr>`**: The TCP peer address, extracted from
|
||||
`TcpStream::peer_addr()` before TLS handshake and propagated to axum via
|
||||
`ConnectInfoService`. This is the real client IP at the TCP level.
|
||||
|
||||
2. **`X-Forwarded-For` header**: A client-supplied HTTP header that may contain
|
||||
an IP address. This header is set by the proxy handler *after* rate limiting
|
||||
(the rate limiter runs as middleware before the handler), so the value
|
||||
present during rate limit checks is whatever the client sent — completely
|
||||
untrusted.
|
||||
|
||||
ADR-021 already establishes that the proxy is an edge proxy and client-supplied
|
||||
`X-Forwarded-For` headers are untrusted. The proxy handler replaces
|
||||
`X-Forwarded-For` with the `ConnectInfo` IP before forwarding upstream
|
||||
specifically to prevent spoofing. However, ADR-021 only addresses the proxy
|
||||
handler's header injection — it does not specify the rate limiter's IP source.
|
||||
|
||||
The current implementation checks `X-Forwarded-For` *first* and falls back to
|
||||
`ConnectInfo`, which creates two attack vectors:
|
||||
|
||||
- **Rate limit bypass**: A client sends each request with a different random
|
||||
`X-Forwarded-For` value. Every request appears to come from a different IP,
|
||||
evading the per-IP token bucket entirely.
|
||||
- **Denial-of-service via IP spoofing**: A client sends requests with
|
||||
`X-Forwarded-For: <victim IP>`. The victim's bucket is depleted and their
|
||||
legitimate requests receive 429 responses.
|
||||
|
||||
## Decision
|
||||
|
||||
The rate limiter must use `ConnectInfo<SocketAddr>` as the **sole** source of
|
||||
client IP addresses. `X-Forwarded-For` must not be consulted for rate limiting
|
||||
purposes.
|
||||
|
||||
The rate limiter runs as middleware before the proxy handler. At that point in
|
||||
the request lifecycle, no proxy headers have been injected — any `X-Forwarded-For`
|
||||
header present is from the client and is untrusted (ADR-021).
|
||||
|
||||
`ConnectInfo<SocketAddr>` is always present because each listener populates it
|
||||
via `into_make_service_with_connect_info::<SocketAddr>()`. If `ConnectInfo` is
|
||||
absent (which should not happen in normal operation), the request should be
|
||||
rejected rather than falling back to an untrusted header.
|
||||
|
||||
## Rationale
|
||||
|
||||
- The proxy is the edge proxy (ADR-021). Client-supplied headers are
|
||||
untrusted at the edge.
|
||||
- Rate limiting is a security mechanism — it must use the most trustworthy
|
||||
IP source available. `ConnectInfo` is set by the kernel's TCP stack, not by
|
||||
the client.
|
||||
- The rate limiter's position in the middleware stack (before the handler)
|
||||
means it sees raw client headers, not the replaced `X-Forwarded-For` that the
|
||||
proxy handler injects.
|
||||
- Falling back to `X-Forwarded-For` when `ConnectInfo` is absent creates a
|
||||
downgrade attack — an attacker could potentially strip `ConnectInfo` from
|
||||
extensions to force the fallback path. Better to reject than to accept an
|
||||
untrusted IP.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
- Rate limiting cannot be bypassed via header spoofing
|
||||
- Rate limiting cannot be weaponized to DoS legitimate clients
|
||||
- Consistent with ADR-021's edge proxy trust model
|
||||
- No ambiguity about the IP source — one source, one code path
|
||||
|
||||
**Negative:**
|
||||
- If the proxy is ever placed behind a trusted CDN or load balancer,
|
||||
`ConnectInfo` will reflect the CDN's IP, not the end client's. This would
|
||||
require a "trusted proxies" configuration (already noted as a Phase 2
|
||||
consideration in ADR-021).
|
||||
- Requests without `ConnectInfo` are rejected. This should not happen in
|
||||
normal operation but adds a hard failure mode.
|
||||
|
||||
## References
|
||||
|
||||
- [proxy.md](../proxy.md) — Proxy header injection, request flow
|
||||
- [operations.md](../operations.md) — Rate limiting design
|
||||
- ADR-006 — Token bucket rate limiting
|
||||
- ADR-021 — X-Forwarded-For edge proxy model
|
||||
- Security Review C1 — Rate limiter X-Forwarded-For spoofing vulnerability
|
||||
90
docs/architecture/decisions/026-connector-timeout-ceiling.md
Normal file
90
docs/architecture/decisions/026-connector-timeout-ceiling.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# ADR-026: Connector Timeout Ceiling for Per-Site Timeouts
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
ADR-015 specifies per-site upstream connect timeout configuration with a default
|
||||
of 5 seconds. The proxy enforces connect timeouts using two mechanisms:
|
||||
|
||||
1. **`tokio::time::timeout`**: Wraps the entire `client.request()` call with the
|
||||
per-site `upstream_connect_timeout_secs` value.
|
||||
2. **`HttpConnector::set_connect_timeout()`**: Sets the TCP-level connect timeout
|
||||
on the hyper `HttpConnector` inside the shared client.
|
||||
|
||||
The problem: the HTTP connector's `set_connect_timeout()` is set once at client
|
||||
creation time and applies to all requests through that client. The current
|
||||
implementation hardcodes this to 5 seconds. Since the connector's internal
|
||||
timeout fires before `tokio::time::timeout`, any per-site connect timeout
|
||||
greater than 5 seconds is silently capped — the connector times out at 5s
|
||||
regardless of the configured value.
|
||||
|
||||
Three approaches exist:
|
||||
|
||||
1. **Raise the connector timeout to a high ceiling**: Set the connector's
|
||||
`set_connect_timeout` to a value higher than any reasonable per-site timeout
|
||||
(e.g., 30s). Let `tokio::time::timeout` enforce the actual per-site limit.
|
||||
The connector timeout becomes a safety ceiling, not the primary enforcement
|
||||
mechanism.
|
||||
|
||||
2. **Remove the connector timeout entirely**: Set `set_connect_timeout` to
|
||||
`None` and rely solely on `tokio::time::timeout`. This removes one layer
|
||||
of timeout enforcement but simplifies the model.
|
||||
|
||||
3. **Create per-site client instances**: Each site gets its own hyper Client
|
||||
with its own connector configured with the site's connect timeout. This is
|
||||
the most precise approach but creates many client instances and connection
|
||||
pools, increasing resource usage.
|
||||
|
||||
## Decision
|
||||
|
||||
Use approach 1: set the connector timeout to a high ceiling value (30 seconds)
|
||||
and let `tokio::time::timeout` enforce the actual per-site connect timeout.
|
||||
|
||||
The connector timeout serves as a safety ceiling — it ensures that even if the
|
||||
`tokio::time::timeout` wrapper fails or is misconfigured, TCP connections
|
||||
cannot hang indefinitely. The ceiling of 30s is well above the default 5s and
|
||||
any reasonable per-site override.
|
||||
|
||||
The `tokio::time::timeout` wrapper with the per-site value is the primary
|
||||
enforcement mechanism. It fires at the correct per-site threshold.
|
||||
|
||||
## Rationale
|
||||
|
||||
- The shared client architecture (ADR-017) means one connector timeout for all
|
||||
sites. Creating per-site clients would undermine connection pooling and
|
||||
increase resource usage.
|
||||
- A ceiling approach preserves the defense-in-depth benefit of two timeout
|
||||
layers while allowing per-site values to actually work.
|
||||
- 30s is a reasonable ceiling — no legitimate upstream connect should take
|
||||
longer than 30s. Sites that need a higher connect timeout can set the ceiling
|
||||
even higher if needed, but 30s covers all practical cases.
|
||||
- Removing the connector timeout (approach 2) removes the safety ceiling
|
||||
entirely. If `tokio::time::timeout` has a bug or is misapplied, TCP connects
|
||||
could hang indefinitely. The ceiling provides a backstop.
|
||||
- The HTTPS client uses `HttpsConnector<HttpConnector>`, which wraps the
|
||||
`HttpConnector`. The same ceiling applies to both HTTP and HTTPS clients.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
- Per-site connect timeouts work as documented (ADR-015)
|
||||
- Maintains defense-in-depth with two timeout layers
|
||||
- No change to the shared client / connection pooling architecture
|
||||
- Simple implementation: change one constant
|
||||
|
||||
**Negative:**
|
||||
- The connector timeout no longer matches the default connect timeout (5s
|
||||
default vs. 30s ceiling). Operators who read the connector timeout might
|
||||
be confused — documentation must make the ceiling role clear.
|
||||
- If a site needs a connect timeout > 30s, the ceiling must be raised. This
|
||||
is unlikely in practice but creates a hidden upper bound.
|
||||
|
||||
## References
|
||||
|
||||
- [proxy.md](../proxy.md) — Upstream connection, per-site timeouts
|
||||
- ADR-015 — Per-site upstream timeouts with defaults
|
||||
- ADR-017 — Upstream connection defaults
|
||||
- Security Review C3 — Connect timeout silently capped at 5s
|
||||
@@ -0,0 +1,77 @@
|
||||
# ADR-027: Admin Socket Resource Limits
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
The admin Unix domain socket (ADR-014) accepts connections from local
|
||||
processes and reads one newline-terminated command per connection. The current
|
||||
implementation has no read timeout and no line length limit.
|
||||
|
||||
Two attack vectors exist for processes with access to the admin socket
|
||||
(controlled by Unix file permissions):
|
||||
|
||||
1. **Connection hold**: A client connects and sends no data, keeping a
|
||||
connection and tokio task open indefinitely. An attacker with socket access
|
||||
can open many such connections to exhaust file descriptors or task slots.
|
||||
|
||||
2. **Unbounded memory allocation**: A client sends data without a newline,
|
||||
causing `read_line` to buffer indefinitely. This allows unbounded memory
|
||||
consumption from a single connection.
|
||||
|
||||
While the admin socket is protected by Unix file permissions (only processes
|
||||
with access to the socket file can connect), defense-in-depth warrants basic
|
||||
resource limits. The socket is a local diagnostic interface, not a
|
||||
high-performance endpoint — strict limits are appropriate.
|
||||
|
||||
## Decision
|
||||
|
||||
Apply two resource limits to admin socket connections:
|
||||
|
||||
1. **Read timeout**: 5 seconds per connection. If no complete command is
|
||||
received within 5 seconds, the connection is closed and a timeout error
|
||||
is logged at `debug` level.
|
||||
|
||||
2. **Line length limit**: 4096 bytes per command. If a client sends more
|
||||
than 4096 bytes without a newline, the connection is closed and the event
|
||||
is logged at `warn` level. The longest valid command (`reload`) is 6 bytes —
|
||||
4096 bytes provides ample room for future commands while preventing abuse.
|
||||
|
||||
Both limits apply per connection. The admin socket creates one tokio task per
|
||||
connection, so the limits also bound per-connection resource usage.
|
||||
|
||||
## Rationale
|
||||
|
||||
- Defense-in-depth: even with Unix permission protection, basic resource
|
||||
limits prevent accidental or deliberate resource exhaustion.
|
||||
- 5 seconds is generous for a local socket — a local process should be able
|
||||
to send a command within milliseconds. The timeout handles stuck clients and
|
||||
network issues on tunnel-mounted sockets.
|
||||
- 4096 bytes is 600x the longest current command. It accommodates future
|
||||
multi-field commands without allowing abuse.
|
||||
- These limits have no impact on legitimate admin socket usage — the current
|
||||
commands (`reload`, `status`) are tiny and complete instantly.
|
||||
|
||||
## Consequences
|
||||
|
||||
**Positive:**
|
||||
- No unbounded memory allocation from admin socket connections
|
||||
- No indefinite connection holding
|
||||
- Simple implementation: `tokio::time::timeout` + `tokio::io::take` (or
|
||||
`read_until` with a byte limit)
|
||||
- No impact on legitimate usage
|
||||
|
||||
**Negative:**
|
||||
- If a future admin command requires more than 4096 bytes (unlikely), the
|
||||
limit must be raised.
|
||||
- A 5-second timeout could cause issues if the admin socket is accessed via
|
||||
a slow network tunnel. In practice, this is unlikely — admin socket access
|
||||
is typically local.
|
||||
|
||||
## References
|
||||
|
||||
- ADR-014 — Unix domain socket config reload API
|
||||
- [operations.md](../operations.md) — Admin socket protocol
|
||||
- Security Review W4 — Admin socket has no read timeout or line length limit
|
||||
@@ -111,19 +111,21 @@ last_updated: 2026-06-12
|
||||
|
||||
### ~~OQ-09: How should `upstream_connect_timeout_secs` be enforced?~~
|
||||
|
||||
- **Origin**: Implementation review finding W4, ADR-015, ADR-017
|
||||
- **Origin**: Implementation review finding W4, ADR-015, ADR-017, Security
|
||||
Review C3
|
||||
- **Status**: resolved
|
||||
- **Priority**: medium
|
||||
- **Resolution**: This is an implementation gap, not an architectural unknown.
|
||||
The architecture already specifies a 5-second default connect timeout
|
||||
separate from the request timeout (ADR-015, ADR-017), and `SiteConfig`
|
||||
already includes `upstream_connect_timeout_secs`. The implementation must
|
||||
wire this field to hyper's `connect_timeout` parameter. If hyper's API
|
||||
doesn't expose a separate connect timeout, a two-phase `tokio::time::timeout`
|
||||
approach should be used for Phase 2. For Phase 1, the connect timeout field
|
||||
exists in config but is not enforced — this is a documented known gap. No ADR
|
||||
needed; the decision was already made in ADR-015.
|
||||
- **Cross-references**: ADR-015, ADR-017
|
||||
- **Resolution**: Implemented using a two-phase `tokio::time::timeout` approach.
|
||||
The inner timeout uses the per-site `upstream_connect_timeout_secs` (default
|
||||
5s) for the connect + first-byte phase, and the outer timeout uses
|
||||
`upstream_request_timeout_secs` (default 60s) for the full request/response
|
||||
cycle. The shared `HttpConnector` uses a 30-second connect timeout ceiling
|
||||
via `set_connect_timeout()` — this is a safety backstop, not the primary
|
||||
enforcement mechanism. The per-site `tokio::time::timeout` enforces the
|
||||
actual connect timeout. This ensures per-site values >5s work correctly
|
||||
(previously the hardcoded 5s connector timeout silently capped them). See
|
||||
ADR-026.
|
||||
- **Cross-references**: ADR-015, ADR-017, ADR-026
|
||||
|
||||
### ~~OQ-10: Should ACME contact email be a required config field?~~
|
||||
|
||||
@@ -134,9 +136,10 @@ last_updated: 2026-06-12
|
||||
specifies `acme_contact` as a required field in ACME mode (config.md
|
||||
validation rule 19). The field is defined in the `ListenerConfig` table and
|
||||
shown in TOML examples. Let's Encrypt requires a contact email for production
|
||||
certificate requests. The implementation bug (C2: `contact: vec![]`) must be
|
||||
fixed to use the configured `acme_contact` value. No new ADR needed — the
|
||||
decision is already documented in config.md and tls.md.
|
||||
certificate requests. The implementation bug (C2: `contact: vec![]`) has been
|
||||
fixed — `acme_contact` is now correctly wired from config to the ACME state
|
||||
machine. No new ADR needed — the decision is already documented in config.md
|
||||
and tls.md.
|
||||
- **Cross-references**: ADR-004
|
||||
|
||||
### ~~OQ-11: How should `X-Forwarded-Proto` be derived per-listener?~~
|
||||
@@ -168,4 +171,33 @@ last_updated: 2026-06-12
|
||||
disable access logging." The `log_request!` macro exists in the codebase
|
||||
but is not called — this is an implementation gap (W13), not an
|
||||
architectural question. No ADR needed; ADR-007 already covers the log format.
|
||||
- **Cross-references**: ADR-007
|
||||
- **Cross-references**: ADR-007
|
||||
|
||||
## Configuration
|
||||
|
||||
### OQ-13: Should `acme_contact` support multiple email addresses?
|
||||
|
||||
- **Origin**: Security Review S9, [config.md](config.md), [tls.md](tls.md)
|
||||
- **Status**: open
|
||||
- **Priority**: low
|
||||
- **Details**: `acme_contact` is currently a single `String`, but ACME supports
|
||||
multiple contact emails. The `AcmeTlsConfig.contact` field in the
|
||||
implementation is already `Vec<String>`, and the single config value is
|
||||
wrapped in `vec![...]`. Changing `acme_contact` to `Vec<String>` in the
|
||||
config schema would provide consistency with the ACME protocol. However,
|
||||
this is a config format change that requires migration documentation and
|
||||
backward compatibility considerations. For Phase 1, a single email is
|
||||
sufficient.
|
||||
- **Cross-references**: ADR-004
|
||||
|
||||
### OQ-14: Should rate limiter eviction interval and max age be configurable?
|
||||
|
||||
- **Origin**: Security Review S2, [operations.md](operations.md)
|
||||
- **Status**: open
|
||||
- **Priority**: low
|
||||
- **Details**: The eviction task interval (60s) and max age (300s) are
|
||||
currently hardcoded. In high-traffic deployments, a shorter interval or
|
||||
longer max age might be desirable. These would be dynamic config fields
|
||||
(hot-reloadable via ArcSwap) if added. For Phase 1, the hardcoded values
|
||||
are reasonable defaults.
|
||||
- **Cross-references**: ADR-006
|
||||
@@ -32,6 +32,12 @@ The rate limiter runs as axum middleware before the proxy handler. It uses a
|
||||
token bucket algorithm per client IP, matching nginx's `limit_req burst`
|
||||
semantics.
|
||||
|
||||
The client IP for rate limiting is determined **exclusively** from
|
||||
`ConnectInfo<SocketAddr>` — the TCP peer address set before TLS handshake.
|
||||
Client-supplied `X-Forwarded-For` headers must not be consulted because the
|
||||
rate limiter runs before the proxy handler injects trusted headers. See
|
||||
ADR-025.
|
||||
|
||||
Rate limits are global per-IP in Phase 1 (not per-site). A request from IP
|
||||
address X counts against the same bucket regardless of which site it targets.
|
||||
Per-site rate limits may be added in Phase 2.
|
||||
@@ -134,9 +140,19 @@ Logs are written to two destinations simultaneously:
|
||||
- **stdout/stderr**: Always-on, for `docker logs`, `journalctl`, and
|
||||
development use. Structured in the same format as the file output.
|
||||
|
||||
Both output destinations use `with_ansi(false)` to disable ANSI escape codes.
|
||||
This is critical for fail2ban log parsing (ANSI codes break regex matching) and
|
||||
for clean output in Docker containers where a terminal is not attached. See
|
||||
ADR-024.
|
||||
|
||||
The `tracing-subscriber` layer configuration supports both simultaneously via
|
||||
`Layer` composition.
|
||||
|
||||
Both output destinations must respect the `format` config value: when
|
||||
`format = "json"`, both file and stdout output must use JSON formatting.
|
||||
When `format = "text"`, both use text formatting. The format must not be
|
||||
silently ignored in any output path (see Security Review C4).
|
||||
|
||||
### File Logging and fail2ban
|
||||
|
||||
File logging is the primary integration point for fail2ban. A log file on a
|
||||
@@ -155,6 +171,12 @@ volumes:
|
||||
A corresponding fail2ban filter definition and jail configuration are provided
|
||||
as part of the deployment documentation.
|
||||
|
||||
**Filter regex note**: The fail2ban `failregex` pattern matches `RATE_LIMIT`
|
||||
without a `^` anchor because log lines have a timestamp/level prefix before the
|
||||
`RATE_LIMIT` keyword. The pattern `RATE_LIMIT client_ip=<HOST> host=\S+ path=\S+ status=\d+`
|
||||
matches the rate limit event anywhere in the line, which correctly handles the
|
||||
structured log format.
|
||||
|
||||
### Log Levels
|
||||
|
||||
| Level | Use |
|
||||
@@ -275,6 +297,11 @@ rationale.
|
||||
one newline-terminated command, receives one newline-terminated JSON
|
||||
response, then the server closes the connection.
|
||||
- **Message framing**: Newline-delimited (`\n`). Responses end with `\n`.
|
||||
- **Resource limits** (see ADR-027):
|
||||
- Read timeout: 5 seconds. Connections that send no complete command within
|
||||
5 seconds are closed. The timeout is logged at `debug` level.
|
||||
- Line length limit: 4096 bytes. Connections that send more than 4096 bytes
|
||||
without a newline are closed. The event is logged at `warn` level.
|
||||
- **Commands**:
|
||||
- `reload` — Re-read config file, validate, and swap DynamicConfig. Returns
|
||||
`{"status": "ok"}` or `{"status": "error", "message": "..."}`.
|
||||
@@ -298,9 +325,17 @@ On SIGTERM or SIGINT, the proxy performs a graceful shutdown:
|
||||
2. **Close idle keep-alive connections** — Send `Connection: close` on any idle
|
||||
connections in the keep-alive pool.
|
||||
3. **Wait for in-flight requests** — Up to `shutdown_timeout_secs` (default: 30)
|
||||
for active requests to complete. Server tasks are joined (not aborted) so
|
||||
that in-flight requests can drain normally. Only after the timeout expires
|
||||
are remaining tasks aborted.
|
||||
for active requests to complete. The proxy tracks in-flight requests using
|
||||
an atomic counter: each request **must** increment the counter when it
|
||||
begins and decrement when it completes (via guard drop). The increment
|
||||
must happen before the request task is spawned — if the counter is not
|
||||
incremented, the drain logic is broken (see Security Review C2). During
|
||||
drain, the proxy polls the counter every 100ms and exits early
|
||||
when it reaches zero. If the timeout expires before all requests complete,
|
||||
the proxy logs how many in-flight requests remain and proceeds to
|
||||
force-close. Server tasks are joined (not aborted) so that in-flight
|
||||
requests can drain normally. Only after the timeout expires are remaining
|
||||
tasks aborted.
|
||||
4. **Force-close remaining connections** — After the timeout, any remaining
|
||||
connections are forcefully closed via TCP RST.
|
||||
5. **Cancel background tasks** — ACME renewal tasks, rate limiter eviction task,
|
||||
@@ -580,11 +615,14 @@ All design decisions are documented as ADRs in [decisions/](decisions/).
|
||||
| [013](decisions/013-health-check-port.md) | Health check on separate local port | Localhost-only HTTP health check, configurable port |
|
||||
| [014](decisions/014-unix-socket-reload.md) | Unix domain socket config reload API | Programmatic reload with success/failure feedback |
|
||||
| [020](decisions/020-container-deployment.md) | Container deployment model | Defense-in-depth via container isolation; file-primary logging |
|
||||
| [024](decisions/024-ansi-disabled-logging.md) | ANSI-disabled logging | All log output uses `with_ansi(false)` for fail2ban and Docker compatibility |
|
||||
| [025](decisions/025-rate-limiter-ip-source.md) | Rate limiter IP source | ConnectInfo only, never client-supplied X-Forwarded-For |
|
||||
| [027](decisions/027-admin-socket-resource-limits.md) | Admin socket resource limits | 5s read timeout, 4096 byte line length limit |
|
||||
|
||||
## Open Questions
|
||||
|
||||
Open questions are tracked in [open-questions.md](open-questions.md). All
|
||||
questions affecting this document have been resolved:
|
||||
Open questions are tracked in [open-questions.md](open-questions.md). Key
|
||||
questions affecting this document:
|
||||
|
||||
- ~~**OQ-03**: Should the health check endpoint be on a separate port?~~ (resolved
|
||||
— ADR-013: separate local port, default 9900, localhost only)
|
||||
@@ -593,4 +631,6 @@ questions affecting this document have been resolved:
|
||||
9900 and admin socket only)
|
||||
- ~~**OQ-12**: Should request access logging be mandatory or optional?~~ (resolved
|
||||
— access logging is mandatory and always-on at `info` level; no configuration
|
||||
option to disable it)
|
||||
option to disable it)
|
||||
- **OQ-14**: Should rate limiter eviction interval and max age be configurable?
|
||||
(see [open-questions.md](open-questions.md))
|
||||
@@ -44,6 +44,9 @@ details.
|
||||
(SAN certificate) deployment models (ADR-019)
|
||||
- TLS termination with ACME (Let's Encrypt) and manual certificate management
|
||||
- Cipher suite restriction matching nginx scope (ECDHE-AES-GCM + TLS 1.3)
|
||||
- HTTP/2 support on the client-facing side (between client and proxy),
|
||||
with ALPN-based protocol detection (ADR-023). Upstream connections
|
||||
remain HTTP/1.1.
|
||||
- HTTP → HTTPS redirect
|
||||
- Host-based routing to multiple upstream services
|
||||
- Reverse proxy to Gitea at `127.0.0.1:3000` (git.alk.dev)
|
||||
@@ -55,7 +58,7 @@ details.
|
||||
- Configurable bind addresses (must be explicit, no `0.0.0.0`)
|
||||
- Local health check endpoint on separate port (default: 9900, localhost only)
|
||||
- Unix domain socket admin API for config reload with feedback
|
||||
- Graceful shutdown (SIGTERM handling)
|
||||
- Graceful shutdown (SIGTERM handling with in-flight request drain)
|
||||
- Systemd unit file
|
||||
- Dual licensing: MIT OR Apache-2.0
|
||||
|
||||
@@ -70,8 +73,10 @@ details.
|
||||
|
||||
### Out of Scope
|
||||
|
||||
- HTTP/2 or HTTP/3 proxying (services that need these run their own native
|
||||
Rust servers — e.g., `api.alk.dev` runs its own HTTP/2+ server)
|
||||
- HTTP/2 or HTTP/3 **proxying to upstreams** — the proxy communicates with
|
||||
upstreams over HTTP/1.1 (or HTTPS/1.1). HTTP/2 **from clients** is supported
|
||||
(see ADR-023). Services that need HTTP/2+ to their backends can handle
|
||||
termination themselves.
|
||||
- Load balancing or round-robin upstream selection
|
||||
- WebSocket proxying (can be added later if needed)
|
||||
- Static file serving
|
||||
@@ -128,7 +133,8 @@ but all routers share `Arc<ArcSwap<DynamicConfig>>` and
|
||||
`Arc<Mutex<HashMap<IpAddr, TokenBucket>>>` via axum State. Site routing is
|
||||
global: the `Host` header is matched against a single routing table collected
|
||||
from all listeners' site definitions. Hostnames must be unique across all
|
||||
listeners — see C1 resolution in the architecture review.
|
||||
listeners. Hostnames must be unique across all listeners — see Security & Bug
|
||||
Review #003, finding C1, resolved by ADR-025.
|
||||
|
||||
In container deployments (ADR-020), the proxy runs in a minimal container with
|
||||
`0.0.0.0` bind address and Docker port publishing. Upstream addresses use Docker
|
||||
@@ -143,11 +149,14 @@ loopback, LAN, and tunnel endpoints for multi-host deployments.
|
||||
|-------|---------|---------|-------|
|
||||
| `axum` | 0.8 | HTTP framework | Routing, middleware, extractors |
|
||||
| `tokio` | 1 (full) | Async runtime | Multi-threaded runtime |
|
||||
| `hyper` | 1 | HTTP protocol | Used via axum, and directly for proxy `Client` |
|
||||
| `hyper` | 1 | HTTP protocol | Used via axum, and directly for HTTP/2 server builder |
|
||||
| `hyper-util` | 0.1 | Hyper utilities | Client builder, TokioExecutor, auto::Builder |
|
||||
| `tower` | 0.5 | Middleware ecosystem | Service trait, layers |
|
||||
| `rustls` | 0.23 | TLS implementation | `aws_lc_rs` crypto provider |
|
||||
| `tokio-rustls` | 0.26 | Async TLS I/O | Wraps TCP with TLS |
|
||||
| `rustls-acme` | 0.12 | ACME client | Let's Encrypt auto-provisioning and renewal |
|
||||
| `hyper-rustls` | 0.27 | HTTPS client | Upstream HTTPS connections with rustls TLS |
|
||||
| `rustls-native-certs` | 0.8 | Native cert loading | System root certificates for upstream HTTPS validation |
|
||||
|
||||
### Supporting
|
||||
|
||||
@@ -206,6 +215,12 @@ All design decisions are documented as ADRs in [decisions/](decisions/).
|
||||
| [019](decisions/019-multi-config-listeners.md) | Multi-config listeners | `[[listeners]]` supporting both dedicated-IP and shared-IP deployment models |
|
||||
| [020](decisions/020-container-deployment.md) | Container deployment model | Defense-in-depth via container isolation; file-primary logging; flexible upstream addressing |
|
||||
| [021](decisions/021-x-forwarded-for-edge-proxy.md) | X-Forwarded-For edge proxy model | Replace, don't append — proxy is the edge, no trusted upstream proxies |
|
||||
| [022](decisions/022-health-check-scope.md) | Health check scope — local port and admin socket only | No `/health` route on main listener; health check is port 9900/admin socket only |
|
||||
| [023](decisions/023-http2-client-facing.md) | HTTP/2 client-facing support | ALPN-based protocol detection; HTTP/2 to clients, HTTP/1.1 to upstreams |
|
||||
| [024](decisions/024-ansi-disabled-logging.md) | ANSI-disabled logging | All log output uses `with_ansi(false)` for fail2ban and Docker compatibility |
|
||||
| [025](decisions/025-rate-limiter-ip-source.md) | Rate limiter IP source | ConnectInfo only, never client-supplied X-Forwarded-For |
|
||||
| [026](decisions/026-connector-timeout-ceiling.md) | Connector timeout ceiling | 30s ceiling on connector, per-site timeout via tokio::time::timeout |
|
||||
| [027](decisions/027-admin-socket-resource-limits.md) | Admin socket resource limits | 5s read timeout, 4096 byte line length limit |
|
||||
|
||||
## Open Questions
|
||||
|
||||
@@ -216,4 +231,6 @@ questions affecting this document have been resolved:
|
||||
- ~~**OQ-03**: Should the health check endpoint be on a separate port?~~ (resolved — ADR-013)
|
||||
- ~~**OQ-05**: Should the proxy bind to multiple addresses?~~ (resolved — single `bind_addr` per listener)
|
||||
- ~~**OQ-07**: Should per-site TLS overrides be supported for mixed ACME/manual domains?~~ (resolved — ADR-019: `[[listeners]]` with per-listener TLS config)
|
||||
- ~~**OQ-08**: Should `/health` use a less common path?~~ (resolved — ADR-022: no `/health` route on main listener; health check is port 9900/admin socket only)
|
||||
- ~~**OQ-08**: Should `/health` use a less common path?~~ (resolved — ADR-022: no `/health` route on main listener; health check is port 9900/admin socket only)
|
||||
- **OQ-13**: Should `acme_contact` support multiple email addresses? (see [open-questions.md](open-questions.md))
|
||||
- **OQ-14**: Should rate limiter eviction interval and max age be configurable? (see [open-questions.md](open-questions.md))
|
||||
@@ -21,7 +21,16 @@ general-purpose proxy library (ADR-002, ADR-010).
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Incoming HTTPS request
|
||||
Incoming HTTPS request (HTTP/1.1 or HTTP/2)
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────────────┐
|
||||
│ TLS Listener │
|
||||
│ ALPN protocol detection: │
|
||||
│ - h2 → hyper http2::Builder │
|
||||
│ - http/1.1 (or none) → auto::Builder │
|
||||
│ ConnectInfo<SocketAddr> from peer_addr │
|
||||
└───────┬──────────────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
@@ -29,14 +38,15 @@ Incoming HTTPS request
|
||||
│ (Host-based) │
|
||||
│ │
|
||||
│ match Host │
|
||||
│ header on │
|
||||
│ incoming req │
|
||||
│ header or │
|
||||
│ URI :authority │
|
||||
│ on incoming req │
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────┐
|
||||
│ Rate Limiting │ ← tower middleware layer
|
||||
│ Middleware │
|
||||
│ Middleware │ ← IP from ConnectInfo only (ADR-025)
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
@@ -45,9 +55,9 @@ Incoming HTTPS request
|
||||
│ Injection │
|
||||
│ │
|
||||
│ X-Real-IP │ ← connect_info remote_addr
|
||||
│ X-Forwarded-For │ ← append to existing or set
|
||||
│ X-Forwarded-Proto │ ← "https" (or "http" on port 80)
|
||||
│ Host │ ← original host header (already set)
|
||||
│ X-Forwarded-For │ ← replace (edge proxy model)
|
||||
│ X-Forwarded-Proto │ ← "https" (always, on TLS listener)
|
||||
│ Host │ ← original host (already set)
|
||||
└───────┬─────────┘
|
||||
│
|
||||
▼
|
||||
@@ -66,6 +76,7 @@ Incoming HTTPS request
|
||||
│ original req │
|
||||
│ 2. Forward req │
|
||||
│ to upstream │
|
||||
│ (HTTP/1.1) │
|
||||
│ 3. Stream │
|
||||
│ response back │
|
||||
└─────────────────┘
|
||||
@@ -75,19 +86,27 @@ Incoming HTTPS request
|
||||
|
||||
### 1. Host-Based Routing
|
||||
|
||||
The axum router uses a `Host` extractor to match incoming requests to site
|
||||
definitions from `DynamicConfig`. Sites are defined per-listener in the TOML
|
||||
configuration for organizational purposes, but at runtime they are collected
|
||||
into a single global routing table. The proxy looks up the `Host` header in
|
||||
this global table and either proxies to the upstream or returns 404.
|
||||
The axum router matches incoming requests to site definitions from
|
||||
`DynamicConfig`. Sites are defined per-listener in the TOML configuration for
|
||||
organizational purposes, but at runtime they are collected into a single global
|
||||
routing table. The proxy looks up the host in this global table and either
|
||||
proxies to the upstream or returns 404.
|
||||
|
||||
Host matching is **case-insensitive** per RFC 7230 §2.7.3. The `Host` header
|
||||
is normalized to lowercase before matching. Site `host` values in
|
||||
configuration are normalized to lowercase during validation.
|
||||
Host matching is **case-insensitive** per RFC 7230 §2.7.3. The host is
|
||||
normalized to lowercase before matching. Site `host` values in configuration are
|
||||
normalized to lowercase during validation.
|
||||
|
||||
The `Host` header port component (e.g., `git.alk.dev:443`) is stripped before
|
||||
matching. Site `host` values must not include ports.
|
||||
|
||||
**HTTP/2 host resolution**: In HTTP/2, the host is conveyed via the
|
||||
`:authority` pseudo-header rather than the `Host` header. Hyper represents this
|
||||
as the URI host. The proxy handler resolves the host by first checking the
|
||||
`Host` header, then falling back to `req.uri().host()`. This correctly handles
|
||||
both HTTP/1.1 (which always has a `Host` header) and HTTP/2 (which uses
|
||||
`:authority`/URI host). If neither is present, the proxy returns 400 Bad
|
||||
Request. See ADR-023.
|
||||
|
||||
The proxy does not filter or restrict paths. All paths and query strings on a
|
||||
known host are forwarded to the upstream without modification.
|
||||
|
||||
@@ -97,7 +116,21 @@ port (default: 9900, bound to `127.0.0.1` only) and the admin socket's `status`
|
||||
command — not by intercepting traffic on the public-facing proxy. See ADR-013
|
||||
and ADR-022.
|
||||
|
||||
### 2. Proxy Header Injection
|
||||
### 2. Rate Limiter IP Source
|
||||
|
||||
The rate limiting middleware runs **before** the proxy handler. At that point,
|
||||
no proxy headers have been injected — any `X-Forwarded-For` header present is
|
||||
from the client and is untrusted. The rate limiter must use
|
||||
`ConnectInfo<SocketAddr>` as the **sole** source of client IP addresses.
|
||||
Client-supplied `X-Forwarded-For` headers must not be consulted for rate
|
||||
limiting. See ADR-025.
|
||||
|
||||
`ConnectInfo<SocketAddr>` is always present because each listener populates it
|
||||
via `into_make_service_with_connect_info::<SocketAddr>()`. If `ConnectInfo`
|
||||
is absent, the request must be rejected rather than falling back to an
|
||||
untrusted header.
|
||||
|
||||
### 3. Proxy Header Injection
|
||||
|
||||
Headers are injected before forwarding. The proxy is an **edge proxy** — it
|
||||
sits directly in front of the internet with no trusted proxies upstream. This
|
||||
@@ -116,16 +149,21 @@ extracting `TcpStream::peer_addr()` before wrapping the connection in
|
||||
`TlsStream`. Each listener provides this information to its axum Router via
|
||||
`axum::ServiceExt::into_make_service_with_connect_info::<SocketAddr>()`.
|
||||
|
||||
### 3. Request Forwarding
|
||||
### 4. Request Forwarding
|
||||
|
||||
The proxy handler constructs a new request to the upstream:
|
||||
|
||||
1. Build the upstream URI using the site's `upstream_scheme` and `upstream`
|
||||
address, preserving the original path and query string
|
||||
address, preserving the original path and query string. **If URI
|
||||
construction fails** (e.g., the resulting URI is malformed), the proxy must
|
||||
return 502 Bad Gateway and log the error at `warn` level. The proxy must
|
||||
never silently drop parts of the URI (such as the query string) — a
|
||||
malformed upstream URI is an error, not a recoverable condition.
|
||||
2. Copy the request method, headers, and body from the original
|
||||
3. Inject proxy headers (X-Real-IP, X-Forwarded-For, X-Forwarded-Proto)
|
||||
4. Send the request via a shared hyper Client instance
|
||||
5. Stream the response back to the client (chunk-by-chunk, not buffered)
|
||||
4. Remove hop-by-hop headers (Connection, Keep-Alive, Transfer-Encoding, etc.)
|
||||
5. Send the request via a shared hyper Client instance
|
||||
6. Stream the response back to the client (chunk-by-chunk, not buffered)
|
||||
|
||||
If the client disconnects while the upstream is still sending, the upstream
|
||||
connection is closed and the event is logged at `debug` level. If the
|
||||
@@ -135,19 +173,30 @@ The proxy handler constructs a new request to the upstream:
|
||||
The hyper Client is created once at startup and shared via axum's `State`. It
|
||||
must be configured with (see ADR-017 for rationale):
|
||||
- Connection pooling (hyper default behavior)
|
||||
- HTTP/1.1 only for upstream connections (HTTP/2 proxying is out of scope)
|
||||
- HTTP/1.1 only for upstream connections (HTTP/2 proxying to upstreams is out
|
||||
of scope; see ADR-023 for the distinction between client-facing HTTP/2 and
|
||||
upstream HTTP/2)
|
||||
- No redirect following (proxies should not follow redirects)
|
||||
- Separate connect timeout and request timeout (see ADR-015, ADR-017)
|
||||
|
||||
Two client instances are created at startup:
|
||||
- **HTTP client**: For upstream connections using `http://` scheme
|
||||
- **HTTPS client**: For upstream connections using `https://` scheme (using
|
||||
`hyper-rustls` with system native TLS root certificates for certificate
|
||||
validation)
|
||||
|
||||
Per-site timeout overrides are available via `upstream_connect_timeout_secs`
|
||||
and `upstream_request_timeout_secs` in `SiteConfig` (see ADR-015). When not
|
||||
specified, defaults of 5s connect and 60s request are used.
|
||||
specified, defaults of 5s connect and 60s request are used. Both timeouts are
|
||||
enforced using `tokio::time::timeout`, with the connect timeout nested inside
|
||||
the request timeout to ensure the overall deadline is respected.
|
||||
|
||||
### 4. Header Handling
|
||||
### 5. Header Handling
|
||||
|
||||
The proxy must handle request and response headers correctly to avoid security
|
||||
issues and protocol violations.
|
||||
|
||||
**Headers removed before forwarding (hop-by-hop headers per RFC 2616 §13.5.1):**
|
||||
**Headers removed before forwarding (hop-by-hop headers per RFC 7230 §6.1):**
|
||||
|
||||
- `Connection`
|
||||
- `Keep-Alive`
|
||||
@@ -162,6 +211,12 @@ These headers are connection-specific and must not be forwarded to the
|
||||
upstream. Removing `Proxy-Authorization` and `Proxy-Authenticate` prevents
|
||||
credential leakage.
|
||||
|
||||
**Response headers removed:**
|
||||
|
||||
- `Server`: The upstream's `Server` header is intentionally removed as a
|
||||
defense-in-depth measure. The proxy does not add its own `Server` header
|
||||
either. This hides upstream server identity from clients.
|
||||
|
||||
**Headers added or modified:**
|
||||
|
||||
See the Proxy Header Injection section above for the full list of proxy headers
|
||||
@@ -174,12 +229,13 @@ See the Proxy Header Injection section above for the full list of proxy headers
|
||||
|
||||
**Response headers:**
|
||||
|
||||
Upstream response headers are forwarded as-is to the client, with the following
|
||||
Upstream response headers are forwarded to the client with the following
|
||||
exceptions:
|
||||
- Hop-by-hop headers listed above are removed
|
||||
- The `Server` header is removed (defense-in-depth: hiding upstream identity)
|
||||
- The proxy does not add a `Server` header to responses
|
||||
|
||||
### 5. Error Handling
|
||||
### 6. Error Handling
|
||||
|
||||
All error responses use plain text bodies with no proxy version or identity
|
||||
information. No upstream error details are included. Response format:
|
||||
@@ -189,15 +245,17 @@ information. No upstream error details are included. Response format:
|
||||
|
||||
| Upstream Condition | Response | Body | Notes |
|
||||
|-------------------|----------|------|-------|
|
||||
| Upstream reachable | Stream response as-is | (upstream body) | Headers, status, body all forwarded |
|
||||
| Upstream reachable | Stream response as-is | (upstream body) | Headers, status, body all forwarded (minus hop-by-hop and Server headers) |
|
||||
| Upstream unreachable | 502 Bad Gateway | `Bad Gateway` | Logged at `warn` level |
|
||||
| Upstream timeout | 504 Gateway Timeout | `Gateway Timeout` | Logged at `warn` level |
|
||||
| Upstream connect timeout | 504 Gateway Timeout | `Gateway Timeout` | Connect phase timed out; logged at `warn` level |
|
||||
| Upstream request timeout | 504 Gateway Timeout | `Gateway Timeout` | Full request timed out; logged at `warn` level |
|
||||
| Upstream TLS validation failure | 502 Bad Gateway | `Bad Gateway` | Upstream HTTPS cert validation failed |
|
||||
| Request body too large | 413 Payload Too Large | `Payload Too Large` | From `DefaultBodyLimit` middleware |
|
||||
| Rate limit exceeded | 429 Too Many Requests | `Too Many Requests` | Logged at `info` level |
|
||||
| Unknown Host header | 404 Not Found | `Not Found` | No matching site definition |
|
||||
| Missing Host header | 400 Bad Request | `Bad Request` | Required for routing |
|
||||
| Missing Host header (and no URI host) | 400 Bad Request | `Bad Request` | Required for routing; HTTP/2 clients use `:authority` |
|
||||
|
||||
### 6. HTTP → HTTPS Redirect
|
||||
### 7. HTTP → HTTPS Redirect
|
||||
|
||||
A separate HTTP listener on port 80 (per listener) handles redirect. It reads
|
||||
the `Host` header from the incoming request and returns a 301 Permanent Redirect
|
||||
@@ -219,18 +277,34 @@ Each listener has its own HTTP redirect on its own bind address.
|
||||
## Upstream Connection
|
||||
|
||||
The upstream connection scheme defaults to `http://` since the proxy and backend
|
||||
services typically run on the same host (e.g., `127.0.0.1:3000`). The
|
||||
`upstream_scheme` field in each site's configuration allows specifying `https://`
|
||||
for upstreams that require TLS (e.g., separate hosts or secure internal services).
|
||||
services typically run on the same host (e.g., `127.0.0.1:3000`) or the same
|
||||
Docker network (e.g., `gitea:3000`). The `upstream_scheme` field in each site's
|
||||
configuration allows specifying `https://` for upstreams that require TLS
|
||||
(e.g., separate hosts or secure internal services).
|
||||
|
||||
For the initial deployment, upstream connections use plain HTTP (e.g.,
|
||||
`git.alk.dev` → `127.0.0.1:3000`, `alk.dev` → `127.0.0.1:8080`) since TLS
|
||||
between the proxy and backend services on loopback is unnecessary.
|
||||
`git.alk.dev` → `gitea:3000`, `alk.dev` → `app:8080`) since TLS between the
|
||||
proxy and backend services on the same Docker network or loopback is
|
||||
unnecessary.
|
||||
|
||||
When `upstream_scheme` is `"https"`, the proxy validates the upstream's TLS
|
||||
certificate using the system's native TLS root certificates (via `rustls` root
|
||||
cert store). Certificate validation failures result in a 502 Bad Gateway
|
||||
response. No certificate pinning or custom CA support is provided in Phase 1.
|
||||
cert store loaded by `rustls-native-certs`). Certificate validation failures
|
||||
result in a 502 Bad Gateway response. No certificate pinning or custom CA
|
||||
support is provided in Phase 1.
|
||||
|
||||
Two shared hyper Client instances handle upstream connections:
|
||||
- **HTTP client** (`Client<HttpConnector, Body>`): For `http://` upstreams
|
||||
- **HTTPS client** (`Client<HttpsConnector<HttpConnector>, Body>`): For
|
||||
`https://` upstreams, using `hyper-rustls` with system native certificates
|
||||
|
||||
Both clients use a shared `HttpConnector` with a connect timeout ceiling
|
||||
(30 seconds) set via `HttpConnector::set_connect_timeout()`. This ceiling
|
||||
ensures TCP connections cannot hang indefinitely even if the per-site
|
||||
`tokio::time::timeout` wrapper fails. The per-site connect timeout (default
|
||||
5s) is enforced by `tokio::time::timeout`, which fires at the correct
|
||||
per-site threshold. The connector ceiling is a safety backstop, not the
|
||||
primary enforcement mechanism. See ADR-026.
|
||||
|
||||
## Body Size Limit
|
||||
|
||||
@@ -253,14 +327,22 @@ All design decisions are documented as ADRs in [decisions/](decisions/).
|
||||
| [017](decisions/017-upstream-connection-defaults.md) | Upstream connection defaults | HTTP/1.1, no redirects, connection pooling |
|
||||
| [018](decisions/018-body-size-limit.md) | Request body size limit | 100 MB default matching nginx, Gitea push compatibility |
|
||||
| [021](decisions/021-x-forwarded-for-edge-proxy.md) | X-Forwarded-For edge proxy model | Replace, don't append — proxy is the edge, no trusted upstream proxies |
|
||||
| [023](decisions/023-http2-client-facing.md) | HTTP/2 client-facing support | ALPN-based protocol detection; HTTP/2 to clients, HTTP/1.1 to upstreams |
|
||||
| [025](decisions/025-rate-limiter-ip-source.md) | Rate limiter IP source | ConnectInfo only, never client-supplied X-Forwarded-For |
|
||||
| [026](decisions/026-connector-timeout-ceiling.md) | Connector timeout ceiling | 30s ceiling on connector, per-site timeout via tokio::time::timeout |
|
||||
|
||||
## Open Questions
|
||||
|
||||
Open questions are tracked in [open-questions.md](open-questions.md). All
|
||||
questions affecting this document have been resolved:
|
||||
Open questions are tracked in [open-questions.md](open-questions.md). Key
|
||||
questions affecting this document:
|
||||
|
||||
- ~~**OQ-06**: Should upstream timeouts be configurable per-site?~~ (resolved —
|
||||
ADR-015: per-site timeout overrides with defaults)
|
||||
- ~~**OQ-08**: Should the `/health` path use a less common endpoint to avoid
|
||||
upstream collision?~~ (resolved — ADR-022: no `/health` route on the main
|
||||
listener; health checking is via port 9900 and admin socket only)
|
||||
listener; health checking is via port 9900 and admin socket only)
|
||||
- ~~**OQ-09**: How should `upstream_connect_timeout_secs` be enforced?~~
|
||||
(resolved — ADR-026: 30s connector ceiling, per-site timeout via
|
||||
`tokio::time::timeout`)
|
||||
- **OQ-13**: Should `acme_contact` support multiple email addresses? (see
|
||||
[open-questions.md](open-questions.md))
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
status: draft
|
||||
last_updated: 2026-06-11
|
||||
status: reviewed
|
||||
last_updated: 2026-06-12
|
||||
---
|
||||
|
||||
# TLS Termination
|
||||
@@ -175,15 +175,33 @@ maps SNI hostnames to certificate/key pairs loaded from disk.
|
||||
For ACME mode, the `ServerConfig` is built with `with_cert_resolver()`, passing
|
||||
the `ResolvesServerCertAcme` resolver. The ACME configuration includes the
|
||||
domains listed in that listener's `acme_domains`, and the resolver manages the
|
||||
certificate. The ACME TLS-ALPN-01 protocol identifier (`acme-tls/1`) must be
|
||||
registered in the `alpn_protocols` list so the server can respond to
|
||||
TLS-ALPN-01 challenges.
|
||||
certificate.
|
||||
|
||||
The TLS `ServerConfig` advertises ALPN protocols to enable HTTP/2 negotiation.
|
||||
The ALPN configuration differs by TLS mode:
|
||||
|
||||
- **ACME mode**: `h2`, `http/1.1`, and `acme-tls/1`. The `acme-tls/1` entry is
|
||||
required for TLS-ALPN-01 challenge verification during certificate provisioning.
|
||||
- **Manual mode** (single-cert and multi-domain/SNI): `h2` and `http/1.1` only.
|
||||
The `acme-tls/1` entry is not included because manual mode does not use ACME
|
||||
challenges.
|
||||
|
||||
After the TLS handshake, the proxy inspects the negotiated ALPN protocol to
|
||||
select the appropriate HTTP server: `h2` triggers
|
||||
`hyper::server::conn::http2::Builder`, while `http/1.1` (or no ALPN) triggers
|
||||
`hyper_util::server::conn::auto::Builder`. See ADR-023 for details.
|
||||
|
||||
Both modes use the `aws_lc_rs` crypto provider with safe default protocol
|
||||
versions (TLS 1.2 and TLS 1.3).
|
||||
|
||||
## SNI-Based Certificate Selection
|
||||
|
||||
After the TLS handshake, the proxy inspects the negotiated ALPN protocol to
|
||||
determine whether to serve the connection as HTTP/2 or HTTP/1.1. If the client
|
||||
negotiated `h2` via ALPN, the proxy uses `hyper::server::conn::http2::Builder`;
|
||||
otherwise, it uses `hyper_util::server::conn::auto::Builder` with HTTP/1.1
|
||||
and upgrade support. See ADR-023 for details.
|
||||
|
||||
### Dedicated-IP Single-Domain (Multi-Config)
|
||||
|
||||
In the dedicated-IP model, each listener binds to its own IP address and serves
|
||||
@@ -305,6 +323,7 @@ All design decisions are documented as ADRs in [decisions/](decisions/).
|
||||
| [011](decisions/011-multi-domain-tls.md) | Multi-domain TLS config | Single SAN certificate covering all domains via rustls-acme |
|
||||
| [012](decisions/012-cipher-suite-restriction.md) | Restrict cipher suites | Match nginx scope: four ECDHE-AES-GCM suites for TLS 1.2, all TLS 1.3 suites |
|
||||
| [019](decisions/019-multi-config-listeners.md) | Multi-config listeners | `[[listeners]]` supporting both dedicated-IP and shared-IP deployment models |
|
||||
| [023](decisions/023-http2-client-facing.md) | HTTP/2 client-facing support | ALPN-based protocol detection; `h2` and `http/1.1` advertised |
|
||||
|
||||
## Open Questions
|
||||
|
||||
|
||||
705
docs/reviews/003-security-and-bug-review.md
Normal file
705
docs/reviews/003-security-and-bug-review.md
Normal file
@@ -0,0 +1,705 @@
|
||||
---
|
||||
status: draft
|
||||
last_updated: 2026-06-12
|
||||
reviewed_code:
|
||||
- src/main.rs
|
||||
- src/server.rs
|
||||
- src/cli.rs
|
||||
- src/lib.rs
|
||||
- src/shutdown.rs
|
||||
- src/health.rs
|
||||
- src/utils.rs
|
||||
- src/config/static_config.rs
|
||||
- src/config/dynamic_config.rs
|
||||
- src/config/validation.rs
|
||||
- src/config/test_fixtures.rs
|
||||
- src/config/mod.rs
|
||||
- src/proxy/handler.rs
|
||||
- src/proxy/headers.rs
|
||||
- src/proxy/body_limit.rs
|
||||
- src/proxy/error.rs
|
||||
- src/proxy/mod.rs
|
||||
- src/rate_limit/mod.rs
|
||||
- src/rate_limit/bucket.rs
|
||||
- src/admin/socket.rs
|
||||
- src/admin/mod.rs
|
||||
- src/logging/mod.rs
|
||||
- src/logging/format.rs
|
||||
- src/tls/acceptor.rs
|
||||
- src/tls/acme.rs
|
||||
- src/tls/config.rs
|
||||
- src/tls/redirect.rs
|
||||
- src/tls/mod.rs
|
||||
- tests/integration_test.rs
|
||||
- tests/helpers/http_test_helper.rs
|
||||
- tests/helpers/tls_test_helper.rs
|
||||
- tests/helpers/mod.rs
|
||||
reviewer: code-reviewer
|
||||
---
|
||||
|
||||
# Security & Bug Review #003
|
||||
|
||||
## Purpose
|
||||
|
||||
Third review of the codebase, focused on security vulnerabilities, logic bugs,
|
||||
and correctness issues that survived the first two reviews. Also flags dead code,
|
||||
code smells, and test gaps.
|
||||
|
||||
## Severity Definitions
|
||||
|
||||
| Severity | Meaning |
|
||||
|----------|---------|
|
||||
| **Critical** | Will cause incorrect behavior or security issues in production |
|
||||
| **Warning** | Could cause issues under specific conditions or represents a missed edge case |
|
||||
| **Suggestion** | Code quality, style, or minor improvement opportunity |
|
||||
|
||||
---
|
||||
|
||||
## Critical Findings
|
||||
|
||||
### C1. Rate Limiter Uses Client-Supplied X-Forwarded-For for IP Identification
|
||||
|
||||
**File**: `src/rate_limit/mod.rs:66-76`
|
||||
|
||||
**Problem**: The `rate_limit_middleware` extracts the client IP by checking the
|
||||
`X-Forwarded-For` header **first**, then falling back to `ConnectInfo`:
|
||||
|
||||
```rust
|
||||
let client_ip = req
|
||||
.headers()
|
||||
.get("x-forwarded-for") // <-- checked FIRST
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|v| v.split(',').next())
|
||||
.and_then(|v| v.trim().parse::<IpAddr>().ok())
|
||||
.or_else(|| {
|
||||
req.extensions()
|
||||
.get::<axum::extract::ConnectInfo<std::net::SocketAddr>>()
|
||||
.map(|ci| ci.ip())
|
||||
});
|
||||
```
|
||||
|
||||
The middleware runs **before** the proxy handler. At that point,
|
||||
`inject_proxy_headers` has not yet run, so `X-Forwarded-For` is whatever the
|
||||
**client** sent — completely untrusted. This creates two attack vectors:
|
||||
|
||||
1. **Rate limit bypass**: Attacker sends each request with
|
||||
`X-Forwarded-For: <random IP>`. Every request appears to come from a
|
||||
different IP, evading the per-IP token bucket entirely.
|
||||
2. **Denial-of-service via IP spoofing**: Attacker sends requests with
|
||||
`X-Forwarded-For: <victim IP>`. The victim's IP gets rate-limited and their
|
||||
legitimate requests receive 429s.
|
||||
|
||||
The proxy is the **edge** — it terminates TLS directly from the internet. The
|
||||
only trustworthy source of client IP is `ConnectInfo<SocketAddr>`, which
|
||||
`ConnectInfoService` sets from the TCP peer address before TLS handshake
|
||||
(`src/server.rs:171-173`).
|
||||
|
||||
**Solution**: Swap the priority: check `ConnectInfo` first, and only fall back
|
||||
to `X-Forwarded-For` if `ConnectInfo` is absent (which shouldn't happen in the
|
||||
current deployment). The `X-Forwarded-For` header from the client is untrusted
|
||||
at the edge.
|
||||
|
||||
```rust
|
||||
let client_ip = req
|
||||
.extensions()
|
||||
.get::<axum::extract::ConnectInfo<std::net::SocketAddr>>()
|
||||
.map(|ci| ci.ip())
|
||||
.or_else(|| {
|
||||
req.headers()
|
||||
.get("x-forwarded-for")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|v| v.split(',').next())
|
||||
.and_then(|v| v.trim().parse::<IpAddr>().ok())
|
||||
});
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### C2. InFlightCounter Never Increments — Drain Logic Is Completely Broken
|
||||
|
||||
**File**: `src/server.rs:17-46,73-74`
|
||||
|
||||
**Problem**: `InFlightCounter` has an `increment()` method that is **never
|
||||
called** anywhere in the codebase. The `InFlightGuard` only calls `decrement()`
|
||||
on drop:
|
||||
|
||||
```rust
|
||||
struct InFlightGuard(Arc<InFlightCounter>);
|
||||
|
||||
impl Drop for InFlightGuard {
|
||||
fn drop(&mut self) {
|
||||
self.0.decrement(); // only decrements!
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The guard is created in `serve_https_listener`:
|
||||
|
||||
```rust
|
||||
tokio::spawn(async move {
|
||||
let _guard = InFlightGuard(in_flight.clone());
|
||||
// ...
|
||||
});
|
||||
```
|
||||
|
||||
Since `increment()` is never called, `count` stays at 0. When the first guard
|
||||
drops, `fetch_sub(1)` on an `AtomicUsize` with value 0 wraps to `usize::MAX`.
|
||||
`is_zero()` checks `count == 0`, which will never be true again. The drain loop
|
||||
in `main.rs:250` will always time out and report `usize::MAX` remaining
|
||||
connections.
|
||||
|
||||
**Solution**: Call `in_flight.increment()` before spawning the connection task:
|
||||
|
||||
```rust
|
||||
in_flight.increment();
|
||||
tokio::spawn(async move {
|
||||
let _guard = InFlightGuard(in_flight.clone());
|
||||
// ...
|
||||
});
|
||||
```
|
||||
|
||||
Or fold the increment into `InFlightGuard::new()`:
|
||||
|
||||
```rust
|
||||
impl InFlightGuard {
|
||||
fn new(counter: Arc<InFlightCounter>) -> Self {
|
||||
counter.increment();
|
||||
Self(counter)
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### C3. Per-Site `upstream_connect_timeout_secs` Silently Capped at 5 Seconds
|
||||
|
||||
**File**: `src/proxy/handler.rs:86-98,218-224,226-229`
|
||||
|
||||
**Problem**: The proxy handler applies a per-site connect timeout via
|
||||
`tokio::time::timeout`:
|
||||
|
||||
```rust
|
||||
let connect_timeout = Duration::from_secs(site.upstream_connect_timeout_secs);
|
||||
// ...
|
||||
tokio::time::timeout(connect_timeout, state.http_client.request(upstream_req)).await
|
||||
```
|
||||
|
||||
But the HTTP connector inside the client has its own hardcoded connect timeout:
|
||||
|
||||
```rust
|
||||
pub fn create_http_client() -> Client<HttpConnector, Body> {
|
||||
let mut connector = HttpConnector::new();
|
||||
connector.set_connect_timeout(Some(Duration::from_secs(5))); // hardcoded
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
The connector's internal timeout takes precedence — if
|
||||
`upstream_connect_timeout_secs` is set to 10s, the connector still times out at
|
||||
5s. The `tokio::time::timeout` wrapper can only make the effective timeout
|
||||
**shorter**, not longer. Per-site connect timeout values > 5s are silently
|
||||
ignored.
|
||||
|
||||
**Solution**: Set the connector's connect timeout to match (or exceed) the
|
||||
per-site value, or remove the hardcoded connector timeout and rely solely on
|
||||
the `tokio::time::timeout` wrapper. Since the client is shared across all sites,
|
||||
the simplest fix is to set the connector timeout to a high value (e.g., 30s)
|
||||
and let the per-site `tokio::time::timeout` enforce the actual limit:
|
||||
|
||||
```rust
|
||||
connector.set_connect_timeout(Some(Duration::from_secs(30)));
|
||||
```
|
||||
|
||||
Or make the connector timeout configurable per-site by creating clients lazily.
|
||||
|
||||
---
|
||||
|
||||
### C4. `init_json` Without Log File Doesn't Enable JSON Format
|
||||
|
||||
**File**: `src/logging/mod.rs:54-59`
|
||||
|
||||
**Problem**: When `format = "json"` is configured but no `log_file_path` is set,
|
||||
the `None` branch of `init_json` creates a layer **without** calling `.json()`:
|
||||
|
||||
```rust
|
||||
None => {
|
||||
let layer = tracing_subscriber::fmt::layer()
|
||||
.with_ansi(false)
|
||||
.with_filter(env_filter);
|
||||
tracing_subscriber::registry().with(layer).try_init()?;
|
||||
}
|
||||
```
|
||||
|
||||
The output is plain text, not JSON. The `format = "json"` config value is
|
||||
silently ignored. The `Some(path)` branch correctly calls `.json()` on both
|
||||
layers.
|
||||
|
||||
**Solution**: Add `.json()` to the `None` branch:
|
||||
|
||||
```rust
|
||||
None => {
|
||||
let layer = tracing_subscriber::fmt::layer()
|
||||
.json()
|
||||
.with_ansi(false)
|
||||
.with_filter(env_filter);
|
||||
tracing_subscriber::registry().with(layer).try_init()?;
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Warning Findings
|
||||
|
||||
### W1. `is_valid_upstream` Doesn't Validate the Host Part
|
||||
|
||||
**File**: `src/config/validation.rs:309-327`
|
||||
|
||||
**Problem**: `is_valid_upstream` checks that the upstream has a `host:port`
|
||||
format with a valid port number, but performs **no validation on the host
|
||||
part** beyond checking it's non-empty and doesn't start with `http://` or
|
||||
`https://`. Values like `!!!bad!!!:3000` or `@#$%:8080` pass validation.
|
||||
|
||||
**Solution**: Validate the host part is a valid DNS name or IP address. Reuse
|
||||
`is_valid_hostname` for DNS names and add an IP address parse check for the
|
||||
host portion (handling IPv6 bracket notation):
|
||||
|
||||
```rust
|
||||
fn is_valid_upstream(upstream: &str) -> bool {
|
||||
if let Some(idx) = upstream.rfind(':') {
|
||||
let host_part = &upstream[..idx];
|
||||
let port_str = &upstream[idx + 1..];
|
||||
if host_part.is_empty() { return false; }
|
||||
if upstream.starts_with("http://") || upstream.starts_with("https://") { return false; }
|
||||
let port: u16 = match port_str.parse() { Ok(p) => p, Err(_) => return false };
|
||||
if port == 0 { return false; }
|
||||
// Validate host part
|
||||
if host_part.starts_with('[') && host_part.ends_with(']') {
|
||||
let inner = &host_part[1..host_part.len()-1];
|
||||
inner.parse::<std::net::Ipv6Addr>().is_ok()
|
||||
} else {
|
||||
host_part.parse::<std::net::IpAddr>().is_ok() || is_valid_hostname(host_part)
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### W2. ACME Contact Validation Only Checks `mailto:` Prefix
|
||||
|
||||
**File**: `src/config/validation.rs:149`
|
||||
|
||||
**Problem**: The validation checks `contact.starts_with("mailto:")` but doesn't
|
||||
verify there's an actual email address after the prefix. `acme_contact =
|
||||
"mailto:"` (empty email) passes validation but will fail at the Let's Encrypt
|
||||
API with a 400-level error at certificate provisioning time — after the proxy
|
||||
has already started.
|
||||
|
||||
**Solution**: Validate that the string after `mailto:` is non-empty and contains
|
||||
an `@` sign:
|
||||
|
||||
```rust
|
||||
if contact.is_empty() || !contact.starts_with("mailto:") {
|
||||
errors.push(ValidationError::AcmeContactInvalid { ... });
|
||||
} else {
|
||||
let email = &contact[7..]; // after "mailto:"
|
||||
if email.is_empty() || !email.contains('@') {
|
||||
errors.push(ValidationError::AcmeContactInvalid { ... });
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### W3. `build_upstream_uri` Silently Drops Query String on Parse Failure
|
||||
|
||||
**File**: `src/proxy/handler.rs:190-202`
|
||||
|
||||
**Problem**: If the full upstream URI (with query string) fails to parse, the
|
||||
fallback drops the query string entirely and `.unwrap()`s the result:
|
||||
|
||||
```rust
|
||||
uri_string.parse::<Uri>().unwrap_or_else(|_| {
|
||||
format!("{}://{}{}", scheme, upstream, path) // query dropped!
|
||||
.parse::<Uri>()
|
||||
.unwrap() // panics if this also fails
|
||||
})
|
||||
```
|
||||
|
||||
This silently corrupts requests — the upstream receives the wrong URL with no
|
||||
query parameters, and neither the client nor operator is notified. The
|
||||
`.unwrap()` on the fallback parse could also panic (though unlikely since
|
||||
`scheme://host:port/path` should always parse).
|
||||
|
||||
**Solution**: Log a warning and return a 502 Bad Gateway instead of silently
|
||||
dropping data:
|
||||
|
||||
```rust
|
||||
fn build_upstream_uri(scheme: &str, upstream: &str, original_uri: &Uri) -> Result<Uri, ()> {
|
||||
let path = original_uri.path();
|
||||
let query = original_uri.query().map(|q| format!("?{}", q)).unwrap_or_default();
|
||||
let uri_string = format!("{}://{}{}{}", scheme, upstream, path, query);
|
||||
uri_string.parse::<Uri>().map_err(|e| {
|
||||
warn!(error = %e, uri = %uri_string, "failed to parse upstream URI");
|
||||
})
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### W4. Admin Socket Has No Read Timeout or Line Length Limit
|
||||
|
||||
**File**: `src/admin/socket.rs:166-210`
|
||||
|
||||
**Problem**: `handle_connection` reads one newline-terminated line with
|
||||
`reader.read_line(&mut line)` but sets no timeout and no length limit. A
|
||||
malicious client with access to the admin socket (Unix permissions permitting)
|
||||
can:
|
||||
|
||||
- Connect and send no data, holding a connection and task open indefinitely
|
||||
- Send gigabytes of data without a newline, causing unbounded memory allocation
|
||||
|
||||
**Solution**: Wrap the read in `tokio::time::timeout` and use
|
||||
`read_until` with a reasonable limit, or use `take` to cap the stream:
|
||||
|
||||
```rust
|
||||
let mut reader = BufReader::new(tokio::io::take(reader, 4096)); // 4KB limit
|
||||
let read_result = tokio::time::timeout(
|
||||
Duration::from_secs(5),
|
||||
reader.read_line(&mut line),
|
||||
).await;
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### W5. `TlsMode` Wildcard Match Could Cause Listener/Acceptor Mismatch
|
||||
|
||||
**File**: `src/main.rs:170-194,209-210`
|
||||
|
||||
**Problem**: The `match tls_mode` has a wildcard `_` arm that logs a warning
|
||||
and pushes **no** acceptor. Then `bound_listeners.into_iter().zip(tls_acceptors.into_iter())`
|
||||
uses `zip`, which silently stops at the shorter iterator. If the wildcard arm
|
||||
were ever reached, some listeners would have no TLS acceptor and would be
|
||||
silently dropped with no error.
|
||||
|
||||
`setup_tls` already rejects unknown modes with `bail!`, so the wildcard is
|
||||
unreachable in practice. But it's a latent bug waiting for a future refactor.
|
||||
|
||||
**Solution**: Either remove the wildcard arm (since `TlsMode` only has two
|
||||
variants and `setup_tls` already validates), or make the mismatch explicit:
|
||||
|
||||
```rust
|
||||
if bound_listeners.len() != tls_acceptors.len() {
|
||||
anyhow::bail!("listener/acceptor count mismatch");
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### W6. `RawConfig` and `FullConfig` Are Duplicated
|
||||
|
||||
**Files**: `src/cli.rs:49-65`, `src/config/mod.rs:15-31`
|
||||
|
||||
**Problem**: `RawConfig` (used at startup in `cli.rs`) and `FullConfig` (used
|
||||
at reload in `config/mod.rs`) have identical fields and identical serde
|
||||
attributes. They exist because the initial load path manually constructs
|
||||
`StaticConfig` + `SerializableDynamicConfig`, while the reload path uses
|
||||
`FullConfig::into_static_and_dynamic()`. This duplication means any new config
|
||||
field must be added in two places.
|
||||
|
||||
**Solution**: Delete `RawConfig` and use `FullConfig` in `load_config`:
|
||||
|
||||
```rust
|
||||
let full: FullConfig = toml::from_str(&config_content)?;
|
||||
let (static_config, dynamic_config) = full.into_static_and_dynamic();
|
||||
```
|
||||
|
||||
Then `collect_sites` can also be removed since `into_static_and_dynamic`
|
||||
already collects sites from all listeners.
|
||||
|
||||
---
|
||||
|
||||
### W7. `cleanup_stale_socket` Connects to Verify Liveness — Side Effect on Other Process
|
||||
|
||||
**File**: `src/admin/socket.rs:142-160,162-164`
|
||||
|
||||
**Problem**: `is_socket_active` checks whether another process owns the socket
|
||||
by **connecting to it**:
|
||||
|
||||
```rust
|
||||
async fn is_socket_active(path: &str) -> bool {
|
||||
tokio::net::UnixStream::connect(path).await.is_ok()
|
||||
}
|
||||
```
|
||||
|
||||
If another process is listening, this creates a real connection that the other
|
||||
process will `accept()`, read nothing, and close. This is a harmless but
|
||||
unnecessary side effect on the other process.
|
||||
|
||||
**Solution**: Check for liveness by inspecting `/proc/net/unix` or by attempting
|
||||
a zero-byte `sendmsg` with `MSG_PEEK`. For Phase 1, the current approach is
|
||||
acceptable since the admin socket is a local diagnostic interface.
|
||||
|
||||
---
|
||||
|
||||
### W8. `test_health_check_disabled_when_port_zero` Test Name Is Misleading
|
||||
|
||||
**File**: `tests/integration_test.rs:82-88`
|
||||
|
||||
**Problem**: The test is named `test_health_check_disabled_when_port_zero` but
|
||||
port 0 means "OS picks a random port" — the health check listener still starts
|
||||
and binds successfully. The actual "disabled" logic is in `main.rs:94` where
|
||||
`health_check_port > 0` gates the call to `start_health_check_listener`. The
|
||||
test verifies that `start_health_check_listener(0)` works (binds to a random
|
||||
port), which is correct but the name implies it tests the disable path.
|
||||
|
||||
**Solution**: Rename to `test_health_check_binds_random_port_when_zero` or add
|
||||
a separate test that verifies the `main.rs` gating logic.
|
||||
|
||||
---
|
||||
|
||||
### W9. `test_dynamic_config_with_limit` Has Empty `routing_table`
|
||||
|
||||
**File**: `tests/integration_test.rs:618-635`
|
||||
|
||||
**Problem**: The helper constructs `DynamicConfig` directly with
|
||||
`routing_table: Default::default()` (empty HashMap), bypassing
|
||||
`DynamicConfig::from_sites()` which would normally build the routing table.
|
||||
The body limit tests only read `body.limit_bytes` so the empty table doesn't
|
||||
matter, but it's a latent trap for anyone who copies this pattern for tests
|
||||
that need routing.
|
||||
|
||||
**Solution**: Use `DynamicConfig::from_sites()` to construct the config, or at
|
||||
minimum add a comment warning that the routing table is intentionally empty.
|
||||
|
||||
---
|
||||
|
||||
### W10. `TokenBucket` Fields Are `pub` Despite Internal-Only Use
|
||||
|
||||
**File**: `src/rate_limit/bucket.rs:4-9`
|
||||
|
||||
**Problem**: All `TokenBucket` fields are `pub`:
|
||||
|
||||
```rust
|
||||
pub struct TokenBucket {
|
||||
pub tokens: f64,
|
||||
pub last_refill: Instant,
|
||||
pub rate: f64,
|
||||
pub max: u32,
|
||||
pub last_access: Instant,
|
||||
}
|
||||
```
|
||||
|
||||
Only `last_access` is read externally (by `evict_stale`). The other fields
|
||||
should be private to prevent accidental direct mutation that bypasses
|
||||
`try_consume`/`refill` logic.
|
||||
|
||||
**Solution**: Make `tokens`, `last_refill`, `rate`, and `max` private. Keep
|
||||
`last_access` pub(crate) for `evict_stale`.
|
||||
|
||||
---
|
||||
|
||||
### W11. Admin Socket Exposes `reload_mutex` for Testing
|
||||
|
||||
**File**: `src/admin/socket.rs:71-73`
|
||||
|
||||
**Problem**: `AdminSocket::reload_mutex()` is a public method that exists solely
|
||||
for the `test_reload_serialized_with_mutex` test. It exposes an internal
|
||||
synchronization primitive, and the test acquires the mutex before sending a
|
||||
reload command to verify serialization — coupling the test to implementation
|
||||
details.
|
||||
|
||||
**Solution**: Remove the method and test serialization through observable
|
||||
behavior (e.g., send two rapid reload requests and verify the final config
|
||||
state reflects the last one), or make the method `#[cfg(test)]`-gated.
|
||||
|
||||
---
|
||||
|
||||
### W12. `http_port` Type Is `u32` While `https_port` Is `u16`
|
||||
|
||||
**File**: `src/config/static_config.rs:34,36`
|
||||
|
||||
**Problem**: `http_port` is declared as `u32` but `https_port` is `u16`. Both
|
||||
represent TCP port numbers (valid range 1–65535). The type inconsistency means
|
||||
comparisons require casting (`listener.http_port == listener.https_port as u32`
|
||||
at `validation.rs:133`) and `http_port` could theoretically hold values >
|
||||
65535 that are caught by validation rather than the type system.
|
||||
|
||||
**Solution**: Change `http_port` to `u16` for consistency. Port 0 (disabled)
|
||||
fits in `u16`. Update all `as u32` casts in validation and `main.rs`.
|
||||
|
||||
---
|
||||
|
||||
## Suggestions
|
||||
|
||||
### S1. Remove Dead Code Before Release
|
||||
|
||||
**Files**: Multiple
|
||||
|
||||
The following items are defined but never called in production code:
|
||||
|
||||
| Item | File | Note |
|
||||
|------|------|------|
|
||||
| `log_rate_limit!` macro | `src/logging/format.rs:72-83` | Rate limiter uses `warn!` directly |
|
||||
| `log_config_reload!` macro | `src/logging/format.rs:97-106` | Reload uses `info!`/`warn!` directly |
|
||||
| `format_event_fields()` | `src/logging/format.rs:50-54` | Never called |
|
||||
| `ProxyError::BadGateway` (struct variant) | `src/proxy/error.rs:8` | Code uses `UpstreamConnection` instead |
|
||||
| `ProxyError::GatewayTimeout` (struct variant) | `src/proxy/error.rs:10` | Code uses `UpstreamTimeout` instead |
|
||||
| `ProxyError::PayloadTooLarge` | `src/proxy/error.rs:12` | Body limit returns tuple directly |
|
||||
| `ProxyError::NotFound` | `src/proxy/error.rs:20` | Code uses `UnknownHost` instead |
|
||||
| `ProxyError::BadRequest` | `src/proxy/error.rs:22` | Code uses `MissingHost` instead |
|
||||
| `ProxyError::UpstreamTls` | `src/proxy/error.rs:28` | Never constructed |
|
||||
| `build_multi_domain_server_config()` | `src/tls/config.rs:78-102` | Never called |
|
||||
| `SniCertResolver` | `src/tls/config.rs:104-126` | Only used by above |
|
||||
| `AcmeTlsConfig::directory_url()` | `src/tls/acme.rs:53-59` | Only used in tests |
|
||||
| `TestUpstream::url()` | `tests/helpers/http_test_helper.rs:39-41` | `#[allow(dead_code)]` |
|
||||
| `TestUpstream::upstream_addr()` | `tests/helpers/http_test_helper.rs:43-46` | `#[allow(dead_code)]` |
|
||||
|
||||
Either wire these up or remove them. Dead code increases audit surface and
|
||||
creates confusion about what's actually active.
|
||||
|
||||
---
|
||||
|
||||
### S2. Make Eviction Interval and Max Age Configurable
|
||||
|
||||
**File**: `src/main.rs:196-201`
|
||||
|
||||
**Problem**: The eviction task interval (60s) and max age (300s) are hardcoded.
|
||||
In high-traffic deployments, a shorter interval or longer max age might be
|
||||
desirable.
|
||||
|
||||
**Suggestion**: Add `rate_limit.eviction_interval_secs` and
|
||||
`rate_limit.bucket_max_age_secs` to `RateLimitConfig` with defaults of 60 and
|
||||
300. These are dynamic config fields so they can be hot-reloaded.
|
||||
|
||||
---
|
||||
|
||||
### S3. Log Root Certificate Count at Startup
|
||||
|
||||
**File**: `src/proxy/handler.rs:246-258`
|
||||
|
||||
**Problem**: `root_certs()` loads native certificates silently (only logs
|
||||
errors). If the system has zero root certificates (misconfigured CA bundle),
|
||||
all HTTPS upstream connections will fail with opaque TLS errors.
|
||||
|
||||
**Suggestion**: Log the number of loaded certificates at info level:
|
||||
|
||||
```rust
|
||||
let cert_count = result.certs.len();
|
||||
info!(certs_loaded = cert_count, errors = result.errors.len(), "loaded system root certificates");
|
||||
if cert_count == 0 {
|
||||
warn!("no system root certificates loaded — HTTPS upstream connections will fail");
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### S4. Add Read Timeout and Line Length Limit to Admin Socket
|
||||
|
||||
**File**: `src/admin/socket.rs:166-210`
|
||||
|
||||
See W4 for the problem. Even if the admin socket is only accessible via Unix
|
||||
permissions, defense-in-depth warrants basic resource limits.
|
||||
|
||||
---
|
||||
|
||||
### S5. Consolidate `RawConfig` and `FullConfig`
|
||||
|
||||
**Files**: `src/cli.rs:49-65`, `src/config/mod.rs:15-31`
|
||||
|
||||
See W6. Using a single `FullConfig` type for both startup and reload eliminates
|
||||
duplication and ensures both paths stay in sync.
|
||||
|
||||
---
|
||||
|
||||
### S6. Add `#[non_exhaustive]` to `TokenBucket` (or Make Fields Private)
|
||||
|
||||
**File**: `src/rate_limit/bucket.rs:4-9`
|
||||
|
||||
See W10. Making fields private prevents accidental direct mutation. If external
|
||||
crates need to construct `TokenBucket`, add a `new()` constructor (which
|
||||
already exists).
|
||||
|
||||
---
|
||||
|
||||
### S7. Add More Status Info to Admin Socket `status` Command
|
||||
|
||||
**File**: `src/admin/socket.rs:265-275`
|
||||
|
||||
**Suggestion**: The `status` response currently returns `uptime_secs` and
|
||||
`sites`. Consider adding:
|
||||
- `rate_limit` (requests_per_second, burst)
|
||||
- `body_limit_bytes`
|
||||
- `listeners` count
|
||||
- `in_flight_requests` (if InFlightCounter is fixed per C2)
|
||||
|
||||
This gives operators a quick health snapshot without reading logs.
|
||||
|
||||
---
|
||||
|
||||
### S8. Consider Making `upstream_connect_timeout_secs` Actually Work
|
||||
|
||||
**File**: `src/proxy/handler.rs:216-229`
|
||||
|
||||
See C3. The hardcoded 5s connector timeout caps the per-site connect timeout.
|
||||
Either remove the connector timeout or set it to a high ceiling value.
|
||||
|
||||
---
|
||||
|
||||
### S9. `acme_contact` Config Field Should Be `Vec<String>` for Multiple Contacts
|
||||
|
||||
**File**: `src/config/static_config.rs:60`
|
||||
|
||||
**Problem**: `acme_contact` is a single `String` but ACME supports multiple
|
||||
contact emails. The `AcmeTlsConfig.contact` field is already `Vec<String>` and
|
||||
the config value is wrapped in `vec![...]` at `src/tls/acceptor.rs:70`.
|
||||
|
||||
**Suggestion**: Change `acme_contact` to `Vec<String>` in `TlsConfig` for
|
||||
consistency with the ACME protocol. This is a config format change — document
|
||||
the migration.
|
||||
|
||||
---
|
||||
|
||||
### S10. Add Integration Test for Rate Limiter Using `ConnectInfo`
|
||||
|
||||
**File**: `tests/integration_test.rs:90-163`
|
||||
|
||||
**Problem**: All rate limit integration tests pass the client IP via
|
||||
`X-Forwarded-For` header. No test verifies the `ConnectInfo` extraction path,
|
||||
which is the primary path after C1 is fixed.
|
||||
|
||||
**Suggestion**: Add a test that sets `ConnectInfo` on the request extensions
|
||||
(like `make_request_with_connect_info` in `src/proxy/handler.rs` tests) and
|
||||
verifies rate limiting works without the `X-Forwarded-For` header.
|
||||
|
||||
---
|
||||
|
||||
## Summary Statistics
|
||||
|
||||
| Severity | Count | Status |
|
||||
|----------|-------|--------|
|
||||
| Critical | 4 | Must fix before production |
|
||||
| Warning | 12 | Should fix — correctness and robustness |
|
||||
| Suggestion | 10 | Consider for code quality |
|
||||
|
||||
## Recommended Fix Priority
|
||||
|
||||
1. **C1 (rate limiter X-Forwarded-For spoofing)** — Active security vulnerability.
|
||||
Any external attacker can bypass rate limiting entirely.
|
||||
2. **C2 (InFlightCounter never increments)** — Graceful shutdown drain logic is
|
||||
completely non-functional. The drain always times out.
|
||||
3. **C3 (connect timeout capped at 5s)** — Per-site timeout configuration is
|
||||
silently ignored for values > 5s.
|
||||
4. **C4 (JSON format not applied without log file)** — Format config is silently
|
||||
ignored in a common deployment mode (stdout-only JSON logging).
|
||||
5. **W1 (upstream host validation gap)** — Invalid upstream addresses pass config
|
||||
validation.
|
||||
6. **W2 (ACME contact validation gap)** — Empty email after `mailto:` passes
|
||||
validation, fails at runtime.
|
||||
7. **W3 (query string silently dropped)** — Data corruption on URI parse failure.
|
||||
8. **W4 (admin socket resource limits)** — DoS vector for local socket access.
|
||||
9. **W5 (TlsMode wildcard mismatch)** — Latent bug for future refactors.
|
||||
10. **W6 (RawConfig/FullConfig duplication)** — Maintenance burden.
|
||||
11. **Remaining W and S findings** — Fix opportunistically.
|
||||
@@ -9,9 +9,12 @@ use tokio::net::UnixListener;
|
||||
use tokio::sync::Mutex;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::shutdown::GracefulShutdown;
|
||||
|
||||
use crate::config::ConfigReloadHandle;
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[non_exhaustive]
|
||||
pub enum AdminSocketError {
|
||||
#[error("admin socket disabled (empty path)")]
|
||||
Disabled,
|
||||
@@ -65,12 +68,16 @@ impl AdminSocket {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn reload_mutex(&self) -> Arc<Mutex<()>> {
|
||||
self.reload_mutex.clone()
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn start_admin_socket(admin_socket: Arc<AdminSocket>) -> Result<(), AdminSocketError> {
|
||||
pub async fn start_admin_socket(
|
||||
admin_socket: Arc<AdminSocket>,
|
||||
shutdown: Arc<GracefulShutdown>,
|
||||
) -> Result<(), AdminSocketError> {
|
||||
if admin_socket.socket_path.is_empty() {
|
||||
info!("admin socket disabled (empty path)");
|
||||
return Err(AdminSocketError::Disabled);
|
||||
@@ -96,19 +103,41 @@ pub async fn start_admin_socket(admin_socket: Arc<AdminSocket>) -> Result<(), Ad
|
||||
|
||||
info!("admin socket listening on {}", socket_path);
|
||||
|
||||
let mut shutdown_rx = shutdown.subscribe();
|
||||
|
||||
loop {
|
||||
match listener.accept().await {
|
||||
Ok((stream, _addr)) => {
|
||||
let admin_socket = admin_socket.clone();
|
||||
tokio::spawn(async move {
|
||||
handle_connection(stream, admin_socket).await;
|
||||
});
|
||||
tokio::select! {
|
||||
result = listener.accept() => {
|
||||
match result {
|
||||
Ok((stream, _addr)) => {
|
||||
let admin_socket = admin_socket.clone();
|
||||
tokio::spawn(async move {
|
||||
handle_connection(stream, admin_socket).await;
|
||||
});
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("failed to accept admin socket connection: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("failed to accept admin socket connection: {}", e);
|
||||
_ = shutdown_rx.changed() => {
|
||||
info!("admin socket shutting down");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cleanup_socket_file(socket_path).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn cleanup_socket_file(path: &str) {
|
||||
if Path::new(path).exists() {
|
||||
if let Err(e) = tokio::fs::remove_file(path).await {
|
||||
warn!("failed to remove admin socket file {}: {}", path, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn cleanup_stale_socket(path: &str) -> Result<(), AdminSocketError> {
|
||||
@@ -136,14 +165,20 @@ async fn is_socket_active(path: &str) -> bool {
|
||||
}
|
||||
|
||||
async fn handle_connection(stream: tokio::net::UnixStream, admin_socket: Arc<AdminSocket>) {
|
||||
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
|
||||
|
||||
let (reader, mut writer) = stream.into_split();
|
||||
let mut reader = BufReader::new(reader);
|
||||
let mut reader = BufReader::new(reader.take(4096));
|
||||
let mut line = String::new();
|
||||
|
||||
match reader.read_line(&mut line).await {
|
||||
Ok(0) | Err(_) => {
|
||||
let read_result = tokio::time::timeout(
|
||||
std::time::Duration::from_secs(5),
|
||||
reader.read_line(&mut line),
|
||||
)
|
||||
.await;
|
||||
|
||||
match read_result {
|
||||
Ok(Ok(0)) | Ok(Err(_)) => {
|
||||
let _ = writer
|
||||
.write_all(
|
||||
format!(
|
||||
@@ -159,7 +194,42 @@ async fn handle_connection(stream: tokio::net::UnixStream, admin_socket: Arc<Adm
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
_ => {}
|
||||
Err(_) => {
|
||||
tracing::debug!("admin socket connection timed out");
|
||||
let _ = writer
|
||||
.write_all(
|
||||
format!(
|
||||
"{}\n",
|
||||
serde_json::to_string(&ErrorResponse {
|
||||
status: "error",
|
||||
message: "read timeout".to_string(),
|
||||
})
|
||||
.unwrap()
|
||||
)
|
||||
.as_bytes(),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
Ok(Ok(n)) => {
|
||||
if !line.ends_with('\n') && n > 0 {
|
||||
tracing::warn!("admin socket command exceeded 4096 byte limit");
|
||||
let _ = writer
|
||||
.write_all(
|
||||
format!(
|
||||
"{}\n",
|
||||
serde_json::to_string(&ErrorResponse {
|
||||
status: "error",
|
||||
message: "command too long".to_string(),
|
||||
})
|
||||
.unwrap()
|
||||
)
|
||||
.as_bytes(),
|
||||
)
|
||||
.await;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let command = line.trim();
|
||||
@@ -508,7 +578,7 @@ upstream = "127.0.0.1:8080"
|
||||
dir.path().join("config.toml").to_string_lossy().to_string(),
|
||||
));
|
||||
|
||||
let result = start_admin_socket(admin_socket).await;
|
||||
let result = start_admin_socket(admin_socket, Arc::new(GracefulShutdown::new(30))).await;
|
||||
assert!(matches!(result, Err(AdminSocketError::Disabled)));
|
||||
}
|
||||
|
||||
@@ -531,7 +601,7 @@ upstream = "127.0.0.1:8080"
|
||||
dir.path().join("config.toml").to_string_lossy().to_string(),
|
||||
));
|
||||
|
||||
let result = start_admin_socket(admin_socket).await;
|
||||
let result = start_admin_socket(admin_socket, Arc::new(GracefulShutdown::new(30))).await;
|
||||
assert!(matches!(result, Err(AdminSocketError::SocketInUse(_))));
|
||||
}
|
||||
|
||||
@@ -652,4 +722,105 @@ upstream = "127.0.0.1:8080"
|
||||
assert_eq!(parsed["sites"], 1);
|
||||
assert!(parsed["uptime_secs"].is_number());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_read_timeout() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let admin_socket = Arc::new(create_test_admin_socket(dir.path()));
|
||||
let socket_path = dir.path().join("admin.sock");
|
||||
|
||||
let listener = UnixListener::bind(&socket_path).unwrap();
|
||||
|
||||
let admin_socket_clone = admin_socket.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
handle_connection(stream, admin_socket_clone).await;
|
||||
});
|
||||
|
||||
let stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
|
||||
let mut response = String::new();
|
||||
let mut reader = BufReader::new(stream);
|
||||
|
||||
let result =
|
||||
tokio::time::timeout(Duration::from_secs(10), reader.read_line(&mut response)).await;
|
||||
|
||||
handle.await.unwrap();
|
||||
|
||||
assert!(result.is_ok());
|
||||
let parsed: serde_json::Value = serde_json::from_str(response.trim()).unwrap();
|
||||
assert_eq!(parsed["status"], "error");
|
||||
assert_eq!(parsed["message"], "read timeout");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_command_too_long() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let admin_socket = Arc::new(create_test_admin_socket(dir.path()));
|
||||
let socket_path = dir.path().join("admin.sock");
|
||||
|
||||
let listener = UnixListener::bind(&socket_path).unwrap();
|
||||
|
||||
let admin_socket_clone = admin_socket.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
handle_connection(stream, admin_socket_clone).await;
|
||||
});
|
||||
|
||||
let mut stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
|
||||
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
|
||||
let long_data = "A".repeat(5000);
|
||||
stream.write_all(long_data.as_bytes()).await.unwrap();
|
||||
stream.shutdown().await.unwrap();
|
||||
|
||||
let mut response = String::new();
|
||||
let mut reader = BufReader::new(stream);
|
||||
reader.read_line(&mut response).await.unwrap();
|
||||
|
||||
handle.await.unwrap();
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(response.trim()).unwrap();
|
||||
assert_eq!(parsed["status"], "error");
|
||||
assert_eq!(parsed["message"], "command too long");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_command_at_limit_boundary() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let admin_socket = Arc::new(create_test_admin_socket(dir.path()));
|
||||
let socket_path = dir.path().join("admin.sock");
|
||||
|
||||
let listener = UnixListener::bind(&socket_path).unwrap();
|
||||
|
||||
let admin_socket_clone = admin_socket.clone();
|
||||
let handle = tokio::spawn(async move {
|
||||
let (stream, _) = listener.accept().await.unwrap();
|
||||
handle_connection(stream, admin_socket_clone).await;
|
||||
});
|
||||
|
||||
let mut stream = tokio::net::UnixStream::connect(&socket_path).await.unwrap();
|
||||
use tokio::io::{AsyncBufReadExt, AsyncWriteExt, BufReader};
|
||||
|
||||
let at_limit = format!("{}\n", "A".repeat(4095));
|
||||
stream.write_all(at_limit.as_bytes()).await.unwrap();
|
||||
stream.shutdown().await.unwrap();
|
||||
|
||||
let mut response = String::new();
|
||||
let mut reader = BufReader::new(stream);
|
||||
reader.read_line(&mut response).await.unwrap();
|
||||
|
||||
handle.await.unwrap();
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(response.trim()).unwrap();
|
||||
assert_eq!(parsed["status"], "error");
|
||||
assert_eq!(
|
||||
parsed["message"]
|
||||
.as_str()
|
||||
.unwrap()
|
||||
.starts_with("unknown command:"),
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
49
src/cli.rs
49
src/cli.rs
@@ -3,9 +3,7 @@ use std::path::Path;
|
||||
use anyhow::{Context, Result};
|
||||
use clap::Parser;
|
||||
|
||||
use crate::config::dynamic_config::{
|
||||
BodyConfig, DynamicConfig, RateLimitConfig, SerializableDynamicConfig,
|
||||
};
|
||||
use crate::config::dynamic_config::DynamicConfig;
|
||||
use crate::config::static_config::StaticConfig;
|
||||
use crate::config::validation::validate;
|
||||
|
||||
@@ -46,48 +44,15 @@ where
|
||||
Cli::parse_from(args)
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Deserialize)]
|
||||
struct RawConfig {
|
||||
#[serde(default)]
|
||||
listeners: Vec<crate::config::static_config::ListenerConfig>,
|
||||
#[serde(default)]
|
||||
allow_wildcard_bind: bool,
|
||||
#[serde(default = "crate::config::static_config::default_health_check_port")]
|
||||
health_check_port: u16,
|
||||
#[serde(default = "crate::config::static_config::default_admin_socket_path")]
|
||||
admin_socket_path: String,
|
||||
#[serde(default = "crate::config::static_config::default_shutdown_timeout_secs")]
|
||||
shutdown_timeout_secs: u64,
|
||||
#[serde(default)]
|
||||
logging: crate::config::static_config::LoggingConfig,
|
||||
rate_limit: RateLimitConfig,
|
||||
body: BodyConfig,
|
||||
}
|
||||
|
||||
pub fn load_config(cli: &Cli) -> Result<LoadedConfig> {
|
||||
let config_path = Path::new(&cli.config);
|
||||
let config_content = std::fs::read_to_string(config_path)
|
||||
.with_context(|| format!("failed to read config file: {}", cli.config))?;
|
||||
|
||||
let raw: RawConfig = toml::from_str(&config_content)
|
||||
let full_config = crate::config::FullConfig::parse(&config_content)
|
||||
.with_context(|| format!("failed to parse config file: {}", cli.config))?;
|
||||
|
||||
let static_config = StaticConfig {
|
||||
listeners: raw.listeners,
|
||||
allow_wildcard_bind: raw.allow_wildcard_bind,
|
||||
health_check_port: raw.health_check_port,
|
||||
admin_socket_path: raw.admin_socket_path,
|
||||
shutdown_timeout_secs: raw.shutdown_timeout_secs,
|
||||
logging: raw.logging,
|
||||
};
|
||||
|
||||
let serializable_dynamic = SerializableDynamicConfig {
|
||||
sites: collect_sites(&static_config),
|
||||
rate_limit: raw.rate_limit,
|
||||
body: raw.body,
|
||||
};
|
||||
|
||||
let dynamic_config: DynamicConfig = serializable_dynamic.into();
|
||||
let (static_config, dynamic_config) = full_config.into_static_and_dynamic();
|
||||
|
||||
let allow_wildcard_bind = static_config.allow_wildcard_bind || cli.allow_wildcard_bind;
|
||||
|
||||
@@ -109,14 +74,6 @@ pub fn load_config(cli: &Cli) -> Result<LoadedConfig> {
|
||||
})
|
||||
}
|
||||
|
||||
fn collect_sites(static_config: &StaticConfig) -> Vec<crate::config::dynamic_config::SiteConfig> {
|
||||
let mut sites = Vec::new();
|
||||
for listener in &static_config.listeners {
|
||||
sites.extend(listener.sites.clone());
|
||||
}
|
||||
sites
|
||||
}
|
||||
|
||||
pub fn run_validate(cli: &Cli) -> Result<()> {
|
||||
match load_config(cli) {
|
||||
Ok(_) => {
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use serde::Deserialize;
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||
pub struct StaticConfig {
|
||||
pub listeners: Vec<ListenerConfig>,
|
||||
@@ -28,12 +27,11 @@ pub fn default_shutdown_timeout_secs() -> u64 {
|
||||
30
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||
pub struct ListenerConfig {
|
||||
pub bind_addr: String,
|
||||
#[serde(default = "default_http_port")]
|
||||
pub http_port: u32,
|
||||
pub http_port: u16,
|
||||
#[serde(default = "default_https_port")]
|
||||
pub https_port: u16,
|
||||
pub tls: TlsConfig,
|
||||
@@ -41,17 +39,14 @@ pub struct ListenerConfig {
|
||||
pub sites: Vec<crate::config::dynamic_config::SiteConfig>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn default_http_port() -> u32 {
|
||||
fn default_http_port() -> u16 {
|
||||
80
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn default_https_port() -> u16 {
|
||||
443
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||
pub struct TlsConfig {
|
||||
pub mode: String,
|
||||
@@ -69,12 +64,10 @@ pub struct TlsConfig {
|
||||
pub key_path: String,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn default_acme_directory() -> String {
|
||||
"production".to_string()
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug, Clone, Deserialize, PartialEq)]
|
||||
pub struct LoggingConfig {
|
||||
#[serde(default = "default_log_level")]
|
||||
@@ -85,12 +78,10 @@ pub struct LoggingConfig {
|
||||
pub log_file_path: Option<String>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn default_log_level() -> String {
|
||||
"info".to_string()
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn default_log_format() -> String {
|
||||
"text".to_string()
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ use super::dynamic_config::DynamicConfig;
|
||||
use super::static_config::StaticConfig;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
#[non_exhaustive]
|
||||
pub enum ValidationError {
|
||||
#[error("at least one listener must be defined")]
|
||||
NoListeners,
|
||||
@@ -41,19 +42,19 @@ pub enum ValidationError {
|
||||
#[error("body.limit_bytes must be > 0, got {value}")]
|
||||
BodyLimitBytesZero { value: u64 },
|
||||
#[error("duplicate bind_addr:http_port combination: {bind_addr}:{http_port}")]
|
||||
DuplicateHttpBind { bind_addr: String, http_port: u32 },
|
||||
DuplicateHttpBind { bind_addr: String, http_port: u16 },
|
||||
#[error(
|
||||
"listener {bind_addr}: http_port ({http_port}) and https_port ({https_port}) must differ"
|
||||
)]
|
||||
HttpsAndHttpPortSame {
|
||||
bind_addr: String,
|
||||
http_port: u32,
|
||||
http_port: u16,
|
||||
https_port: u16,
|
||||
},
|
||||
#[error("listener {bind_addr}: https_port must be 1-65535, got {https_port}")]
|
||||
HttpsPortInvalid { bind_addr: String, https_port: u16 },
|
||||
#[error("listener {bind_addr}: http_port must be 0 (disabled) or 1-65535, got {http_port}")]
|
||||
HttpPortInvalid { bind_addr: String, http_port: u32 },
|
||||
HttpPortInvalid { bind_addr: String, http_port: u16 },
|
||||
#[error("health_check_port {health_check_port} conflicts with listener {bind_addr}:{port}")]
|
||||
HealthCheckPortConflict {
|
||||
health_check_port: u16,
|
||||
@@ -122,14 +123,7 @@ pub fn validate(
|
||||
});
|
||||
}
|
||||
|
||||
if listener.http_port > 65535 {
|
||||
errors.push(ValidationError::HttpPortInvalid {
|
||||
bind_addr: listener.bind_addr.clone(),
|
||||
http_port: listener.http_port,
|
||||
});
|
||||
}
|
||||
|
||||
if listener.http_port > 0 && listener.http_port == listener.https_port as u32 {
|
||||
if listener.http_port > 0 && listener.http_port == listener.https_port {
|
||||
errors.push(ValidationError::HttpsAndHttpPortSame {
|
||||
bind_addr: listener.bind_addr.clone(),
|
||||
http_port: listener.http_port,
|
||||
@@ -149,6 +143,13 @@ pub fn validate(
|
||||
errors.push(ValidationError::AcmeContactInvalid {
|
||||
bind_addr: listener.bind_addr.clone(),
|
||||
});
|
||||
} else {
|
||||
let email = &contact[7..];
|
||||
if email.is_empty() || !email.contains('@') {
|
||||
errors.push(ValidationError::AcmeContactInvalid {
|
||||
bind_addr: listener.bind_addr.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
"manual" => {
|
||||
@@ -181,6 +182,10 @@ pub fn validate(
|
||||
}
|
||||
}
|
||||
|
||||
// Health check always binds to 127.0.0.1 (hardcoded in src/health.rs), so this
|
||||
// conflict check is conservative — it warns even when the health check port
|
||||
// wouldn't actually conflict (e.g., health check on 127.0.0.1:80 vs listener
|
||||
// on 203.0.113.10:80). This is acceptable for Phase 1.
|
||||
if static_config.health_check_port > 0 {
|
||||
for listener in &static_config.listeners {
|
||||
if static_config.health_check_port == listener.https_port {
|
||||
@@ -190,14 +195,11 @@ pub fn validate(
|
||||
port: listener.https_port,
|
||||
});
|
||||
}
|
||||
if listener.http_port > 0
|
||||
&& listener.http_port <= 65535
|
||||
&& static_config.health_check_port as u32 == listener.http_port
|
||||
{
|
||||
if listener.http_port > 0 && static_config.health_check_port == listener.http_port {
|
||||
errors.push(ValidationError::HealthCheckPortConflict {
|
||||
health_check_port: static_config.health_check_port,
|
||||
bind_addr: listener.bind_addr.clone(),
|
||||
port: listener.http_port as u16,
|
||||
port: listener.http_port,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -315,7 +317,15 @@ fn is_valid_upstream(upstream: &str) -> bool {
|
||||
Ok(p) => p,
|
||||
Err(_) => return false,
|
||||
};
|
||||
port != 0
|
||||
if port == 0 {
|
||||
return false;
|
||||
}
|
||||
if host_part.starts_with('[') && host_part.ends_with(']') {
|
||||
let inner = &host_part[1..host_part.len() - 1];
|
||||
inner.parse::<std::net::Ipv6Addr>().is_ok()
|
||||
} else {
|
||||
host_part.parse::<std::net::IpAddr>().is_ok() || is_valid_hostname(host_part)
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
@@ -733,20 +743,6 @@ mod tests {
|
||||
.any(|e| matches!(e, ValidationError::HttpsPortInvalid { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule13_http_port_invalid() {
|
||||
let mut config = valid_static_config();
|
||||
config.listeners[0].http_port = 65536;
|
||||
config.listeners[0].tls = make_acme_tls();
|
||||
let dynamic = valid_dynamic_config();
|
||||
let result = validate(&config, &dynamic, false);
|
||||
assert!(result.is_err());
|
||||
let errors = result.unwrap_err();
|
||||
assert!(errors
|
||||
.iter()
|
||||
.any(|e| matches!(e, ValidationError::HttpPortInvalid { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule13_http_port_disabled_valid() {
|
||||
let mut config = valid_static_config();
|
||||
@@ -1004,6 +1000,58 @@ mod tests {
|
||||
assert!(result.is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule19_acme_contact_mailto_empty_email() {
|
||||
let mut config = valid_static_config();
|
||||
config.listeners[0].tls = TlsConfig {
|
||||
mode: "acme".to_string(),
|
||||
acme_domains: vec!["test.local".to_string()],
|
||||
acme_cache_dir: "/tmp/cache".to_string(),
|
||||
acme_directory: "production".to_string(),
|
||||
acme_contact: "mailto:".to_string(),
|
||||
cert_path: String::new(),
|
||||
key_path: String::new(),
|
||||
};
|
||||
config.listeners[0].sites = vec![SiteConfig {
|
||||
host: "test.local".to_string(),
|
||||
upstream: "127.0.0.1:8080".to_string(),
|
||||
..valid_dynamic_config().sites[0].clone()
|
||||
}];
|
||||
let dynamic = valid_dynamic_config();
|
||||
let result = validate(&config, &dynamic, false);
|
||||
assert!(result.is_err());
|
||||
let errors = result.unwrap_err();
|
||||
assert!(errors
|
||||
.iter()
|
||||
.any(|e| matches!(e, ValidationError::AcmeContactInvalid { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule19_acme_contact_mailto_no_at_sign() {
|
||||
let mut config = valid_static_config();
|
||||
config.listeners[0].tls = TlsConfig {
|
||||
mode: "acme".to_string(),
|
||||
acme_domains: vec!["test.local".to_string()],
|
||||
acme_cache_dir: "/tmp/cache".to_string(),
|
||||
acme_directory: "production".to_string(),
|
||||
acme_contact: "mailto:user".to_string(),
|
||||
cert_path: String::new(),
|
||||
key_path: String::new(),
|
||||
};
|
||||
config.listeners[0].sites = vec![SiteConfig {
|
||||
host: "test.local".to_string(),
|
||||
upstream: "127.0.0.1:8080".to_string(),
|
||||
..valid_dynamic_config().sites[0].clone()
|
||||
}];
|
||||
let dynamic = valid_dynamic_config();
|
||||
let result = validate(&config, &dynamic, false);
|
||||
assert!(result.is_err());
|
||||
let errors = result.unwrap_err();
|
||||
assert!(errors
|
||||
.iter()
|
||||
.any(|e| matches!(e, ValidationError::AcmeContactInvalid { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_config_passes() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
@@ -1096,4 +1144,49 @@ mod tests {
|
||||
.iter()
|
||||
.any(|e| matches!(e, ValidationError::KeyPathNotReadable { .. })));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_valid_hostname() {
|
||||
assert!(is_valid_upstream("gitea:3000"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_valid_ipv4() {
|
||||
assert!(is_valid_upstream("127.0.0.1:3000"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_valid_ipv6_bracket() {
|
||||
assert!(is_valid_upstream("[::1]:3000"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_valid_ipv6_bracket_full() {
|
||||
assert!(is_valid_upstream("[2001:db8::1]:8080"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_invalid_hostname_chars() {
|
||||
assert!(!is_valid_upstream("!!!bad!!!:3000"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_invalid_special_chars() {
|
||||
assert!(!is_valid_upstream("@#$%:8080"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_empty_host() {
|
||||
assert!(!is_valid_upstream(":3000"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_invalid_ipv6_bracket() {
|
||||
assert!(!is_valid_upstream("[notipv6]:3000"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rule17_upstream_hostname_with_dots() {
|
||||
assert!(is_valid_upstream("app.example.com:8080"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
#[cfg(test)]
|
||||
#[derive(Default)]
|
||||
struct KvVisitor {
|
||||
pairs: Vec<(String, String)>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl KvVisitor {
|
||||
fn format(&self) -> String {
|
||||
let parts: Vec<String> = self
|
||||
@@ -20,6 +22,7 @@ impl KvVisitor {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl tracing::field::Visit for KvVisitor {
|
||||
fn record_str(&mut self, field: &tracing::field::Field, value: &str) {
|
||||
self.pairs
|
||||
@@ -47,12 +50,6 @@ impl tracing::field::Visit for KvVisitor {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn format_event_fields(event: &tracing::Event<'_>) -> String {
|
||||
let mut visitor = KvVisitor::default();
|
||||
event.record(&mut visitor);
|
||||
visitor.format()
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! log_request {
|
||||
($client_ip:expr, $host:expr, $method:expr, $path:expr, $status:expr, $upstream:expr, $duration_ms:expr) => {
|
||||
@@ -69,19 +66,6 @@ macro_rules! log_request {
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! log_rate_limit {
|
||||
($client_ip:expr, $host:expr, $path:expr, $status:expr) => {
|
||||
tracing::warn!(
|
||||
prefix = "RATE_LIMIT",
|
||||
client_ip = %$client_ip,
|
||||
host = %$host,
|
||||
path = %$path,
|
||||
status = %$status,
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! log_upstream_error {
|
||||
($host:expr, $upstream:expr, $error:expr) => {
|
||||
@@ -94,17 +78,6 @@ macro_rules! log_upstream_error {
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! log_config_reload {
|
||||
($status:expr, $sites:expr) => {
|
||||
tracing::info!(
|
||||
prefix = "CONFIG_RELOAD",
|
||||
status = %$status,
|
||||
sites = %$sites,
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -180,8 +153,6 @@ mod tests {
|
||||
"127.0.0.1:3000",
|
||||
45u64
|
||||
);
|
||||
log_rate_limit!("10.0.0.1", "example.com", "/login", 429u16);
|
||||
log_upstream_error!("git.alk.dev", "127.0.0.1:3000", "connection refused");
|
||||
log_config_reload!("success", 1u32);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,9 +39,11 @@ fn init_json(env_filter: EnvFilter, log_file_path: &Option<String>, level: Level
|
||||
let file_env_filter = make_env_filter(level);
|
||||
let stdout_layer = tracing_subscriber::fmt::layer()
|
||||
.json()
|
||||
.with_ansi(false)
|
||||
.with_filter(env_filter);
|
||||
let file_layer = tracing_subscriber::fmt::layer()
|
||||
.json()
|
||||
.with_ansi(false)
|
||||
.with_writer(file_writer)
|
||||
.with_filter(file_env_filter);
|
||||
tracing_subscriber::registry()
|
||||
@@ -52,6 +54,7 @@ fn init_json(env_filter: EnvFilter, log_file_path: &Option<String>, level: Level
|
||||
None => {
|
||||
let layer = tracing_subscriber::fmt::layer()
|
||||
.json()
|
||||
.with_ansi(false)
|
||||
.with_filter(env_filter);
|
||||
tracing_subscriber::registry().with(layer).try_init()?;
|
||||
}
|
||||
@@ -72,8 +75,11 @@ fn init_text(env_filter: EnvFilter, log_file_path: &Option<String>, level: Level
|
||||
let file_writer = Arc::new(file);
|
||||
|
||||
let file_env_filter = make_env_filter(level);
|
||||
let stdout_layer = tracing_subscriber::fmt::layer().with_filter(env_filter);
|
||||
let stdout_layer = tracing_subscriber::fmt::layer()
|
||||
.with_ansi(false)
|
||||
.with_filter(env_filter);
|
||||
let file_layer = tracing_subscriber::fmt::layer()
|
||||
.with_ansi(false)
|
||||
.with_writer(file_writer)
|
||||
.with_filter(file_env_filter);
|
||||
tracing_subscriber::registry()
|
||||
@@ -82,7 +88,9 @@ fn init_text(env_filter: EnvFilter, log_file_path: &Option<String>, level: Level
|
||||
.try_init()?;
|
||||
}
|
||||
None => {
|
||||
let layer = tracing_subscriber::fmt::layer().with_filter(env_filter);
|
||||
let layer = tracing_subscriber::fmt::layer()
|
||||
.with_ansi(false)
|
||||
.with_filter(env_filter);
|
||||
tracing_subscriber::registry().with(layer).try_init()?;
|
||||
}
|
||||
}
|
||||
|
||||
39
src/main.rs
39
src/main.rs
@@ -65,6 +65,10 @@ async fn run_server(loaded_config: cli::LoadedConfig, config_path: &str) -> Resu
|
||||
let dynamic_config: DynamicConfig = loaded_config.dynamic_config;
|
||||
let config_arc = Arc::new(ArcSwap::from_pointee(dynamic_config));
|
||||
|
||||
let shutdown = Arc::new(GracefulShutdown::new(
|
||||
loaded_config.static_config.shutdown_timeout_secs,
|
||||
));
|
||||
|
||||
let rate_limiter = Arc::new(RateLimiter::new(config_arc.clone()));
|
||||
|
||||
let http_client = create_http_client();
|
||||
@@ -81,6 +85,12 @@ async fn run_server(loaded_config: cli::LoadedConfig, config_path: &str) -> Resu
|
||||
loaded_config.static_config.clone(),
|
||||
));
|
||||
|
||||
reverse_proxy::shutdown::register_signal_handlers(
|
||||
shutdown.clone(),
|
||||
reload_handle.clone(),
|
||||
config_path.to_string(),
|
||||
)?;
|
||||
|
||||
if loaded_config.static_config.health_check_port > 0 {
|
||||
let (health_addr, _health_handle) =
|
||||
health::start_health_check_listener(loaded_config.static_config.health_check_port)
|
||||
@@ -96,8 +106,9 @@ async fn run_server(loaded_config: cli::LoadedConfig, config_path: &str) -> Resu
|
||||
config_path.to_string(),
|
||||
));
|
||||
let admin_socket_clone = admin_socket.clone();
|
||||
let shutdown_for_admin = shutdown.clone();
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = start_admin_socket(admin_socket_clone).await {
|
||||
if let Err(e) = start_admin_socket(admin_socket_clone, shutdown_for_admin).await {
|
||||
match e {
|
||||
AdminSocketError::Disabled => {}
|
||||
AdminSocketError::SocketInUse(path) => {
|
||||
@@ -109,6 +120,7 @@ async fn run_server(loaded_config: cli::LoadedConfig, config_path: &str) -> Resu
|
||||
AdminSocketError::Io(e) => {
|
||||
error!("admin socket IO error: {}", e);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
});
|
||||
@@ -150,7 +162,7 @@ async fn run_server(loaded_config: cli::LoadedConfig, config_path: &str) -> Resu
|
||||
|
||||
let mut tls_acceptors = Vec::new();
|
||||
for (listener_config, _) in &bound_listeners {
|
||||
let tls_mode = setup_tls(&listener_config.tls).context(format!(
|
||||
let tls_mode = setup_tls(&listener_config.tls, shutdown.clone()).context(format!(
|
||||
"failed to setup TLS for listener {}",
|
||||
listener_config.bind_addr
|
||||
))?;
|
||||
@@ -175,20 +187,19 @@ async fn run_server(loaded_config: cli::LoadedConfig, config_path: &str) -> Resu
|
||||
}
|
||||
}
|
||||
|
||||
let shutdown = Arc::new(GracefulShutdown::new(
|
||||
loaded_config.static_config.shutdown_timeout_secs,
|
||||
));
|
||||
|
||||
reverse_proxy::shutdown::register_signal_handlers(
|
||||
shutdown.clone(),
|
||||
reload_handle.clone(),
|
||||
config_path.to_string(),
|
||||
)?;
|
||||
if bound_listeners.len() != tls_acceptors.len() {
|
||||
anyhow::bail!(
|
||||
"listener/acceptor count mismatch: {} listeners, {} acceptors",
|
||||
bound_listeners.len(),
|
||||
tls_acceptors.len()
|
||||
);
|
||||
}
|
||||
|
||||
let _eviction_handle = start_eviction_task(
|
||||
rate_limiter.clone(),
|
||||
std::time::Duration::from_secs(60),
|
||||
std::time::Duration::from_secs(300),
|
||||
shutdown.subscribe(),
|
||||
);
|
||||
|
||||
let app = build_router(proxy_state.clone(), config_arc.clone(), rate_limiter);
|
||||
@@ -230,8 +241,12 @@ async fn run_server(loaded_config: cli::LoadedConfig, config_path: &str) -> Resu
|
||||
|
||||
info!("shutdown signal received, starting graceful shutdown");
|
||||
|
||||
let shutdown_timeout = shutdown.shutdown_timeout();
|
||||
for handle in https_server_handles {
|
||||
handle.abort();
|
||||
let result = tokio::time::timeout(shutdown_timeout, handle).await;
|
||||
if result.is_err() {
|
||||
warn!("shutdown timeout expired, aborting listener task");
|
||||
}
|
||||
}
|
||||
|
||||
let remaining = drain_in_flight(&in_flight, shutdown.shutdown_timeout()).await;
|
||||
|
||||
@@ -23,6 +23,10 @@ pub async fn body_limit_middleware(
|
||||
limit
|
||||
};
|
||||
|
||||
// Early rejection: if Content-Length is present and exceeds the limit, reject
|
||||
// immediately without reading the body. For requests without Content-Length
|
||||
// (chunked, HTTP/2), the Limited body wrapper below enforces the limit during
|
||||
// streaming. This is a two-layer defense.
|
||||
if let Some(content_length) = request.headers().get("content-length") {
|
||||
if let Ok(length_str) = content_length.to_str() {
|
||||
if let Ok(length) = length_str.parse::<u64>() {
|
||||
|
||||
@@ -2,29 +2,22 @@ use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
#[non_exhaustive]
|
||||
pub enum ProxyError {
|
||||
#[error("Bad Gateway")]
|
||||
BadGateway { host: String, upstream: String },
|
||||
#[error("Gateway Timeout")]
|
||||
GatewayTimeout { host: String, upstream: String },
|
||||
#[error("Payload Too Large")]
|
||||
PayloadTooLarge,
|
||||
#[error("Too Many Requests")]
|
||||
TooManyRequests {
|
||||
client_ip: String,
|
||||
host: String,
|
||||
path: String,
|
||||
},
|
||||
#[error("Not Found")]
|
||||
NotFound,
|
||||
#[error("Bad Request")]
|
||||
BadRequest,
|
||||
#[error("upstream connection failed")]
|
||||
UpstreamConnection(#[source] hyper_util::client::legacy::Error),
|
||||
#[error("upstream timeout")]
|
||||
UpstreamTimeout,
|
||||
#[error("upstream tls certificate validation failed")]
|
||||
UpstreamTls(#[source] std::io::Error),
|
||||
#[error("no matching site for host")]
|
||||
UnknownHost,
|
||||
#[error("missing host header")]
|
||||
@@ -36,13 +29,11 @@ impl ProxyError {
|
||||
match self {
|
||||
Self::BadGateway { .. } => StatusCode::BAD_GATEWAY,
|
||||
Self::GatewayTimeout { .. } => StatusCode::GATEWAY_TIMEOUT,
|
||||
Self::PayloadTooLarge => StatusCode::PAYLOAD_TOO_LARGE,
|
||||
Self::TooManyRequests { .. } => StatusCode::TOO_MANY_REQUESTS,
|
||||
Self::NotFound | Self::UnknownHost => StatusCode::NOT_FOUND,
|
||||
Self::BadRequest | Self::MissingHost => StatusCode::BAD_REQUEST,
|
||||
Self::UpstreamConnection(_) => StatusCode::BAD_GATEWAY,
|
||||
Self::UpstreamTimeout => StatusCode::GATEWAY_TIMEOUT,
|
||||
Self::UpstreamTls(_) => StatusCode::BAD_GATEWAY,
|
||||
Self::UnknownHost => StatusCode::NOT_FOUND,
|
||||
Self::MissingHost => StatusCode::BAD_REQUEST,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,13 +41,11 @@ impl ProxyError {
|
||||
match self {
|
||||
Self::BadGateway { .. } => "Bad Gateway",
|
||||
Self::GatewayTimeout { .. } => "Gateway Timeout",
|
||||
Self::PayloadTooLarge => "Payload Too Large",
|
||||
Self::TooManyRequests { .. } => "Too Many Requests",
|
||||
Self::NotFound | Self::UnknownHost => "Not Found",
|
||||
Self::BadRequest | Self::MissingHost => "Bad Request",
|
||||
Self::UpstreamConnection(_) => "Bad Gateway",
|
||||
Self::UpstreamTimeout => "Gateway Timeout",
|
||||
Self::UpstreamTls(_) => "Bad Gateway",
|
||||
Self::UnknownHost => "Not Found",
|
||||
Self::MissingHost => "Bad Request",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -98,9 +87,6 @@ impl IntoResponse for ProxyError {
|
||||
Self::UpstreamTimeout => {
|
||||
tracing::warn!(status = 504, "upstream timeout");
|
||||
}
|
||||
Self::UpstreamTls(e) => {
|
||||
tracing::warn!(error = %e, status = 502, "upstream TLS error");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
@@ -175,23 +161,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn payload_too_large_response() {
|
||||
let resp = into_response(ProxyError::PayloadTooLarge);
|
||||
assert_eq!(resp.status(), StatusCode::PAYLOAD_TOO_LARGE);
|
||||
let body = axum::body::to_bytes(resp.into_body(), 1024).await.unwrap();
|
||||
assert_eq!(&body[..], b"Payload Too Large");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn payload_too_large_content_type() {
|
||||
let resp = into_response(ProxyError::PayloadTooLarge);
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/plain; charset=utf-8"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn too_many_requests_response() {
|
||||
let resp = into_response(ProxyError::TooManyRequests {
|
||||
@@ -217,40 +186,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn not_found_response() {
|
||||
let resp = into_response(ProxyError::NotFound);
|
||||
assert_eq!(resp.status(), StatusCode::NOT_FOUND);
|
||||
let body = axum::body::to_bytes(resp.into_body(), 1024).await.unwrap();
|
||||
assert_eq!(&body[..], b"Not Found");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn not_found_content_type() {
|
||||
let resp = into_response(ProxyError::NotFound);
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/plain; charset=utf-8"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bad_request_response() {
|
||||
let resp = into_response(ProxyError::BadRequest);
|
||||
assert_eq!(resp.status(), StatusCode::BAD_REQUEST);
|
||||
let body = axum::body::to_bytes(resp.into_body(), 1024).await.unwrap();
|
||||
assert_eq!(&body[..], b"Bad Request");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn bad_request_content_type() {
|
||||
let resp = into_response(ProxyError::BadRequest);
|
||||
assert_eq!(
|
||||
resp.headers().get("content-type").unwrap(),
|
||||
"text/plain; charset=utf-8"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_display_matches_body() {
|
||||
assert_eq!(
|
||||
@@ -269,7 +204,6 @@ mod tests {
|
||||
.to_string(),
|
||||
"Gateway Timeout"
|
||||
);
|
||||
assert_eq!(ProxyError::PayloadTooLarge.to_string(), "Payload Too Large");
|
||||
assert_eq!(
|
||||
ProxyError::TooManyRequests {
|
||||
client_ip: String::new(),
|
||||
@@ -279,7 +213,5 @@ mod tests {
|
||||
.to_string(),
|
||||
"Too Many Requests"
|
||||
);
|
||||
assert_eq!(ProxyError::NotFound.to_string(), "Not Found");
|
||||
assert_eq!(ProxyError::BadRequest.to_string(), "Bad Request");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ use axum::Router;
|
||||
use hyper_util::client::legacy::connect::HttpConnector;
|
||||
use hyper_util::client::legacy::Client;
|
||||
use hyper_util::rt::TokioExecutor;
|
||||
use tracing::warn;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use crate::config::dynamic_config::DynamicConfig;
|
||||
use crate::log_request;
|
||||
@@ -39,11 +39,14 @@ async fn proxy_handler(
|
||||
let host = req
|
||||
.headers()
|
||||
.get(axum::http::header::HOST)
|
||||
.and_then(|v| v.to_str().ok());
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.or_else(|| req.uri().host())
|
||||
.unwrap_or_default();
|
||||
|
||||
let host = match host {
|
||||
Some(h) => h,
|
||||
None => return ProxyError::MissingHost.into_response(),
|
||||
let host = if host.is_empty() {
|
||||
return ProxyError::MissingHost.into_response();
|
||||
} else {
|
||||
host
|
||||
};
|
||||
|
||||
let config = state.config.load();
|
||||
@@ -59,7 +62,23 @@ async fn proxy_handler(
|
||||
let upstream_scheme = site.upstream_scheme.clone();
|
||||
let upstream = site.upstream.clone();
|
||||
let upstream_addr = format!("{}://{}", upstream_scheme, upstream);
|
||||
let upstream_uri = build_upstream_uri(&upstream_scheme, &upstream, req.uri());
|
||||
let upstream_uri = match build_upstream_uri(&upstream_scheme, &upstream, req.uri()) {
|
||||
Ok(uri) => uri,
|
||||
Err(()) => {
|
||||
log_upstream_error!(&host_owned, &upstream_addr, "malformed upstream URI");
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
log_request!(
|
||||
&client_ip,
|
||||
&host_owned,
|
||||
&method,
|
||||
&path,
|
||||
502u16,
|
||||
&upstream,
|
||||
duration_ms
|
||||
);
|
||||
return StatusCode::BAD_GATEWAY.into_response();
|
||||
}
|
||||
};
|
||||
|
||||
let upstream_req = match build_upstream_request(req, &upstream_uri) {
|
||||
Ok(r) => r,
|
||||
@@ -80,23 +99,23 @@ async fn proxy_handler(
|
||||
}
|
||||
};
|
||||
|
||||
let connect_timeout = Duration::from_secs(site.upstream_connect_timeout_secs);
|
||||
let request_timeout = Duration::from_secs(site.upstream_request_timeout_secs);
|
||||
// The timeout covers the entire HTTP round-trip including response body
|
||||
// streaming. For large file downloads or slow upstreams, this means the
|
||||
// timeout kills the response even if the upstream is actively sending data.
|
||||
// A more precise timeout would apply only to connect + first-byte, then
|
||||
// stream the body without a timeout. The `upstream_connect_timeout_secs`
|
||||
// field in SiteConfig exists for a separate connect timeout (see W4).
|
||||
// For Phase 1, this full-request timeout is acceptable.
|
||||
|
||||
let result = if upstream_scheme == "https" {
|
||||
tokio::time::timeout(request_timeout, state.https_client.request(upstream_req)).await
|
||||
tokio::time::timeout(request_timeout, async {
|
||||
tokio::time::timeout(connect_timeout, state.https_client.request(upstream_req)).await
|
||||
})
|
||||
.await
|
||||
} else {
|
||||
tokio::time::timeout(request_timeout, state.http_client.request(upstream_req)).await
|
||||
tokio::time::timeout(request_timeout, async {
|
||||
tokio::time::timeout(connect_timeout, state.http_client.request(upstream_req)).await
|
||||
})
|
||||
.await
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(Ok(upstream_resp)) => {
|
||||
Ok(Ok(Ok(upstream_resp))) => {
|
||||
let status = upstream_resp.status().as_u16();
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
log_request!(
|
||||
@@ -110,11 +129,14 @@ async fn proxy_handler(
|
||||
);
|
||||
let (mut parts, body) = upstream_resp.into_parts();
|
||||
remove_hop_by_hop(&mut parts.headers);
|
||||
// The upstream Server header is intentionally removed. As a security-focused
|
||||
// reverse proxy, we hide upstream server identity as a defense-in-depth measure.
|
||||
// The proxy does not add its own Server header either. See W8 in review #002.
|
||||
parts.headers.remove("server");
|
||||
let body = Body::new(body);
|
||||
Response::from_parts(parts, body)
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
Ok(Ok(Err(e))) => {
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
if e.is_connect() {
|
||||
log_upstream_error!(&host_owned, &upstream_addr, &format!("{}", e));
|
||||
@@ -148,6 +170,21 @@ async fn proxy_handler(
|
||||
resp
|
||||
}
|
||||
}
|
||||
Ok(Err(_)) => {
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
log_upstream_error!(&host_owned, &upstream_addr, "upstream connect timeout");
|
||||
let resp = ProxyError::UpstreamTimeout.into_response();
|
||||
log_request!(
|
||||
&client_ip,
|
||||
&host_owned,
|
||||
&method,
|
||||
&path,
|
||||
504u16,
|
||||
&upstream,
|
||||
duration_ms
|
||||
);
|
||||
resp
|
||||
}
|
||||
Err(_) => {
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
log_upstream_error!(&host_owned, &upstream_addr, "upstream timeout");
|
||||
@@ -166,17 +203,15 @@ async fn proxy_handler(
|
||||
}
|
||||
}
|
||||
|
||||
fn build_upstream_uri(scheme: &str, upstream: &str, original_uri: &Uri) -> Uri {
|
||||
fn build_upstream_uri(scheme: &str, upstream: &str, original_uri: &Uri) -> Result<Uri, ()> {
|
||||
let path = original_uri.path();
|
||||
let query = original_uri
|
||||
.query()
|
||||
.map(|q| format!("?{}", q))
|
||||
.unwrap_or_default();
|
||||
let uri_string = format!("{}://{}{}{}", scheme, upstream, path, query);
|
||||
uri_string.parse::<Uri>().unwrap_or_else(|_| {
|
||||
format!("{}://{}{}", scheme, upstream, path)
|
||||
.parse::<Uri>()
|
||||
.unwrap()
|
||||
uri_string.parse::<Uri>().map_err(|e| {
|
||||
warn!(error = %e, uri = %uri_string, "failed to parse upstream URI");
|
||||
})
|
||||
}
|
||||
|
||||
@@ -192,13 +227,21 @@ fn build_upstream_request(req: Request<Body>, upstream_uri: &Uri) -> anyhow::Res
|
||||
builder.body(req.into_body()).map_err(Into::into)
|
||||
}
|
||||
|
||||
const CONNECT_TIMEOUT_CEILING_SECS: u64 = 30;
|
||||
|
||||
pub fn create_http_client() -> Client<HttpConnector, Body> {
|
||||
let mut connector = HttpConnector::new();
|
||||
connector.set_connect_timeout(Some(Duration::from_secs(CONNECT_TIMEOUT_CEILING_SECS)));
|
||||
Client::builder(TokioExecutor::new())
|
||||
.pool_idle_timeout(Duration::from_secs(90))
|
||||
.build_http()
|
||||
.build(connector)
|
||||
}
|
||||
|
||||
pub fn create_https_client() -> Client<hyper_rustls::HttpsConnector<HttpConnector>, Body> {
|
||||
let mut http_connector = HttpConnector::new();
|
||||
http_connector.set_connect_timeout(Some(Duration::from_secs(CONNECT_TIMEOUT_CEILING_SECS)));
|
||||
http_connector.enforce_http(false);
|
||||
|
||||
let tls_config = rustls::ClientConfig::builder()
|
||||
.with_root_certificates(root_certs())
|
||||
.with_no_client_auth();
|
||||
@@ -207,7 +250,7 @@ pub fn create_https_client() -> Client<hyper_rustls::HttpsConnector<HttpConnecto
|
||||
.with_tls_config(tls_config)
|
||||
.https_or_http()
|
||||
.enable_http1()
|
||||
.build();
|
||||
.wrap_connector(http_connector);
|
||||
|
||||
Client::builder(TokioExecutor::new())
|
||||
.pool_idle_timeout(Duration::from_secs(90))
|
||||
@@ -220,10 +263,23 @@ fn root_certs() -> rustls::RootCertStore {
|
||||
for cert in result.certs {
|
||||
roots.add(cert).ok();
|
||||
}
|
||||
if !result.errors.is_empty() {
|
||||
for err in &result.errors {
|
||||
warn!(error = %err, "failed to load native certificate");
|
||||
}
|
||||
let cert_count = roots.len();
|
||||
let error_count = result.errors.len();
|
||||
if cert_count == 0 {
|
||||
warn!(
|
||||
certs_loaded = cert_count,
|
||||
errors = error_count,
|
||||
"no system root certificates loaded — HTTPS upstream connections will fail"
|
||||
);
|
||||
} else {
|
||||
info!(
|
||||
certs_loaded = cert_count,
|
||||
errors = error_count,
|
||||
"loaded system root certificates"
|
||||
);
|
||||
}
|
||||
for err in &result.errors {
|
||||
warn!(error = %err, "failed to load native certificate");
|
||||
}
|
||||
roots
|
||||
}
|
||||
@@ -317,21 +373,21 @@ mod tests {
|
||||
#[test]
|
||||
fn test_build_upstream_uri_with_query() {
|
||||
let uri: Uri = "/path?foo=bar".parse().unwrap();
|
||||
let result = build_upstream_uri("http", "127.0.0.1:8080", &uri);
|
||||
let result = build_upstream_uri("http", "127.0.0.1:8080", &uri).unwrap();
|
||||
assert_eq!(result.to_string(), "http://127.0.0.1:8080/path?foo=bar");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_upstream_uri_without_query() {
|
||||
let uri: Uri = "/path".parse().unwrap();
|
||||
let result = build_upstream_uri("http", "127.0.0.1:8080", &uri);
|
||||
let result = build_upstream_uri("http", "127.0.0.1:8080", &uri).unwrap();
|
||||
assert_eq!(result.to_string(), "http://127.0.0.1:8080/path");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_upstream_uri_https() {
|
||||
let uri: Uri = "/secure".parse().unwrap();
|
||||
let result = build_upstream_uri("https", "upstream.example.com", &uri);
|
||||
let result = build_upstream_uri("https", "upstream.example.com", &uri).unwrap();
|
||||
assert_eq!(result.to_string(), "https://upstream.example.com/secure");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,6 +25,10 @@ pub fn inject_proxy_headers(headers: &mut HeaderMap, remote_addr: SocketAddr) {
|
||||
|
||||
headers.insert(HeaderName::from_static("x-real-ip"), ip_value.clone());
|
||||
|
||||
// X-Forwarded-For is SET (not appended) because this proxy is the outermost
|
||||
// edge proxy. Any existing X-Forwarded-For from the client is untrusted and
|
||||
// must be replaced with the actual client IP from ConnectInfo.
|
||||
// See ADR-021 for the edge proxy model rationale.
|
||||
headers.insert(HeaderName::from_static("x-forwarded-for"), ip_value);
|
||||
|
||||
// X-Forwarded-Proto is always "https" because this proxy only forwards requests
|
||||
|
||||
@@ -2,11 +2,11 @@ use std::net::{IpAddr, Ipv6Addr};
|
||||
use std::time::Instant;
|
||||
|
||||
pub struct TokenBucket {
|
||||
pub tokens: f64,
|
||||
pub last_refill: Instant,
|
||||
pub rate: f64,
|
||||
pub max: u32,
|
||||
pub last_access: Instant,
|
||||
tokens: f64,
|
||||
last_refill: Instant,
|
||||
rate: f64,
|
||||
max: u32,
|
||||
pub(crate) last_access: Instant,
|
||||
}
|
||||
|
||||
impl TokenBucket {
|
||||
|
||||
@@ -64,19 +64,12 @@ pub async fn rate_limit_middleware(
|
||||
next: Next,
|
||||
) -> axum::response::Response {
|
||||
let client_ip = req
|
||||
.headers()
|
||||
.get("x-forwarded-for")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.and_then(|v| v.split(',').next())
|
||||
.and_then(|v| v.trim().parse::<IpAddr>().ok())
|
||||
.or_else(|| {
|
||||
req.extensions()
|
||||
.get::<axum::extract::ConnectInfo<std::net::SocketAddr>>()
|
||||
.map(|ci| ci.ip())
|
||||
});
|
||||
.extensions()
|
||||
.get::<axum::extract::ConnectInfo<std::net::SocketAddr>>()
|
||||
.map(|ci| ci.ip());
|
||||
|
||||
let Some(ip) = client_ip else {
|
||||
return next.run(req).await;
|
||||
return (StatusCode::TOO_MANY_REQUESTS, "Too Many Requests").into_response();
|
||||
};
|
||||
|
||||
let host = req
|
||||
@@ -102,12 +95,20 @@ pub fn start_eviction_task(
|
||||
limiter: Arc<RateLimiter>,
|
||||
interval: Duration,
|
||||
max_age: Duration,
|
||||
mut shutdown_rx: tokio::sync::watch::Receiver<bool>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
let mut interval_timer = tokio::time::interval(interval);
|
||||
loop {
|
||||
interval_timer.tick().await;
|
||||
limiter.evict_stale(max_age);
|
||||
tokio::select! {
|
||||
_ = interval_timer.tick() => {
|
||||
limiter.evict_stale(max_age);
|
||||
}
|
||||
_ = shutdown_rx.changed() => {
|
||||
tracing::info!("rate limiter eviction task shutting down");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
@@ -205,4 +206,108 @@ mod tests {
|
||||
assert!(limiter.check_and_consume(IpAddr::from([192, 168, 1, 1])));
|
||||
assert!(!limiter.check_and_consume(IpAddr::from([192, 168, 1, 1])));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn middleware_uses_connect_info_without_xff_header() {
|
||||
let limiter = make_limiter(10, 5);
|
||||
|
||||
let app = axum::Router::new()
|
||||
.route("/", axum::routing::get(|| async { "ok" }))
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
limiter,
|
||||
rate_limit_middleware,
|
||||
))
|
||||
.into_make_service_with_connect_info::<std::net::SocketAddr>();
|
||||
|
||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap();
|
||||
|
||||
tokio::spawn(async move { axum::serve(listener, app).await.unwrap() });
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
for _ in 0..5 {
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::OK);
|
||||
}
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::TOO_MANY_REQUESTS);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn middleware_rejects_without_connect_info() {
|
||||
let limiter = make_limiter(10, 20);
|
||||
|
||||
let app = axum::Router::new()
|
||||
.route("/", axum::routing::get(|| async { "ok" }))
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
limiter,
|
||||
rate_limit_middleware,
|
||||
));
|
||||
|
||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap();
|
||||
|
||||
tokio::spawn(async move { axum::serve(listener, app).await.unwrap() });
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::TOO_MANY_REQUESTS);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn middleware_ignores_xff_header_same_bucket() {
|
||||
let limiter = make_limiter(10, 2);
|
||||
|
||||
let app = axum::Router::new()
|
||||
.route("/", axum::routing::get(|| async { "ok" }))
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
limiter,
|
||||
rate_limit_middleware,
|
||||
))
|
||||
.into_make_service_with_connect_info::<std::net::SocketAddr>();
|
||||
|
||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap();
|
||||
|
||||
tokio::spawn(async move { axum::serve(listener, app).await.unwrap() });
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("X-Forwarded-For", "10.0.0.1")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::OK);
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("X-Forwarded-For", "10.0.0.2")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::OK);
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("X-Forwarded-For", "10.0.0.3")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::TOO_MANY_REQUESTS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ use axum::extract::ConnectInfo;
|
||||
use axum::http::Request;
|
||||
use axum::response::Response;
|
||||
use axum::Router;
|
||||
use hyper::body::Incoming;
|
||||
use hyper_util::rt::TokioExecutor;
|
||||
use hyper_util::service::TowerToHyperService;
|
||||
use tokio::net::TcpListener;
|
||||
@@ -19,6 +20,13 @@ pub struct InFlightCounter {
|
||||
|
||||
struct InFlightGuard(Arc<InFlightCounter>);
|
||||
|
||||
impl InFlightGuard {
|
||||
fn new(counter: Arc<InFlightCounter>) -> Self {
|
||||
counter.increment();
|
||||
Self(counter)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for InFlightGuard {
|
||||
fn drop(&mut self) {
|
||||
self.0.decrement();
|
||||
@@ -70,7 +78,7 @@ pub async fn serve_https_listener(
|
||||
let in_flight = in_flight.clone();
|
||||
|
||||
tokio::spawn(async move {
|
||||
let _guard = InFlightGuard(in_flight.clone());
|
||||
let _guard = InFlightGuard::new(in_flight.clone());
|
||||
|
||||
let tls_stream = match tls_acceptor.accept(tcp_stream).await {
|
||||
Ok(stream) => stream,
|
||||
@@ -80,24 +88,40 @@ pub async fn serve_https_listener(
|
||||
}
|
||||
};
|
||||
|
||||
let alpn = tls_stream.get_ref().1.alpn_protocol();
|
||||
let is_h2 = alpn == Some(b"h2");
|
||||
|
||||
let svc = ConnectInfoService {
|
||||
inner: router.into_service::<hyper::body::Incoming>(),
|
||||
inner: router.into_service::<Incoming>(),
|
||||
remote_addr,
|
||||
};
|
||||
|
||||
let svc = TowerToHyperService::new(svc);
|
||||
|
||||
let io = hyper_util::rt::TokioIo::new(tls_stream);
|
||||
|
||||
if let Err(e) = hyper_util::server::conn::auto::Builder::new(TokioExecutor::new())
|
||||
.serve_connection_with_upgrades(io, svc)
|
||||
.await
|
||||
{
|
||||
if let Some(hyper_err) = e.downcast_ref::<hyper::Error>() {
|
||||
if hyper_err.is_incomplete_message() {
|
||||
return;
|
||||
}
|
||||
if is_h2 {
|
||||
let mut builder = hyper::server::conn::http2::Builder::new(TokioExecutor::new());
|
||||
if let Err(e) = builder
|
||||
.enable_connect_protocol()
|
||||
.serve_connection(io, svc)
|
||||
.await
|
||||
{
|
||||
error!(error = %e, "HTTPS/2 connection error");
|
||||
}
|
||||
} else {
|
||||
let mut builder = hyper_util::server::conn::auto::Builder::new(TokioExecutor::new());
|
||||
builder.http2().enable_connect_protocol();
|
||||
if let Err(e) = builder
|
||||
.serve_connection_with_upgrades(io, svc)
|
||||
.await
|
||||
{
|
||||
if let Some(hyper_err) = e.downcast_ref::<hyper::Error>() {
|
||||
if hyper_err.is_incomplete_message() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
error!(error = %e, "HTTPS connection error");
|
||||
}
|
||||
error!(error = %e, "HTTPS connection error");
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -125,7 +149,7 @@ pub async fn drain_in_flight(
|
||||
if start.elapsed() >= timeout {
|
||||
return in_flight.count.load(Ordering::SeqCst);
|
||||
}
|
||||
tokio::time::sleep(std::time::Duration::from_millis(50)).await;
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,11 +159,10 @@ struct ConnectInfoService<S> {
|
||||
remote_addr: SocketAddr,
|
||||
}
|
||||
|
||||
impl<S, B> Service<Request<B>> for ConnectInfoService<S>
|
||||
impl<S> Service<Request<Incoming>> for ConnectInfoService<S>
|
||||
where
|
||||
S: Service<Request<B>, Response = Response> + Clone + Send + 'static,
|
||||
S: Service<Request<Incoming>, Response = Response> + Clone + Send + 'static,
|
||||
S::Future: Send + 'static,
|
||||
B: Send + 'static,
|
||||
{
|
||||
type Response = S::Response;
|
||||
type Error = S::Error;
|
||||
@@ -152,7 +175,7 @@ where
|
||||
self.inner.poll_ready(cx)
|
||||
}
|
||||
|
||||
fn call(&mut self, mut req: Request<B>) -> Self::Future {
|
||||
fn call(&mut self, mut req: Request<Incoming>) -> Self::Future {
|
||||
req.extensions_mut().insert(ConnectInfo(self.remote_addr));
|
||||
self.inner.call(req)
|
||||
}
|
||||
|
||||
@@ -8,10 +8,10 @@ use tracing::info;
|
||||
use super::acme::{spawn_acme_state, AcmeTlsConfig};
|
||||
use super::config::crypto_provider;
|
||||
use crate::config::static_config::TlsConfig;
|
||||
use crate::shutdown::GracefulShutdown;
|
||||
|
||||
const ACME_TLS_ALPN_01: &[u8] = b"acme-tls/1";
|
||||
|
||||
#[allow(dead_code)]
|
||||
fn build_acme_server_config(
|
||||
resolver: Arc<rustls_acme::ResolvesServerCertAcme>,
|
||||
) -> Result<Arc<ServerConfig>> {
|
||||
@@ -30,7 +30,6 @@ fn build_acme_server_config(
|
||||
Ok(Arc::new(config))
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[derive(Debug)]
|
||||
pub enum TlsMode {
|
||||
Manual(Arc<ServerConfig>),
|
||||
@@ -40,8 +39,7 @@ pub enum TlsMode {
|
||||
},
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn setup_tls(tls_config: &TlsConfig) -> Result<TlsMode> {
|
||||
pub fn setup_tls(tls_config: &TlsConfig, shutdown: Arc<GracefulShutdown>) -> Result<TlsMode> {
|
||||
match tls_config.mode.as_str() {
|
||||
"manual" => {
|
||||
if tls_config.cert_path.is_empty() {
|
||||
@@ -75,7 +73,7 @@ pub fn setup_tls(tls_config: &TlsConfig) -> Result<TlsMode> {
|
||||
|
||||
let default_config = build_acme_server_config(resolver.clone())?;
|
||||
|
||||
spawn_acme_state(state, tls_config.acme_domains.clone());
|
||||
spawn_acme_state(state, tls_config.acme_domains.clone(), shutdown);
|
||||
|
||||
info!(
|
||||
domains = ?tls_config.acme_domains,
|
||||
@@ -136,7 +134,8 @@ mod tests {
|
||||
cert_path: String::new(),
|
||||
key_path: "/some/key.pem".to_string(),
|
||||
};
|
||||
let result = setup_tls(&tls_config);
|
||||
let shutdown = Arc::new(GracefulShutdown::new(30));
|
||||
let result = setup_tls(&tls_config, shutdown);
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err().to_string();
|
||||
assert!(err.contains("cert_path"));
|
||||
@@ -153,7 +152,8 @@ mod tests {
|
||||
cert_path: "/some/cert.pem".to_string(),
|
||||
key_path: String::new(),
|
||||
};
|
||||
let result = setup_tls(&tls_config);
|
||||
let shutdown = Arc::new(GracefulShutdown::new(30));
|
||||
let result = setup_tls(&tls_config, shutdown);
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err().to_string();
|
||||
assert!(err.contains("key_path"));
|
||||
@@ -170,7 +170,8 @@ mod tests {
|
||||
cert_path: String::new(),
|
||||
key_path: String::new(),
|
||||
};
|
||||
let result = setup_tls(&tls_config);
|
||||
let shutdown = Arc::new(GracefulShutdown::new(30));
|
||||
let result = setup_tls(&tls_config, shutdown);
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err().to_string();
|
||||
assert!(err.contains("acme_domains"));
|
||||
@@ -187,7 +188,8 @@ mod tests {
|
||||
cert_path: String::new(),
|
||||
key_path: String::new(),
|
||||
};
|
||||
let result = setup_tls(&tls_config);
|
||||
let shutdown = Arc::new(GracefulShutdown::new(30));
|
||||
let result = setup_tls(&tls_config, shutdown);
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err().to_string();
|
||||
assert!(err.contains("acme_cache_dir"));
|
||||
@@ -204,7 +206,8 @@ mod tests {
|
||||
cert_path: String::new(),
|
||||
key_path: String::new(),
|
||||
};
|
||||
let result = setup_tls(&tls_config);
|
||||
let shutdown = Arc::new(GracefulShutdown::new(30));
|
||||
let result = setup_tls(&tls_config, shutdown);
|
||||
assert!(result.is_err());
|
||||
let err = result.unwrap_err().to_string();
|
||||
assert!(err.contains("unknown TLS mode"));
|
||||
|
||||
180
src/tls/acme.rs
180
src/tls/acme.rs
@@ -6,13 +6,12 @@ use rustls_acme::caches::DirCache;
|
||||
use rustls_acme::{AcmeConfig, AcmeState, EventError, EventOk, ResolvesServerCertAcme};
|
||||
use tracing::{error, info, warn};
|
||||
|
||||
#[allow(dead_code)]
|
||||
use crate::shutdown::GracefulShutdown;
|
||||
|
||||
const LETS_ENCRYPT_PRODUCTION_DIRECTORY: &str = "https://acme-v02.api.letsencrypt.org/directory";
|
||||
#[allow(dead_code)]
|
||||
const LETS_ENCRYPT_STAGING_DIRECTORY: &str =
|
||||
"https://acme-staging-v02.api.letsencrypt.org/directory";
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub struct AcmeTlsConfig {
|
||||
pub domains: Vec<String>,
|
||||
pub cache_dir: PathBuf,
|
||||
@@ -20,7 +19,6 @@ pub struct AcmeTlsConfig {
|
||||
pub contact: Vec<String>,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub struct AcmeTlsSetup {
|
||||
pub resolver: Arc<ResolvesServerCertAcme>,
|
||||
pub state: AcmeState<std::io::Error, std::io::Error>,
|
||||
@@ -52,7 +50,7 @@ impl AcmeTlsConfig {
|
||||
Ok(AcmeTlsSetup { resolver, state })
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[cfg(test)]
|
||||
pub fn directory_url(&self) -> &str {
|
||||
match self.directory.as_str() {
|
||||
"production" => LETS_ENCRYPT_PRODUCTION_DIRECTORY,
|
||||
@@ -62,97 +60,109 @@ impl AcmeTlsConfig {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn spawn_acme_state(
|
||||
state: AcmeState<std::io::Error, std::io::Error>,
|
||||
domains: Vec<String>,
|
||||
shutdown: Arc<GracefulShutdown>,
|
||||
) -> tokio::task::JoinHandle<()> {
|
||||
tokio::spawn(async move {
|
||||
use futures::StreamExt;
|
||||
let mut state = state;
|
||||
let mut shutdown_rx = shutdown.subscribe();
|
||||
loop {
|
||||
match state.next().await {
|
||||
Some(Ok(event)) => match event {
|
||||
EventOk::DeployedCachedCert => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: deployed cached certificate"
|
||||
);
|
||||
tokio::select! {
|
||||
event = state.next() => {
|
||||
match event {
|
||||
Some(Ok(event)) => match event {
|
||||
EventOk::DeployedCachedCert => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: deployed cached certificate"
|
||||
);
|
||||
}
|
||||
EventOk::DeployedNewCert => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: deployed new certificate"
|
||||
);
|
||||
}
|
||||
EventOk::CertCacheStore => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: certificate stored to cache"
|
||||
);
|
||||
}
|
||||
EventOk::AccountCacheStore => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: account stored to cache"
|
||||
);
|
||||
}
|
||||
},
|
||||
Some(Err(err)) => match &err {
|
||||
EventError::CertCacheLoad(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: certificate cache load failed"
|
||||
);
|
||||
}
|
||||
EventError::AccountCacheLoad(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: account cache load failed"
|
||||
);
|
||||
}
|
||||
EventError::CertCacheStore(e) => {
|
||||
warn!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: certificate cache store failed"
|
||||
);
|
||||
}
|
||||
EventError::AccountCacheStore(e) => {
|
||||
warn!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: account cache store failed"
|
||||
);
|
||||
}
|
||||
EventError::CachedCertParse(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: cached certificate parse failed"
|
||||
);
|
||||
}
|
||||
EventError::Order(e) => {
|
||||
warn!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: certificate order failed, will retry"
|
||||
);
|
||||
}
|
||||
EventError::NewCertParse(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: new certificate parse failed"
|
||||
);
|
||||
}
|
||||
},
|
||||
None => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: state machine ended"
|
||||
);
|
||||
break;
|
||||
}
|
||||
}
|
||||
EventOk::DeployedNewCert => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: deployed new certificate"
|
||||
);
|
||||
}
|
||||
EventOk::CertCacheStore => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: certificate stored to cache"
|
||||
);
|
||||
}
|
||||
EventOk::AccountCacheStore => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: account stored to cache"
|
||||
);
|
||||
}
|
||||
},
|
||||
Some(Err(err)) => match &err {
|
||||
EventError::CertCacheLoad(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: certificate cache load failed"
|
||||
);
|
||||
}
|
||||
EventError::AccountCacheLoad(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: account cache load failed"
|
||||
);
|
||||
}
|
||||
EventError::CertCacheStore(e) => {
|
||||
warn!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: certificate cache store failed"
|
||||
);
|
||||
}
|
||||
EventError::AccountCacheStore(e) => {
|
||||
warn!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: account cache store failed"
|
||||
);
|
||||
}
|
||||
EventError::CachedCertParse(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: cached certificate parse failed"
|
||||
);
|
||||
}
|
||||
EventError::Order(e) => {
|
||||
warn!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: certificate order failed, will retry"
|
||||
);
|
||||
}
|
||||
EventError::NewCertParse(e) => {
|
||||
error!(
|
||||
domains = ?domains,
|
||||
error = ?e,
|
||||
"ACME: new certificate parse failed"
|
||||
);
|
||||
}
|
||||
},
|
||||
None => {
|
||||
}
|
||||
_ = shutdown_rx.changed() => {
|
||||
info!(
|
||||
domains = ?domains,
|
||||
"ACME: state machine ended"
|
||||
"ACME: state machine shutting down"
|
||||
);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::sync::Arc;
|
||||
@@ -7,14 +6,11 @@ use anyhow::{bail, Context, Result};
|
||||
use rustls::crypto::aws_lc_rs::cipher_suite;
|
||||
use rustls::crypto::aws_lc_rs::{default_provider, kx_group};
|
||||
use rustls::pki_types::{CertificateDer, PrivateKeyDer};
|
||||
use rustls::server::{ClientHello, ResolvesServerCert};
|
||||
use rustls::sign::CertifiedKey;
|
||||
use rustls::version::{TLS12, TLS13};
|
||||
use rustls::ServerConfig;
|
||||
use rustls::SupportedCipherSuite;
|
||||
use rustls_pemfile;
|
||||
|
||||
#[allow(dead_code)]
|
||||
static RESTRICTED_CIPHER_SUITES: &[SupportedCipherSuite] = &[
|
||||
cipher_suite::TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
|
||||
cipher_suite::TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
|
||||
@@ -68,54 +64,14 @@ pub fn build_manual_server_config(cert_path: &str, key_path: &str) -> Result<Ser
|
||||
.with_single_cert(certs, key)
|
||||
.with_context(|| "failed to configure certificate/key pair")?;
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
pub fn build_multi_domain_server_config(
|
||||
domain_certs: &HashMap<String, (Vec<CertificateDer<'static>>, PrivateKeyDer<'static>)>,
|
||||
) -> Result<ServerConfig> {
|
||||
let provider = crypto_provider();
|
||||
|
||||
let mut resolver = SniCertResolver::new();
|
||||
for (domain, (certs, key)) in domain_certs {
|
||||
let certified_key = CertifiedKey::from_der(certs.clone(), key.clone_key(), &provider)
|
||||
.with_context(|| format!("failed to load cert/key for domain {domain}"))?;
|
||||
resolver.add(domain, Arc::new(certified_key));
|
||||
}
|
||||
|
||||
let config = ServerConfig::builder_with_provider(provider)
|
||||
.with_protocol_versions(&[&TLS12, &TLS13])
|
||||
.with_context(|| "failed to set protocol versions")?
|
||||
.with_no_client_auth()
|
||||
.with_cert_resolver(Arc::new(resolver));
|
||||
let mut config = config;
|
||||
// Advertise HTTP/2 and HTTP/1.1 via ALPN so clients can negotiate HTTP/2.
|
||||
// Note: acme-tls/1 is NOT included here — it's only needed for ACME mode.
|
||||
config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()];
|
||||
|
||||
Ok(config)
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct SniCertResolver {
|
||||
entries: HashMap<String, Arc<CertifiedKey>>,
|
||||
}
|
||||
|
||||
impl SniCertResolver {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
entries: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, domain: &str, certified_key: Arc<CertifiedKey>) {
|
||||
self.entries.insert(domain.to_lowercase(), certified_key);
|
||||
}
|
||||
}
|
||||
|
||||
impl ResolvesServerCert for SniCertResolver {
|
||||
fn resolve(&self, client_hello: ClientHello<'_>) -> Option<Arc<CertifiedKey>> {
|
||||
let server_name = client_hello.server_name()?;
|
||||
self.entries.get(&server_name.to_lowercase()).cloned()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -255,55 +211,6 @@ mod tests {
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sni_resolver_known_domain() {
|
||||
let (certs, key) = generate_test_cert("example.com");
|
||||
let provider = crypto_provider();
|
||||
let certified_key = CertifiedKey::from_der(certs, key, &provider).unwrap();
|
||||
let mut resolver = SniCertResolver::new();
|
||||
resolver.add("example.com", Arc::new(certified_key));
|
||||
|
||||
let resolved = resolver.entries.get("example.com");
|
||||
assert!(resolved.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sni_resolver_unknown_domain_returns_none() {
|
||||
let (certs, key) = generate_test_cert("example.com");
|
||||
let provider = crypto_provider();
|
||||
let certified_key = CertifiedKey::from_der(certs, key, &provider).unwrap();
|
||||
let mut resolver = SniCertResolver::new();
|
||||
resolver.add("example.com", Arc::new(certified_key));
|
||||
|
||||
let resolved = resolver.entries.get("unknown.com");
|
||||
assert!(resolved.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sni_resolver_case_insensitive() {
|
||||
let (certs, key) = generate_test_cert("Example.COM");
|
||||
let provider = crypto_provider();
|
||||
let certified_key = CertifiedKey::from_der(certs, key, &provider).unwrap();
|
||||
let mut resolver = SniCertResolver::new();
|
||||
resolver.add("Example.COM", Arc::new(certified_key));
|
||||
|
||||
assert!(resolver.entries.contains_key("example.com"));
|
||||
assert!(!resolver.entries.contains_key("Example.COM"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_multi_domain_server_config() {
|
||||
let (certs1, key1) = generate_test_cert("site1.example.com");
|
||||
let (certs2, key2) = generate_test_cert("site2.example.com");
|
||||
|
||||
let mut domain_certs = HashMap::new();
|
||||
domain_certs.insert("site1.example.com".to_string(), (certs1, key1));
|
||||
domain_certs.insert("site2.example.com".to_string(), (certs2, key2));
|
||||
|
||||
let config = build_multi_domain_server_config(&domain_certs).unwrap();
|
||||
assert!(!config.ignore_client_order);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_load_certs_empty_file() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
68
tasks/fix/acme-contact-validation.md
Normal file
68
tasks/fix/acme-contact-validation.md
Normal file
@@ -0,0 +1,68 @@
|
||||
---
|
||||
id: fix/acme-contact-validation
|
||||
name: Validate ACME contact email has non-empty address with @ sign (W2)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: trivial
|
||||
impact: isolated
|
||||
level: implementation
|
||||
review_findings: [W2]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
The ACME contact validation only checks that `contact.starts_with("mailto:")`
|
||||
but doesn't verify there's an actual email address after the prefix. Values
|
||||
like `mailto:` (empty email) pass validation but will fail at the Let's Encrypt
|
||||
API at certificate provisioning time — after the proxy has already started.
|
||||
|
||||
The spec (config.md validation rule 19) now requires: "a valid `mailto:` URI
|
||||
with a non-empty email address containing an `@` sign."
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/config/validation.rs`** — ACME contact validation (lines 148-153):
|
||||
- After checking `starts_with("mailto:")`, validate the email part:
|
||||
```rust
|
||||
let contact = &listener.tls.acme_contact;
|
||||
if contact.is_empty() || !contact.starts_with("mailto:") {
|
||||
errors.push(ValidationError::AcmeContactInvalid {
|
||||
bind_addr: listener.bind_addr.clone(),
|
||||
});
|
||||
} else {
|
||||
let email = &contact[7..]; // after "mailto:"
|
||||
if email.is_empty() || !email.contains('@') {
|
||||
errors.push(ValidationError::AcmeContactInvalid {
|
||||
bind_addr: listener.bind_addr.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
```
|
||||
- Add tests for:
|
||||
- Valid: `mailto:admin@example.com`
|
||||
- Invalid: `mailto:` (empty), `mailto:user` (no @), empty string, non-mailto
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `mailto:` (empty email) is rejected by validation
|
||||
- [ ] `mailto:user` (no @ sign) is rejected by validation
|
||||
- [ ] `mailto:admin@example.com` still passes validation
|
||||
- [ ] Non-mailto values still rejected
|
||||
- [ ] New unit tests for the tightened validation
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/config.md — validation rule 19
|
||||
- docs/reviews/003-security-and-bug-review.md — W2 finding
|
||||
- src/config/validation.rs — ACME contact validation, existing tests
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
id: fix/add-code-comments
|
||||
name: Add clarifying code comments for correct-but-non-obvious behaviors
|
||||
status: pending
|
||||
status: completed
|
||||
depends_on: [fix/remove-health-and-hardcode-https]
|
||||
scope: narrow
|
||||
risk: trivial
|
||||
|
||||
52
tasks/fix/admin-socket-reload-mutex-visibility.md
Normal file
52
tasks/fix/admin-socket-reload-mutex-visibility.md
Normal file
@@ -0,0 +1,52 @@
|
||||
---
|
||||
id: fix/admin-socket-reload-mutex-visibility
|
||||
name: Gate AdminSocket::reload_mutex with #[cfg(test)] (W11)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: trivial
|
||||
impact: isolated
|
||||
level: implementation
|
||||
review_findings: [W11]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
`AdminSocket::reload_mutex()` is a public method that exists solely for the
|
||||
`test_reload_serialized_with_mutex` test. It exposes an internal synchronization
|
||||
primitive, and the test acquires the mutex before sending a reload command —
|
||||
coupling the test to implementation details.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/admin/socket.rs`**:
|
||||
- Gate `reload_mutex()` with `#[cfg(test)]`:
|
||||
```rust
|
||||
#[cfg(test)]
|
||||
pub fn reload_mutex(&self) -> Arc<Mutex<()>> {
|
||||
self.reload_mutex.clone()
|
||||
}
|
||||
```
|
||||
- The existing test `test_reload_serialized_with_mutex` already uses this
|
||||
method, so it will continue to work.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `reload_mutex()` is only available in test builds (`#[cfg(test)]`)
|
||||
- [ ] The `test_reload_serialized_with_mutex` test still compiles and passes
|
||||
- [ ] `cargo clippy` passes with no warnings in non-test build
|
||||
|
||||
## References
|
||||
|
||||
- docs/reviews/003-security-and-bug-review.md — W11 finding
|
||||
- src/admin/socket.rs — `AdminSocket::reload_mutex()`, test
|
||||
|
||||
## Notes
|
||||
|
||||
> The review suggests alternatively removing the method entirely and testing
|
||||
> serialization through observable behavior. For Phase 1, gating with
|
||||
> `#[cfg(test)]` is the simpler fix that preserves the existing test.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
100
tasks/fix/admin-socket-resource-limits.md
Normal file
100
tasks/fix/admin-socket-resource-limits.md
Normal file
@@ -0,0 +1,100 @@
|
||||
---
|
||||
id: fix/admin-socket-resource-limits
|
||||
name: Add read timeout and line length limit to admin socket (ADR-027)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: low
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [W4, S4]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
The admin socket's `handle_connection` reads one newline-terminated line with
|
||||
`reader.read_line(&mut line)` but sets no timeout and no length limit. This
|
||||
allows:
|
||||
1. A client to connect and send no data, holding a connection indefinitely
|
||||
2. A client to send unbounded data without a newline, causing OOM
|
||||
|
||||
ADR-027 specifies: 5-second read timeout, 4096 byte line length limit.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/admin/socket.rs`** — `handle_connection` function (lines 166-210):
|
||||
- Wrap the `BufReader` with `tokio::io::take` to limit read size to 4096 bytes:
|
||||
```rust
|
||||
let (reader, mut writer) = stream.into_split();
|
||||
let mut reader = BufReader::new(tokio::io::take(reader, 4096));
|
||||
let mut line = String::new();
|
||||
```
|
||||
- Wrap the `read_line` call in a `tokio::time::timeout`:
|
||||
```rust
|
||||
use std::time::Duration;
|
||||
let read_result = tokio::time::timeout(
|
||||
Duration::from_secs(5),
|
||||
reader.read_line(&mut line),
|
||||
).await;
|
||||
```
|
||||
- Handle timeout and line-too-long cases:
|
||||
```rust
|
||||
match read_result {
|
||||
Ok(Ok(0)) | Ok(Err(_)) => {
|
||||
// existing "invalid input" handling
|
||||
}
|
||||
Err(_) => {
|
||||
// timeout
|
||||
tracing::debug!("admin socket connection timed out");
|
||||
let _ = writer.write_all(
|
||||
format!("{}\n", serde_json::to_string(&ErrorResponse {
|
||||
status: "error",
|
||||
message: "read timeout".to_string(),
|
||||
}).unwrap()).as_bytes()
|
||||
).await;
|
||||
return;
|
||||
}
|
||||
Ok(Ok(n)) => {
|
||||
// Check if line was truncated (no newline found within limit)
|
||||
if !line.ends_with('\n') && n > 0 {
|
||||
tracing::warn!("admin socket command exceeded 4096 byte limit");
|
||||
let _ = writer.write_all(
|
||||
format!("{}\n", serde_json::to_string(&ErrorResponse {
|
||||
status: "error",
|
||||
message: "command too long".to_string(),
|
||||
}).unwrap()).as_bytes()
|
||||
).await;
|
||||
return;
|
||||
}
|
||||
// ... existing command handling
|
||||
}
|
||||
}
|
||||
```
|
||||
- Update existing tests and add new tests for timeout and line length limit.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Read timeout of 5 seconds applied to admin socket connections
|
||||
- [ ] Line length limit of 4096 bytes applied (via `tokio::io::take`)
|
||||
- [ ] Timeout logged at `debug` level per ADR-027
|
||||
- [ ] Line-too-long logged at `warn` level per ADR-027
|
||||
- [ ] Both conditions return appropriate error JSON to the client
|
||||
- [ ] Legitimate commands (`reload`, `status`) still work
|
||||
- [ ] New tests for timeout and line length limit behavior
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/decisions/027-admin-socket-resource-limits.md — ADR-027
|
||||
- docs/architecture/operations.md — admin socket resource limits
|
||||
- docs/reviews/003-security-and-bug-review.md — W4, S4 findings
|
||||
- src/admin/socket.rs — `handle_connection`
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
id: fix/clean-dead-code
|
||||
name: Remove dead_code annotations and add #[non_exhaustive] to public enums
|
||||
status: pending
|
||||
status: completed
|
||||
depends_on: [fix/acme-contact-and-challenge]
|
||||
scope: narrow
|
||||
risk: low
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
id: fix/connect-timeout
|
||||
name: Wire upstream_connect_timeout_secs to enforce separate connect timeout
|
||||
status: pending
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: medium
|
||||
|
||||
58
tasks/fix/connector-timeout-ceiling.md
Normal file
58
tasks/fix/connector-timeout-ceiling.md
Normal file
@@ -0,0 +1,58 @@
|
||||
---
|
||||
id: fix/connector-timeout-ceiling
|
||||
name: Raise connector timeout ceiling to 30s per ADR-026
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: low
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [C3]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
The HTTP connector's `set_connect_timeout` is hardcoded to 5 seconds. Per-site
|
||||
connect timeout values > 5s are silently capped because the connector's internal
|
||||
timeout fires before the `tokio::time::timeout` wrapper.
|
||||
|
||||
ADR-026 establishes a 30-second ceiling on the connector. The per-site
|
||||
`tokio::time::timeout` enforces the actual per-site connect timeout. The
|
||||
connector ceiling is a safety backstop, not the primary enforcement mechanism.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/proxy/handler.rs`**:
|
||||
- Change `DEFAULT_CONNECT_TIMEOUT_SECS` from 5 to 30:
|
||||
```rust
|
||||
const CONNECT_TIMEOUT_CEILING_SECS: u64 = 30;
|
||||
```
|
||||
- Rename the constant to make its role clear (ceiling, not the default connect
|
||||
timeout for sites).
|
||||
- Update both `create_http_client` and `create_https_client` to use the renamed
|
||||
constant.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Connector `set_connect_timeout` is set to 30 seconds
|
||||
- [ ] Constant is named to reflect its ceiling role (not "default")
|
||||
- [ ] Per-site connect timeout (default 5s) via `tokio::time::timeout` is unchanged
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/decisions/026-connector-timeout-ceiling.md — ADR-026
|
||||
- docs/architecture/proxy.md — Upstream connection section
|
||||
- docs/reviews/003-security-and-bug-review.md — C3 finding
|
||||
- src/proxy/handler.rs — `create_http_client`, `create_https_client`
|
||||
|
||||
## Notes
|
||||
|
||||
> The previous `fix/connect-timeout` task wired the two-phase timeout approach.
|
||||
> This task completes that work by raising the connector ceiling so per-site
|
||||
> timeouts > 5s actually work.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
95
tasks/fix/consolidate-config-types.md
Normal file
95
tasks/fix/consolidate-config-types.md
Normal file
@@ -0,0 +1,95 @@
|
||||
---
|
||||
id: fix/consolidate-config-types
|
||||
name: Delete RawConfig and use FullConfig in load_config (W6, S5)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: medium
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [W6, S5]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
`RawConfig` (in `src/cli.rs`) and `FullConfig` (in `src/config/mod.rs`) have
|
||||
identical fields and identical serde attributes. They exist because the initial
|
||||
load path manually constructs `StaticConfig` + `SerializableDynamicConfig`,
|
||||
while the reload path uses `FullConfig::into_static_and_dynamic()`. Any new
|
||||
config field must be added in two places.
|
||||
|
||||
The fix is to delete `RawConfig` and use `FullConfig` in `load_config`. The
|
||||
`collect_sites` helper can also be removed since `into_static_and_dynamic`
|
||||
already collects sites from all listeners.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/cli.rs`**:
|
||||
- Delete the `RawConfig` struct (lines 49-65)
|
||||
- Rewrite `load_config` to use `FullConfig`:
|
||||
```rust
|
||||
pub fn load_config(cli: &Cli) -> Result<LoadedConfig> {
|
||||
let config_path = Path::new(&cli.config);
|
||||
let config_content = std::fs::read_to_string(config_path)
|
||||
.with_context(|| format!("failed to read config file: {}", cli.config))?;
|
||||
|
||||
let full_config = crate::config::FullConfig::parse(&config_content)
|
||||
.with_context(|| format!("failed to parse config file: {}", cli.config))?;
|
||||
|
||||
let (static_config, dynamic_config) = full_config.into_static_and_dynamic();
|
||||
|
||||
let allow_wildcard_bind = static_config.allow_wildcard_bind || cli.allow_wildcard_bind;
|
||||
|
||||
validate(&static_config, &dynamic_config, cli.allow_wildcard_bind).map_err(|errors| {
|
||||
anyhow::anyhow!(
|
||||
"config validation failed:\n{}",
|
||||
errors.iter()
|
||||
.map(|e| format!(" - {}", e))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(LoadedConfig {
|
||||
static_config,
|
||||
dynamic_config,
|
||||
allow_wildcard_bind,
|
||||
})
|
||||
}
|
||||
```
|
||||
- Delete the `collect_sites` helper function (lines 112-118)
|
||||
- Remove the now-unused imports: `SerializableDynamicConfig`, `BodyConfig`,
|
||||
`RateLimitConfig` (if they're only used via `RawConfig`)
|
||||
|
||||
**`src/config/mod.rs`**:
|
||||
- No changes needed — `FullConfig` already has `into_static_and_dynamic()`
|
||||
- Verify `FullConfig::parse` and `into_static_and_dynamic` produce identical
|
||||
results to the old `RawConfig` path
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `RawConfig` struct deleted from `src/cli.rs`
|
||||
- [ ] `collect_sites` function deleted from `src/cli.rs`
|
||||
- [ ] `load_config` uses `FullConfig::parse` + `into_static_and_dynamic`
|
||||
- [ ] Startup config loading produces same results as before
|
||||
- [ ] Config validation still runs on both startup and reload
|
||||
- [ ] All existing `cli.rs` tests pass
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/config.md — config reload, FullConfig
|
||||
- docs/reviews/003-security-and-bug-review.md — W6, S5 findings
|
||||
- src/cli.rs — RawConfig, load_config, collect_sites
|
||||
- src/config/mod.rs — FullConfig, into_static_and_dynamic
|
||||
|
||||
## Notes
|
||||
|
||||
> This changes the startup config parsing path. While the behavior should be
|
||||
> identical (same fields, same serde attributes), this is a sensitive area. A
|
||||
> review task follows the code quality generation.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
id: fix/graceful-shutdown
|
||||
name: Fix shutdown to drain listeners and stop background tasks cleanly
|
||||
status: pending
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: moderate
|
||||
risk: medium
|
||||
|
||||
77
tasks/fix/http-port-type-u16.md
Normal file
77
tasks/fix/http-port-type-u16.md
Normal file
@@ -0,0 +1,77 @@
|
||||
---
|
||||
id: fix/http-port-type-u16
|
||||
name: Change http_port type from u32 to u16 per spec (W12)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: low
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [W12]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
`http_port` is declared as `u32` in `ListenerConfig` but `https_port` is `u16`.
|
||||
Both represent TCP port numbers (valid range 1–65535). The type inconsistency
|
||||
means comparisons require casting (`listener.http_port == listener.https_port as
|
||||
u32`) and `http_port` could theoretically hold values > 65535 caught by
|
||||
validation rather than the type system.
|
||||
|
||||
The spec (config.md) now declares `http_port` as `u16`.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/config/static_config.rs`**:
|
||||
- Change `http_port` field type from `u32` to `u16` in `ListenerConfig`
|
||||
- Update `default_http_port()` return type to `u16`
|
||||
|
||||
**`src/config/validation.rs`**:
|
||||
- Change `DuplicateHttpBind` error type: `http_port: u32` → `http_port: u16`
|
||||
- Change `HttpsAndHttpPortSame` error type: `http_port: u32` → `http_port: u16`
|
||||
- Change `HttpPortInvalid` error type: `http_port: u32` → `http_port: u16`
|
||||
- Remove `as u32` casts — both `http_port` and `https_port` are now `u16`
|
||||
- Remove the `http_port > 65535` check (impossible with `u16`, but keep `http_port > 0`
|
||||
for the "disabled" check)
|
||||
- Update comparison: `listener.http_port == listener.https_port` (no cast needed)
|
||||
- Update health check port comparison: remove `as u32` cast
|
||||
|
||||
**`src/main.rs`**:
|
||||
- Update any `http_port` references that assume `u32`
|
||||
|
||||
**`src/cli.rs`**:
|
||||
- Update `RawConfig.http_port` type from `u32` to `u16` (if `RawConfig` still
|
||||
exists after `fix/consolidate-config-types`; if not, this file is unaffected)
|
||||
|
||||
**`src/config/test_fixtures.rs`**:
|
||||
- Update any test fixture `http_port` values from `u32` to `u16`
|
||||
|
||||
**`tests/integration_test.rs`**:
|
||||
- Update any hardcoded `http_port` values
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `http_port` is `u16` in `ListenerConfig`
|
||||
- [ ] All `as u32` casts on `http_port` removed
|
||||
- [ ] `http_port > 65535` validation check removed (impossible with u16)
|
||||
- [ ] `http_port == https_port` comparison works without casting
|
||||
- [ ] All validation tests pass
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/config.md — `http_port` type declaration
|
||||
- docs/reviews/003-security-and-bug-review.md — W12 finding
|
||||
- src/config/static_config.rs — `ListenerConfig` struct
|
||||
- src/config/validation.rs — validation rules, error types
|
||||
|
||||
## Notes
|
||||
|
||||
> If `fix/consolidate-config-types` runs first and removes `RawConfig`, the
|
||||
> `src/cli.rs` changes in this task are reduced. The two tasks are independent
|
||||
> in terms of the type change itself.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
74
tasks/fix/inflight-counter-increment.md
Normal file
74
tasks/fix/inflight-counter-increment.md
Normal file
@@ -0,0 +1,74 @@
|
||||
---
|
||||
id: fix/inflight-counter-increment
|
||||
name: Fix InFlightCounter to increment before spawning task (C2 + drain interval)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: medium
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [C2]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
`InFlightCounter::increment()` is never called anywhere in the codebase. The
|
||||
`InFlightGuard` only decrements on drop. Since `count` stays at 0, the first
|
||||
guard drop does `fetch_sub(1)` on an `AtomicUsize` with value 0, which wraps to
|
||||
`usize::MAX`. `is_zero()` checks `count == 0`, which never becomes true again.
|
||||
The drain logic in `drain_in_flight` always times out.
|
||||
|
||||
The spec (operations.md shutdown sequence) states: "each request **must**
|
||||
increment the counter when it begins and decrement when it completes (via guard
|
||||
drop). The increment must happen before the request task is spawned."
|
||||
|
||||
Additionally, the spec states the drain polls every 100ms, but the current
|
||||
implementation uses 50ms. Align with the spec.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/server.rs`**:
|
||||
- Fold the increment into `InFlightGuard::new()` so it's impossible to forget:
|
||||
```rust
|
||||
impl InFlightGuard {
|
||||
fn new(counter: Arc<InFlightCounter>) -> Self {
|
||||
counter.increment();
|
||||
Self(counter)
|
||||
}
|
||||
}
|
||||
```
|
||||
- Update `serve_https_listener` to use `InFlightGuard::new(in_flight.clone())`
|
||||
instead of `InFlightGuard(in_flight.clone())`.
|
||||
- Make `InFlightGuard`'s tuple struct private (if it isn't already) so callers
|
||||
must use `new()`.
|
||||
|
||||
**`src/server.rs` — `drain_in_flight`**:
|
||||
- Change polling interval from 50ms to 100ms per the spec (operations.md):
|
||||
```rust
|
||||
tokio::time::sleep(std::time::Duration::from_millis(100)).await;
|
||||
```
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `InFlightGuard::new()` calls `counter.increment()` before returning
|
||||
- [ ] `InFlightGuard` is constructed via `new()` only, not the tuple struct
|
||||
- [ ] `serve_https_listener` uses `InFlightGuard::new(in_flight.clone())`
|
||||
- [ ] `drain_in_flight` polls every 100ms (not 50ms)
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/operations.md — shutdown sequence, in-flight counter
|
||||
- docs/reviews/003-security-and-bug-review.md — C2 finding
|
||||
- src/server.rs — InFlightCounter, InFlightGuard, drain_in_flight
|
||||
- src/main.rs — drain_in_flight caller
|
||||
|
||||
## Notes
|
||||
|
||||
> The previous `fix/graceful-shutdown` task addressed the abort-vs-join logic
|
||||
> but did not fix the increment bug. This task completes that work.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
58
tasks/fix/json-format-without-logfile.md
Normal file
58
tasks/fix/json-format-without-logfile.md
Normal file
@@ -0,0 +1,58 @@
|
||||
---
|
||||
id: fix/json-format-without-logfile
|
||||
name: Fix JSON format not applied when no log file is configured (C4)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: trivial
|
||||
impact: isolated
|
||||
level: implementation
|
||||
review_findings: [C4]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
When `format = "json"` is configured but no `log_file_path` is set, the `None`
|
||||
branch of `init_json` creates a layer **without** calling `.json()`. The output
|
||||
is plain text, not JSON. The `format = "json"` config value is silently ignored.
|
||||
|
||||
The spec (operations.md) states: "Both output destinations must respect the
|
||||
`format` config value: when `format = "json"`, both file and stdout output must
|
||||
use JSON formatting."
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/logging/mod.rs`** — `init_json` function, `None` branch (lines 54-58):
|
||||
- Add `.json()` to the stdout-only layer:
|
||||
```rust
|
||||
None => {
|
||||
let layer = tracing_subscriber::fmt::layer()
|
||||
.json()
|
||||
.with_ansi(false)
|
||||
.with_filter(env_filter);
|
||||
tracing_subscriber::registry().with(layer).try_init()?;
|
||||
}
|
||||
```
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `.json()` is called in the `None` branch of `init_json`
|
||||
- [ ] `format = "json"` with no `log_file_path` produces JSON output on stdout
|
||||
- [ ] `format = "json"` with `log_file_path` still produces JSON on both outputs
|
||||
- [ ] `format = "text"` paths are unchanged
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/operations.md — logging output, format guarantee
|
||||
- docs/reviews/003-security-and-bug-review.md — C4 finding
|
||||
- src/logging/mod.rs — `init_json` function
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
66
tasks/fix/log-root-cert-count.md
Normal file
66
tasks/fix/log-root-cert-count.md
Normal file
@@ -0,0 +1,66 @@
|
||||
---
|
||||
id: fix/log-root-cert-count
|
||||
name: Log system root certificate count at startup (S3)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: trivial
|
||||
impact: isolated
|
||||
level: implementation
|
||||
review_findings: [S3]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
`root_certs()` loads native certificates silently — only logs errors. If the
|
||||
system has zero root certificates (misconfigured CA bundle), all HTTPS upstream
|
||||
connections will fail with opaque TLS errors and no diagnostic message.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/proxy/handler.rs`** — `root_certs()` function (lines 246-258):
|
||||
- Add an info-level log with cert count and warn if zero:
|
||||
```rust
|
||||
fn root_certs() -> rustls::RootCertStore {
|
||||
let mut roots = rustls::RootCertStore::empty();
|
||||
let result = rustls_native_certs::load_native_certs();
|
||||
for cert in result.certs {
|
||||
roots.add(cert).ok();
|
||||
}
|
||||
let cert_count = roots.len();
|
||||
let error_count = result.errors.len();
|
||||
if cert_count == 0 {
|
||||
warn!(certs_loaded = cert_count, errors = error_count,
|
||||
"no system root certificates loaded — HTTPS upstream connections will fail");
|
||||
} else {
|
||||
info!(certs_loaded = cert_count, errors = error_count,
|
||||
"loaded system root certificates");
|
||||
}
|
||||
for err in &result.errors {
|
||||
warn!(error = %err, "failed to load native certificate");
|
||||
}
|
||||
roots
|
||||
}
|
||||
```
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Info-level log with cert count when certs > 0
|
||||
- [ ] Warn-level log when cert count is 0
|
||||
- [ ] Error count included in log output
|
||||
- [ ] Individual cert load errors still logged at warn level
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/reviews/003-security-and-bug-review.md — S3 finding
|
||||
- src/proxy/handler.rs — `root_certs()` function
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
67
tasks/fix/rate-limiter-connectinfo-tests.md
Normal file
67
tasks/fix/rate-limiter-connectinfo-tests.md
Normal file
@@ -0,0 +1,67 @@
|
||||
---
|
||||
id: fix/rate-limiter-connectinfo-tests
|
||||
name: Update rate limiter tests to use ConnectInfo instead of X-Forwarded-For (S10)
|
||||
status: completed
|
||||
depends_on: [fix/rate-limiter-ip-source]
|
||||
scope: narrow
|
||||
risk: medium
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [S10]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
After `fix/rate-limiter-ip-source` removes X-Forwarded-For parsing from the rate
|
||||
limiter, existing integration tests that pass client IPs via `X-Forwarded-For`
|
||||
headers will no longer work correctly. The rate limiter now reads exclusively
|
||||
from `ConnectInfo<SocketAddr>`, so tests must provide `ConnectInfo` in request
|
||||
extensions.
|
||||
|
||||
This task also addresses review finding S10: "No test verifies the ConnectInfo
|
||||
extraction path, which is the primary path after C1 is fixed."
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`tests/integration_test.rs`**:
|
||||
- Find all rate limit integration tests (lines 90-163 and any others) that set
|
||||
`X-Forwarded-For` headers for rate limiting purposes
|
||||
- Update those tests to set `ConnectInfo<SocketAddr>` in request extensions
|
||||
instead of (or in addition to) `X-Forwarded-For`
|
||||
- Verify that `X-Forwarded-For` headers are now **ignored** by the rate limiter
|
||||
— add a test that sends requests with different `X-Forwarded-For` values from
|
||||
the same `ConnectInfo` IP and confirms they all count against the same bucket
|
||||
- Add a test that verifies requests **without** `ConnectInfo` are rejected with
|
||||
429 (per ADR-025)
|
||||
|
||||
**`src/rate_limit/mod.rs`** (test module):
|
||||
- Update any unit tests that rely on `X-Forwarded-For` extraction
|
||||
- Add a test that verifies `ConnectInfo`-based rate limiting works without any
|
||||
`X-Forwarded-For` header
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] All rate limit integration tests use `ConnectInfo` for client IP
|
||||
- [ ] Tests verify `X-Forwarded-For` headers are ignored by rate limiter
|
||||
- [ ] New test: requests without `ConnectInfo` are rejected with 429
|
||||
- [ ] New test: different `X-Forwarded-For` values from same `ConnectInfo` IP
|
||||
count against the same bucket (proving XFF is ignored)
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/decisions/025-rate-limiter-ip-source.md — ADR-025
|
||||
- docs/reviews/003-security-and-bug-review.md — S10 finding
|
||||
- tests/integration_test.rs — rate limit tests
|
||||
- src/rate_limit/mod.rs — rate limiter middleware and tests
|
||||
|
||||
## Notes
|
||||
|
||||
> This task depends on `fix/rate-limiter-ip-source` because the code changes
|
||||
> must be in place before the tests can be updated. Attempting this before the
|
||||
> C1 fix would require writing tests for the old (broken) behavior.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
78
tasks/fix/rate-limiter-ip-source.md
Normal file
78
tasks/fix/rate-limiter-ip-source.md
Normal file
@@ -0,0 +1,78 @@
|
||||
---
|
||||
id: fix/rate-limiter-ip-source
|
||||
name: Fix rate limiter to use ConnectInfo only (ADR-025)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: high
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [C1]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
The rate limiter currently extracts the client IP by checking the `X-Forwarded-For`
|
||||
header **first**, then falling back to `ConnectInfo`. This is a security
|
||||
vulnerability: since the rate limiter runs as middleware before the proxy
|
||||
handler injects trusted headers, the `X-Forwarded-For` value is whatever the
|
||||
client sent — completely untrusted. This enables two attack vectors:
|
||||
|
||||
1. **Rate limit bypass**: Attacker sends each request with a different random
|
||||
`X-Forwarded-For` value, evading per-IP token buckets entirely.
|
||||
2. **DoS via IP spoofing**: Attacker sends requests with `X-Forwarded-For:
|
||||
<victim IP>`, depleting the victim's bucket.
|
||||
|
||||
ADR-025 establishes that the rate limiter must use `ConnectInfo<SocketAddr>` as
|
||||
the **sole** source of client IP. If `ConnectInfo` is absent, the request must
|
||||
be **rejected** (not fall back to an untrusted header).
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/rate_limit/mod.rs`**:
|
||||
- Replace the current IP extraction logic in `rate_limit_middleware` (lines 66-76)
|
||||
with ConnectInfo-first, no fallback:
|
||||
```rust
|
||||
let client_ip = req
|
||||
.extensions()
|
||||
.get::<axum::extract::ConnectInfo<std::net::SocketAddr>>()
|
||||
.map(|ci| ci.ip());
|
||||
```
|
||||
- When `ConnectInfo` is absent, return 429 (reject the request) rather than
|
||||
passing through without rate limiting. Per ADR-025: "If ConnectInfo is absent,
|
||||
the request must be rejected rather than falling back to an untrusted header."
|
||||
```rust
|
||||
let Some(ip) = client_ip else {
|
||||
return (StatusCode::TOO_MANY_REQUESTS, "Too Many Requests").into_response();
|
||||
};
|
||||
```
|
||||
- Remove all `X-Forwarded-For` header parsing from the rate limiter middleware.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Rate limiter extracts client IP from `ConnectInfo<SocketAddr>` only
|
||||
- [ ] No `X-Forwarded-For` header parsing in the rate limiter middleware
|
||||
- [ ] Requests without `ConnectInfo` are rejected with 429 (not passed through)
|
||||
- [ ] `cargo test` passes (note: integration tests that pass `X-Forwarded-For`
|
||||
for rate limiting will need to be updated in the separate test task
|
||||
`fix/rate-limiter-connectinfo-tests`)
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/decisions/025-rate-limiter-ip-source.md — ADR-025
|
||||
- docs/architecture/operations.md — Rate limiting design, IP source section
|
||||
- docs/architecture/proxy.md — Rate limiter IP source section
|
||||
- docs/reviews/003-security-and-bug-review.md — C1 finding
|
||||
- src/rate_limit/mod.rs — current implementation (lines 61-104)
|
||||
|
||||
## Notes
|
||||
|
||||
> This is the highest-priority security fix. After this change, the integration
|
||||
> tests in `tests/integration_test.rs` that rely on `X-Forwarded-For` for rate
|
||||
> limiting will need to be updated. That work is tracked separately in
|
||||
> `fix/rate-limiter-connectinfo-tests`.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
96
tasks/fix/remove-dead-code-remnants.md
Normal file
96
tasks/fix/remove-dead-code-remnants.md
Normal file
@@ -0,0 +1,96 @@
|
||||
---
|
||||
id: fix/remove-dead-code-remnants
|
||||
name: Remove dead code items identified in security review #003 (S1)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: trivial
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [S1]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
Security review #003 identifies the following dead code items that survived the
|
||||
previous `fix/clean-dead-code` task. These items are defined but never called in
|
||||
production code:
|
||||
|
||||
| Item | File | Note |
|
||||
|------|------|------|
|
||||
| `log_rate_limit!` macro | `src/logging/format.rs:72-83` | Rate limiter uses `warn!` directly |
|
||||
| `log_config_reload!` macro | `src/logging/format.rs:97-106` | Reload uses `info!`/`warn!` directly |
|
||||
| `format_event_fields()` | `src/logging/format.rs:50-54` | Never called |
|
||||
| `ProxyError::PayloadTooLarge` | `src/proxy/error.rs:12` | Body limit returns tuple directly |
|
||||
| `ProxyError::NotFound` | `src/proxy/error.rs:20` | Code uses `UnknownHost` instead |
|
||||
| `ProxyError::BadRequest` | `src/proxy/error.rs:22` | Code uses `MissingHost` instead |
|
||||
| `ProxyError::UpstreamTls` | `src/proxy/error.rs:28` | Never constructed |
|
||||
| `build_multi_domain_server_config()` | `src/tls/config.rs:78-102` | Never called |
|
||||
| `SniCertResolver` | `src/tls/config.rs:104-126` | Only used by above |
|
||||
| `AcmeTlsConfig::directory_url()` | `src/tls/acme.rs:53-59` | Only used in tests |
|
||||
|
||||
### Changes Required
|
||||
|
||||
For each item, either wire it up (if it should be used) or remove it:
|
||||
|
||||
**Logging macros** (`src/logging/format.rs`):
|
||||
- `log_rate_limit!`: The rate limiter middleware now uses `warn!` directly.
|
||||
Remove the macro and its test invocation in `log_macros_compile`.
|
||||
- `log_config_reload!`: The admin socket reload uses `info!`/`warn!` directly.
|
||||
Remove the macro and its test invocation.
|
||||
- `format_event_fields()`: Never called externally. Remove it.
|
||||
|
||||
**ProxyError variants** (`src/proxy/error.rs`):
|
||||
- `PayloadTooLarge`: Not used by body limit middleware (returns tuple directly).
|
||||
Keep for now with a comment explaining it's reserved for future use, OR remove
|
||||
if no plan to use it. The `#[non_exhaustive]` attribute means removing a variant
|
||||
is not a breaking change for external consumers.
|
||||
- `NotFound`: Superseded by `UnknownHost`. Remove and update `status_code()` and
|
||||
`body()` match arms.
|
||||
- `BadRequest`: Superseded by `MissingHost`. Remove and update match arms.
|
||||
- `UpstreamTls`: Never constructed. Remove and update match arms.
|
||||
- Update unit tests that reference removed variants.
|
||||
|
||||
**TLS dead code** (`src/tls/config.rs`, `src/tls/acme.rs`):
|
||||
- `build_multi_domain_server_config()`: Remove function.
|
||||
- `SniCertResolver`: Remove struct and impl (only used by above function).
|
||||
- `AcmeTlsConfig::directory_url()`: Check if it's used in tests. If only in
|
||||
tests, gate with `#[cfg(test)]`. If truly unused, remove.
|
||||
|
||||
**Test helpers** (`tests/helpers/http_test_helper.rs`):
|
||||
- `TestUpstream::url()`, `TestUpstream::upstream_addr()`: Currently annotated
|
||||
`#[allow(dead_code)]`. If not used in any test, remove. If used, remove the
|
||||
`#[allow(dead_code)]` annotation.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `log_rate_limit!` and `log_config_reload!` macros removed
|
||||
- [ ] `format_event_fields()` removed
|
||||
- [ ] Unused `ProxyError` variants removed (`NotFound`, `BadRequest`, `UpstreamTls`)
|
||||
- [ ] `PayloadTooLarge` either removed or documented with a TODO comment
|
||||
- [ ] `build_multi_domain_server_config()` and `SniCertResolver` removed
|
||||
- [ ] `directory_url()` either `#[cfg(test)]`-gated or removed
|
||||
- [ ] Dead test helper methods removed or `#[allow(dead_code)]` removed
|
||||
- [ ] All match arms in `ProxyError` methods updated for removed variants
|
||||
- [ ] All unit tests updated for removed variants
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/reviews/003-security-and-bug-review.md — S1 finding
|
||||
- src/logging/format.rs — dead macros and function
|
||||
- src/proxy/error.rs — unused ProxyError variants
|
||||
- src/tls/config.rs — dead TLS functions
|
||||
- src/tls/acme.rs — directory_url method
|
||||
- tests/helpers/http_test_helper.rs — dead_code-annotated helpers
|
||||
|
||||
## Notes
|
||||
|
||||
> The previous `fix/clean-dead-code` task added `#[non_exhaustive]` and removed
|
||||
> some dead code. This task addresses the items that survived that round or were
|
||||
> newly identified in review #003.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
57
tasks/fix/rename-misleading-test.md
Normal file
57
tasks/fix/rename-misleading-test.md
Normal file
@@ -0,0 +1,57 @@
|
||||
---
|
||||
id: fix/rename-misleading-test
|
||||
name: Rename misleading health check test and fix dynamic config test (W8, W9)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: trivial
|
||||
impact: isolated
|
||||
level: implementation
|
||||
review_findings: [W8, W9]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
Two test quality issues in `tests/integration_test.rs`:
|
||||
|
||||
**W8**: `test_health_check_disabled_when_port_zero` is misleading — port 0 means
|
||||
"OS picks a random port" not "disabled". The test verifies that
|
||||
`start_health_check_listener(0)` works (binds to a random port), which is
|
||||
correct but the name implies it tests the disable path. The actual disable
|
||||
logic is in `main.rs:94` where `health_check_port > 0` gates the call.
|
||||
|
||||
**W9**: `test_dynamic_config_with_limit` constructs `DynamicConfig` directly
|
||||
with `routing_table: Default::default()` (empty HashMap), bypassing
|
||||
`DynamicConfig::from_sites()`. The body limit tests only read
|
||||
`body.limit_bytes` so it doesn't matter, but it's a latent trap for anyone
|
||||
copying this pattern.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`tests/integration_test.rs`**:
|
||||
- Rename `test_health_check_disabled_when_port_zero` to
|
||||
`test_health_check_binds_random_port_when_zero`
|
||||
- Update `test_dynamic_config_with_limit` to use `DynamicConfig::from_sites()`
|
||||
instead of directly constructing with an empty routing table, OR add a
|
||||
comment explaining why the routing table is intentionally empty for this test
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Test renamed to `test_health_check_binds_random_port_when_zero`
|
||||
- [ ] `test_dynamic_config_with_limit` either uses `from_sites()` or has a
|
||||
comment explaining the intentional empty routing table
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/reviews/003-security-and-bug-review.md — W8, W9 findings
|
||||
- tests/integration_test.rs — test names and implementations
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
62
tasks/fix/tls-mode-wildcard-mismatch.md
Normal file
62
tasks/fix/tls-mode-wildcard-mismatch.md
Normal file
@@ -0,0 +1,62 @@
|
||||
---
|
||||
id: fix/tls-mode-wildcard-mismatch
|
||||
name: Add explicit listener/acceptor count check or remove TlsMode wildcard (W5)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: trivial
|
||||
impact: isolated
|
||||
level: implementation
|
||||
review_findings: [W5]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
The `match tls_mode` in `main.rs` has a wildcard `_` arm that logs a warning
|
||||
and pushes **no** acceptor. Then `bound_listeners.into_iter().zip(tls_acceptors.into_iter())`
|
||||
uses `zip`, which silently stops at the shorter iterator. If the wildcard arm
|
||||
were ever reached, some listeners would have no TLS acceptor and would be
|
||||
silently dropped.
|
||||
|
||||
`setup_tls` already rejects unknown modes with `bail!`, so the wildcard is
|
||||
unreachable in practice. But it's a latent bug for future refactors.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/main.rs`** (lines 170-194):
|
||||
- Option A (preferred): Remove the wildcard `_` arm entirely. Since `TlsMode`
|
||||
only has two variants (`Manual` and `Acme`) and `setup_tls` already validates,
|
||||
the wildcard is dead code. Removing it means the compiler will catch future
|
||||
`TlsMode` additions.
|
||||
|
||||
- Option B: Add an explicit count check after the match loop:
|
||||
```rust
|
||||
if bound_listeners.len() != tls_acceptors.len() {
|
||||
anyhow::bail!("listener/acceptor count mismatch: {} listeners, {} acceptors",
|
||||
bound_listeners.len(), tls_acceptors.len());
|
||||
}
|
||||
```
|
||||
|
||||
If removing the wildcard, this check is redundant but harmless as a
|
||||
defense-in-depth assertion.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] Wildcard `_` arm removed from the `match tls_mode` block, OR
|
||||
- [ ] Explicit count mismatch check added after the acceptor construction loop
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/reviews/003-security-and-bug-review.md — W5 finding
|
||||
- src/main.rs — TLS acceptor construction loop (lines 170-194)
|
||||
- src/tls/acceptor.rs — `setup_tls`, `TlsMode` enum
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
51
tasks/fix/token-bucket-field-visibility.md
Normal file
51
tasks/fix/token-bucket-field-visibility.md
Normal file
@@ -0,0 +1,51 @@
|
||||
---
|
||||
id: fix/token-bucket-field-visibility
|
||||
name: Make TokenBucket fields private except last_access (W10, S6)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: single
|
||||
risk: trivial
|
||||
impact: isolated
|
||||
level: implementation
|
||||
review_findings: [W10, S6]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
All `TokenBucket` fields are `pub` but only `last_access` is read externally (by
|
||||
`evict_stale` in `rate_limit/mod.rs`). The other fields (`tokens`, `last_refill`,
|
||||
`rate`, `max`) should be private to prevent accidental direct mutation that
|
||||
bypasses `try_consume`/`refill` logic.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/rate_limit/bucket.rs`**:
|
||||
- Make `tokens`, `last_refill`, `rate`, `max` private (remove `pub`)
|
||||
- Keep `last_access` as `pub(crate)` for `evict_stale` access
|
||||
- `TokenBucket::new()` already exists as a constructor, so no changes needed there
|
||||
- Update any unit tests that directly access private fields. The tests in
|
||||
`bucket.rs` are in the same module so they have access to private fields.
|
||||
Tests in `mod.rs` may need adjustment if they access `bucket.tokens` etc.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `tokens`, `last_refill`, `rate`, `max` fields are private
|
||||
- [ ] `last_access` is `pub(crate)`
|
||||
- [ ] `new()` constructor is the only way to create a `TokenBucket` externally
|
||||
- [ ] `evict_stale` still compiles and works (uses `last_access`)
|
||||
- [ ] All unit tests pass (in-module tests can still access private fields)
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/reviews/003-security-and-bug-review.md — W10, S6 findings
|
||||
- src/rate_limit/bucket.rs — TokenBucket struct
|
||||
- src/rate_limit/mod.rs — evict_stale
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
83
tasks/fix/upstream-host-validation.md
Normal file
83
tasks/fix/upstream-host-validation.md
Normal file
@@ -0,0 +1,83 @@
|
||||
---
|
||||
id: fix/upstream-host-validation
|
||||
name: Validate host part of upstream address in config (W1)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: low
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [W1]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
`is_valid_upstream` checks that the upstream has a `host:port` format with a
|
||||
valid port number, but performs **no validation on the host part** beyond
|
||||
checking it's non-empty and doesn't start with `http://` or `https://`. Values
|
||||
like `!!!bad!!!:3000` or `@#$%:8080` pass validation.
|
||||
|
||||
The spec (config.md validation rule 17) now requires: "the host part must parse
|
||||
as a valid IpAddr or pass is_valid_hostname validation." Bracket-enclosed values
|
||||
must be parsed as IPv6 addresses.
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/config/validation.rs`** — `is_valid_upstream` function (lines 309-327):
|
||||
- After validating the port, validate the host part:
|
||||
```rust
|
||||
fn is_valid_upstream(upstream: &str) -> bool {
|
||||
if let Some(idx) = upstream.rfind(':') {
|
||||
let host_part = &upstream[..idx];
|
||||
let port_str = &upstream[idx + 1..];
|
||||
if host_part.is_empty() { return false; }
|
||||
if upstream.starts_with("http://") || upstream.starts_with("https://") { return false; }
|
||||
let port: u16 = match port_str.parse() { Ok(p) => p, Err(_) => return false };
|
||||
if port == 0 { return false; }
|
||||
// Validate host part per config.md rule 17
|
||||
if host_part.starts_with('[') && host_part.ends_with(']') {
|
||||
let inner = &host_part[1..host_part.len()-1];
|
||||
inner.parse::<std::net::Ipv6Addr>().is_ok()
|
||||
} else {
|
||||
host_part.parse::<std::net::IpAddr>().is_ok() || is_valid_hostname(host_part)
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
```
|
||||
- Note: `is_valid_hostname` already exists in the same file and is used for site
|
||||
`host` validation. It rejects IP addresses, which is correct for site hosts
|
||||
but wrong for upstream hosts — upstream hosts CAN be IPs. The upstream
|
||||
validation must check `IpAddr::parse` first, then fall back to
|
||||
`is_valid_hostname` for DNS names.
|
||||
|
||||
- Add tests for:
|
||||
- Valid: `gitea:3000`, `127.0.0.1:3000`, `[::1]:3000`
|
||||
- Invalid: `!!!bad!!!:3000`, `@#$%:8080`, `:3000` (empty host)
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `is_valid_upstream` validates the host part as IP address or valid hostname
|
||||
- [ ] IPv6 bracket notation is handled (e.g., `[::1]:3000`)
|
||||
- [ ] Invalid host characters like `!!!bad!!!:3000` are rejected
|
||||
- [ ] Valid upstream formats still pass: `gitea:3000`, `127.0.0.1:3000`
|
||||
- [ ] New unit tests for valid and invalid upstream host parts
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/config.md — validation rule 17
|
||||
- docs/reviews/003-security-and-bug-review.md — W1 finding
|
||||
- src/config/validation.rs — `is_valid_upstream`, `is_valid_hostname`
|
||||
|
||||
## Notes
|
||||
|
||||
> `is_valid_hostname` currently rejects IP addresses (intentional for site
|
||||
> hosts). The upstream validation must handle IPs separately before falling
|
||||
> back to `is_valid_hostname`.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
80
tasks/fix/upstream-uri-error-handling.md
Normal file
80
tasks/fix/upstream-uri-error-handling.md
Normal file
@@ -0,0 +1,80 @@
|
||||
---
|
||||
id: fix/upstream-uri-error-handling
|
||||
name: Return 502 on upstream URI parse failure instead of dropping query string (W3)
|
||||
status: completed
|
||||
depends_on: []
|
||||
scope: narrow
|
||||
risk: low
|
||||
impact: component
|
||||
level: implementation
|
||||
review_findings: [W3]
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
`build_upstream_uri` silently drops the query string on parse failure and
|
||||
`.unwrap()`s the fallback. This corrupts requests — the upstream receives the
|
||||
wrong URL with no query parameters, and neither the client nor operator is
|
||||
notified. The `.unwrap()` on the fallback could also panic.
|
||||
|
||||
The spec (proxy.md request forwarding step 1) states: "If URI construction
|
||||
fails (e.g., the resulting URI is malformed), the proxy must return 502 Bad
|
||||
Gateway and log the error at warn level. The proxy must never silently drop
|
||||
parts of the URI."
|
||||
|
||||
### Changes Required
|
||||
|
||||
**`src/proxy/handler.rs`**:
|
||||
- Change `build_upstream_uri` to return `Result<Uri, ()>`:
|
||||
```rust
|
||||
fn build_upstream_uri(scheme: &str, upstream: &str, original_uri: &Uri) -> Result<Uri, ()> {
|
||||
let path = original_uri.path();
|
||||
let query = original_uri
|
||||
.query()
|
||||
.map(|q| format!("?{}", q))
|
||||
.unwrap_or_default();
|
||||
let uri_string = format!("{}://{}{}{}", scheme, upstream, path, query);
|
||||
uri_string.parse::<Uri>().map_err(|e| {
|
||||
warn!(error = %e, uri = %uri_string, "failed to parse upstream URI");
|
||||
})
|
||||
}
|
||||
```
|
||||
- Update `proxy_handler` to handle the `Err` case:
|
||||
```rust
|
||||
let upstream_uri = match build_upstream_uri(&upstream_scheme, &upstream, req.uri()) {
|
||||
Ok(uri) => uri,
|
||||
Err(()) => {
|
||||
log_upstream_error!(&host_owned, &upstream_addr, "malformed upstream URI");
|
||||
let duration_ms = start.elapsed().as_millis() as u64;
|
||||
log_request!(&client_ip, &host_owned, &method, &path, 502u16, &upstream, duration_ms);
|
||||
return StatusCode::BAD_GATEWAY.into_response();
|
||||
}
|
||||
};
|
||||
```
|
||||
- Update the existing unit tests for `build_upstream_uri` to handle the
|
||||
`Result` return type.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] `build_upstream_uri` returns `Result<Uri, ()>` instead of `Uri`
|
||||
- [ ] URI parse failure logs a warning with the malformed URI string
|
||||
- [ ] URI parse failure returns 502 Bad Gateway to the client
|
||||
- [ ] No `.unwrap()` calls in `build_upstream_uri`
|
||||
- [ ] Query strings are never silently dropped
|
||||
- [ ] Existing unit tests updated for `Result` return type
|
||||
- [ ] `cargo test` passes
|
||||
- [ ] `cargo clippy` passes with no warnings
|
||||
|
||||
## References
|
||||
|
||||
- docs/architecture/proxy.md — request forwarding step 1, URI error handling
|
||||
- docs/reviews/003-security-and-bug-review.md — W3 finding
|
||||
- src/proxy/handler.rs — `build_upstream_uri`, `proxy_handler`
|
||||
|
||||
## Notes
|
||||
|
||||
> To be filled on completion
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
60
tasks/review/post-security-fix-review.md
Normal file
60
tasks/review/post-security-fix-review.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
id: review/post-security-fix-review
|
||||
name: Review security fix implementations before production consideration
|
||||
status: completed
|
||||
depends_on:
|
||||
- fix/rate-limiter-ip-source
|
||||
- fix/inflight-counter-increment
|
||||
- fix/connector-timeout-ceiling
|
||||
- fix/json-format-without-logfile
|
||||
- fix/upstream-host-validation
|
||||
- fix/acme-contact-validation
|
||||
- fix/upstream-uri-error-handling
|
||||
- fix/admin-socket-resource-limits
|
||||
- fix/consolidate-config-types
|
||||
- fix/rate-limiter-connectinfo-tests
|
||||
scope: moderate
|
||||
risk: low
|
||||
impact: project
|
||||
level: review
|
||||
---
|
||||
|
||||
## Description
|
||||
|
||||
Review all security and bug fix implementations from Review #003 before
|
||||
considering them production-ready. Verify that the fixes correctly implement
|
||||
the architecture decisions (ADR-025, ADR-026, ADR-027) and the updated spec
|
||||
documents.
|
||||
|
||||
## Acceptance Criteria
|
||||
|
||||
- [ ] C1 fix: Rate limiter uses ConnectInfo only, rejects without it (ADR-025)
|
||||
- [ ] C2 fix: InFlightCounter increments before task spawn, drain polls 100ms
|
||||
- [ ] C3 fix: Connector ceiling is 30s, per-site timeouts work >5s (ADR-026)
|
||||
- [ ] C4 fix: JSON format applied in stdout-only path
|
||||
- [ ] W1 fix: Upstream host part validated (DNS name or IP, IPv6 brackets)
|
||||
- [ ] W2 fix: ACME contact email validated (non-empty, contains @)
|
||||
- [ ] W3 fix: URI parse failure returns 502, never drops query string silently
|
||||
- [ ] W4 fix: Admin socket has 5s timeout and 4096 byte line limit (ADR-027)
|
||||
- [ ] W6 fix: RawConfig eliminated, FullConfig used in both paths
|
||||
- [ ] S10 fix: Rate limit tests use ConnectInfo, verify XFF is ignored
|
||||
- [ ] All `cargo test` passes
|
||||
- [ ] All `cargo clippy` passes with no warnings
|
||||
- [ ] No regressions in integration tests
|
||||
|
||||
## References
|
||||
|
||||
- docs/reviews/003-security-and-bug-review.md — all findings
|
||||
- docs/architecture/decisions/025-rate-limiter-ip-source.md — ADR-025
|
||||
- docs/architecture/decisions/026-connector-timeout-ceiling.md — ADR-026
|
||||
- docs/architecture/decisions/027-admin-socket-resource-limits.md — ADR-027
|
||||
|
||||
## Notes
|
||||
|
||||
> This review covers the critical security fixes and the sensitive config
|
||||
> consolidation. It should be the last task before the generation 4+ code
|
||||
> quality items are considered final.
|
||||
|
||||
## Summary
|
||||
|
||||
> To be filled on completion
|
||||
@@ -34,14 +34,4 @@ impl TestUpstream {
|
||||
pub async fn spawn_ok() -> Self {
|
||||
Self::spawn(|| Router::new().route("/", get(|| async { "ok" }))).await
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn url(&self) -> String {
|
||||
format!("http://{}", self.addr)
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn upstream_addr(&self) -> String {
|
||||
format!("127.0.0.1:{}", self.addr.port())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ async fn test_health_check_local_port_binds_localhost() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_health_check_disabled_when_port_zero() {
|
||||
async fn test_health_check_binds_random_port_when_zero() {
|
||||
let result = reverse_proxy::health::start_health_check_listener(0).await;
|
||||
assert!(result.is_ok());
|
||||
let (addr, handle) = result.unwrap();
|
||||
@@ -87,13 +87,16 @@ async fn test_health_check_disabled_when_port_zero() {
|
||||
handle.abort();
|
||||
}
|
||||
|
||||
fn make_rate_limit_app(limiter: Arc<reverse_proxy::rate_limit::RateLimiter>) -> Router {
|
||||
fn make_rate_limit_app(
|
||||
limiter: Arc<reverse_proxy::rate_limit::RateLimiter>,
|
||||
) -> axum::extract::connect_info::IntoMakeServiceWithConnectInfo<Router, std::net::SocketAddr> {
|
||||
Router::new()
|
||||
.route("/", get(|| async { "ok" }))
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
limiter,
|
||||
reverse_proxy::rate_limit::rate_limit_middleware,
|
||||
))
|
||||
.into_make_service_with_connect_info::<std::net::SocketAddr>()
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -116,7 +119,6 @@ async fn test_rate_limit_allows_within_burst() {
|
||||
for _ in 0..5 {
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("x-forwarded-for", "192.168.1.1")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -144,7 +146,6 @@ async fn test_rate_limit_rejects_above_burst() {
|
||||
for _ in 0..2 {
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("x-forwarded-for", "10.0.0.50")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -153,7 +154,6 @@ async fn test_rate_limit_rejects_above_burst() {
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("x-forwarded-for", "10.0.0.50")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -181,7 +181,6 @@ async fn test_rate_limit_429_response_body() {
|
||||
let client = reqwest::Client::new();
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("x-forwarded-for", "203.0.113.50")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -189,7 +188,6 @@ async fn test_rate_limit_429_response_body() {
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("x-forwarded-for", "203.0.113.50")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -217,7 +215,6 @@ async fn test_rate_limit_per_ip_independent() {
|
||||
let client = reqwest::Client::new();
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("x-forwarded-for", "192.168.1.1")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
@@ -225,11 +222,85 @@ async fn test_rate_limit_per_ip_independent() {
|
||||
|
||||
let resp2 = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("x-forwarded-for", "192.168.1.2")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp2.status(), reqwest::StatusCode::OK);
|
||||
assert_eq!(resp2.status(), reqwest::StatusCode::TOO_MANY_REQUESTS);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_rate_limit_without_connect_info_rejected_with_429() {
|
||||
let mut config = reverse_proxy::config::test_fixtures::test_dynamic_config();
|
||||
config.rate_limit = reverse_proxy::config::RateLimitConfig {
|
||||
requests_per_second: 10,
|
||||
burst: 20,
|
||||
};
|
||||
let config_arc = Arc::new(ArcSwap::from_pointee(config));
|
||||
let limiter = Arc::new(reverse_proxy::rate_limit::RateLimiter::new(config_arc));
|
||||
|
||||
let app = Router::new().route("/", get(|| async { "ok" })).layer(
|
||||
axum::middleware::from_fn_with_state(
|
||||
limiter,
|
||||
reverse_proxy::rate_limit::rate_limit_middleware,
|
||||
),
|
||||
);
|
||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap();
|
||||
|
||||
tokio::spawn(async { axum::serve(listener, app).await.unwrap() });
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::TOO_MANY_REQUESTS);
|
||||
let body = resp.text().await.unwrap();
|
||||
assert_eq!(body, "Too Many Requests");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_rate_limit_xff_header_ignored_same_bucket() {
|
||||
let mut config = reverse_proxy::config::test_fixtures::test_dynamic_config();
|
||||
config.rate_limit = reverse_proxy::config::RateLimitConfig {
|
||||
requests_per_second: 10,
|
||||
burst: 2,
|
||||
};
|
||||
let config_arc = Arc::new(ArcSwap::from_pointee(config));
|
||||
let limiter = Arc::new(reverse_proxy::rate_limit::RateLimiter::new(config_arc));
|
||||
|
||||
let app = make_rate_limit_app(limiter);
|
||||
let listener = tokio::net::TcpListener::bind("127.0.0.1:0").await.unwrap();
|
||||
let addr = listener.local_addr().unwrap();
|
||||
|
||||
tokio::spawn(async { axum::serve(listener, app).await.unwrap() });
|
||||
|
||||
let client = reqwest::Client::new();
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("X-Forwarded-For", "10.0.0.1")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::OK);
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("X-Forwarded-For", "10.0.0.2")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::OK);
|
||||
|
||||
let resp = client
|
||||
.get(format!("http://127.0.0.1:{}/", addr.port()))
|
||||
.header("X-Forwarded-For", "10.0.0.3")
|
||||
.send()
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(resp.status(), reqwest::StatusCode::TOO_MANY_REQUESTS);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -244,10 +315,12 @@ async fn test_rate_limit_eviction_task() {
|
||||
|
||||
limiter.check_and_consume(std::net::IpAddr::from([192, 168, 1, 1]));
|
||||
|
||||
let shutdown = Arc::new(reverse_proxy::shutdown::GracefulShutdown::new(30));
|
||||
let handle = reverse_proxy::rate_limit::start_eviction_task(
|
||||
limiter.clone(),
|
||||
Duration::from_millis(50),
|
||||
Duration::from_millis(100),
|
||||
shutdown.subscribe(),
|
||||
);
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(200)).await;
|
||||
@@ -259,7 +332,7 @@ async fn test_rate_limit_eviction_task() {
|
||||
|
||||
fn make_redirect_listener_config(
|
||||
bind_addr: &str,
|
||||
http_port: u32,
|
||||
http_port: u16,
|
||||
https_port: u16,
|
||||
) -> reverse_proxy::config::static_config::ListenerConfig {
|
||||
reverse_proxy::config::static_config::ListenerConfig {
|
||||
@@ -614,21 +687,21 @@ fn test_validate_wildcard_bind_via_cli_flag() {
|
||||
}
|
||||
|
||||
fn test_dynamic_config_with_limit(limit_bytes: u64) -> Arc<ArcSwap<DynamicConfig>> {
|
||||
let config = DynamicConfig {
|
||||
sites: vec![SiteConfig {
|
||||
host: "test.local".to_string(),
|
||||
upstream: "127.0.0.1:8080".to_string(),
|
||||
upstream_scheme: "http".to_string(),
|
||||
upstream_connect_timeout_secs: 5,
|
||||
upstream_request_timeout_secs: 60,
|
||||
}],
|
||||
rate_limit: RateLimitConfig {
|
||||
let sites = vec![SiteConfig {
|
||||
host: "test.local".to_string(),
|
||||
upstream: "127.0.0.1:8080".to_string(),
|
||||
upstream_scheme: "http".to_string(),
|
||||
upstream_connect_timeout_secs: 5,
|
||||
upstream_request_timeout_secs: 60,
|
||||
}];
|
||||
let config = DynamicConfig::from_sites(
|
||||
sites,
|
||||
RateLimitConfig {
|
||||
requests_per_second: 10,
|
||||
burst: 20,
|
||||
},
|
||||
body: BodyConfig { limit_bytes },
|
||||
routing_table: Default::default(),
|
||||
};
|
||||
BodyConfig { limit_bytes },
|
||||
);
|
||||
Arc::new(ArcSwap::from_pointee(config))
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user