Skip to content
Merged
53 changes: 48 additions & 5 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,55 @@ MEILI_URL=http://meilisearch:7700
# Read by: MeilisearchClient.kt (also injected into meilisearch container)
MEILI_MASTER_KEY=<rotate-before-deploy-openssl-rand-hex-32>

# OAuth App client_id for /v1/auth/device/* proxy. Must equal the value
# the KMP client ships in BuildKonfig — both ends of the primary→fallback
# auth flow have to use the same OAuth App so device_codes interchange.
# OAuth App client_id for /v1/auth/device/* proxy and the web-flow exchange.
# Must equal the value the KMP client ships in BuildKonfig — both ends of
# the primary→fallback auth flow have to use the same OAuth App so
# device_codes interchange.
# Source: github.com/settings/applications/<app-id>
# Read by: GitHubDeviceClient.kt
GITHUB_OAUTH_CLIENT_ID=<github-oauth-app-client-id>
# Read by: GitHubDeviceClient.kt, OAuthExchangeService (via AppModule)
# (Renamed from GITHUB_OAUTH_CLIENT_ID — old name no longer read.)
OAUTH_CLIENT_ID=<github-oauth-app-client-id>

# OAuth App client_secret for the web-flow code exchange. NEVER commit a
# real value. Distinct from OAUTH_CLIENT_ID — the secret is only ever used
# server-side, and only by the backend (never by the client app or the
# website JS). Source: same GitHub OAuth App settings page.
# Read by: OAuthExchangeService (via AppModule)
OAUTH_CLIENT_SECRET=<github-oauth-app-client-secret>

# Callback URL registered with the OAuth App. Must EXACTLY match what's in
# github.com/settings/applications/<app-id> → "Authorization callback URL".
# Used as redirect_uri on the token exchange; GitHub rejects with
# redirect_uri_mismatch if the values diverge.
# Read by: OAuthExchangeService (via AppModule)
OAUTH_WEB_CALLBACK_URL=https://github-store.org/auth/callback

# Shared secret between this backend and the website (github-store.org).
# Website sends `X-Oauth-Service-Token: <this value>` on every call to
# /v1/oauth/state and /v1/oauth/exchange. Both endpoints refuse the request
# (401 service_auth_required) if missing or wrong. Rotate via:
# openssl rand -base64 48
# Set the SAME value on the website server.
# Read by: OAuthServiceAuth (via AppModule)
OAUTH_SERVICE_TOKEN=<rotate-before-deploy-openssl-rand-base64-48>

# Comma-separated allowlist of Host headers the OAuth S2S endpoints accept.
# Defence-in-depth on top of OAUTH_SERVICE_TOKEN — even if the secret leaks,
# the request still has to land on the canonical vhost. Production must set
# this; an empty value combined with APP_ENV=production refuses every S2S
# OAuth request. If you also serve OAuth via api-direct.github-store.org as
# a fallback vhost, append it: "api.github-store.org,api-direct.github-store.org".
# Read by: OAuthServiceAuth (via AppModule)
OAUTH_SERVICE_ALLOWED_HOSTS=api.github-store.org

# Optional kill switch for the OAuthCleanupWorker. When set to "true", the
# 5-minute loop skips its DELETE of expired rows and only sleeps each cycle. Request-time
# `expires_at > NOW()` filters keep correctness intact; the only cost is
# unbounded growth of the oauth_ephemeral table while disabled. Use only
# when troubleshooting DB lock contention against /exchange or /handoff.
# Default: unset = worker runs normally.
# Read by: OAuthCleanupWorker
# OAUTH_CLEANUP_DISABLED=true

# =====================================================================
# GitHub token rotation pool (required in production for passthrough)
Expand Down
9 changes: 7 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ All under `/v1/`:
| `GET /announcements` | Public, anonymous announcements feed. Same byte-identical envelope for every caller. Backed by JSON files in `src/main/resources/announcements/<id>.json` (or `ANNOUNCEMENTS_DIR` env override). Validator enforces every rule from `docs/backend/announcements-endpoint.md` §2 at load time; expired items are filtered at serve time. `Cache-Control: public, max-age=600` + ETag revalidation. No auth, no per-user logic, no logging beyond standard access. |
| `POST /auth/device/start` | Stateless proxy for `github.com/login/device/code`. Client used to call GitHub directly; some user networks (documented in OpenHub-Store/GitHub-Store#433, #395) can't reach GitHub reliably. Backend adds `client_id`, forwards GitHub's body verbatim. 10 req/hr/IP. |
| `POST /auth/device/poll` | Stateless proxy for `github.com/login/oauth/access_token`. Reads `device_code` from form body, adds `client_id` + `grant_type`, forwards GitHub's body verbatim (including tokens on success). The backend never logs, caches, or persists the token. 200 req/hr/IP. Per-request diagnostic line `[auth-poll rid=… ] dch=<sha256-prefix(device_code)> ghs=<upstream-status> gh_err=<github-error-code-or-"-"> lat_ms=<ms> ua=<truncated-UA>` is logged for auth-stuck triage (GitHub-Store#433, #395) — the raw `device_code`, response body, and every token field are explicitly excluded; only the `error` key is parsed off the upstream body, via a DTO that doesn't even declare `access_token`/`refresh_token`. |
| `POST /oauth/state` | **S2S, website→backend.** PKCE-S256 web flow's pre-redirect step. Body `{state, code_challenge}` — both 43-char base64url. Stores `oauth:state:<state>` row with the challenge for 60s. Returns 204 on success, 409 on duplicate state, 400 on validation, 401 on bad service auth. Gated by `X-Oauth-Service-Token` (env `OAUTH_SERVICE_TOKEN`) + `Host` allowlist (env `OAUTH_SERVICE_ALLOWED_HOSTS`). 60/hr/IP. |
| `POST /oauth/exchange` | **S2S, website→backend.** Called from the website's `/auth/callback` handler. Body `{code, state, code_verifier}` — backend verifies `SHA256(code_verifier) == stored code_challenge`, POSTs to `github.com/login/oauth/access_token` with `client_id`+`client_secret`+`code`+`redirect_uri`, mints a one-shot `handoff_id`, deletes the state row, returns `{handoff_id}`. 400 on PKCE mismatch / expired state / GitHub error (with `github_<code>`), 502 on upstream timeout. Same S2S auth as `/oauth/state`. 30/hr/IP. **No token, code, or verifier ever logged.** |
| `POST /oauth/handoff/{id}` | **Public, app→backend.** App calls this with the `handoff_id` it received via the website's deep link. Backend does atomic `DELETE...RETURNING value` — single-use, second call returns 404. 60/min/IP brute-force cap. `Cache-Control: no-store`. Token lives in `oauth:handoff:<id>` for at most 60s before this call retires it. |
| `GET /internal/metrics` | Operator-only. Gated by `X-Admin-Token` matching the `ADMIN_TOKEN` env var (open if unset, for local dev). Returns per-source search counters, P-latency, worker queue depth, and top 20 misses (8-char `query_hash` prefix only) in last 7 days. |
| `POST /internal/backfill-stale?limit=N` | Operator-only. Spawns a paced background job that refreshes every curated row whose new metadata columns are still at their migration defaults (currently keyed on `license_spdx_id IS NULL`). One concurrent run; returns 409 on re-trigger. Uses `searchClient.refreshRepo` + persist; respects the quiet window so the daily fetcher's pool stays free. Run after a column-add deploy; no-ops afterwards once the filter no longer matches. |
| `GET /badge/...` | M3-styled SVG badges. Per-repo: `/badge/{owner}/{name}/{kind}/{style}/{variant}` for kind ∈ {release, stars, downloads}. Global: `/badge/{kind}/{style}/{variant}` for kind ∈ {users, fdroid}. Static: `/badge/static/{style}/{variant}?label=&icon=`. Style 1-12 hue, variant 1-3 shade. Vectorized glyph rendering — no font dependency at SVG embed time. |
Expand Down Expand Up @@ -111,7 +114,9 @@ RepoRefreshWorker (hourly) — re-fetches passthrough repos by oldest indexed_a
- **GitHub token strategy: bring-your-own + rotation pool fallback + rate-limit retry.** `/search`, `/search/explore`, `/repo` (lazy-fetch path), `/releases`, `/readme`, and `/user` all read an optional `X-GitHub-Token` header from the client and use that token for the upstream call (scales quota linearly with users). When the client sends none, `GitHubSearchClient.pickFallbackToken()` round-robins across a pool of four backend PATs (`GH_TOKEN_TRENDING`, `GH_TOKEN_NEW_RELEASES`, `GH_TOKEN_MOST_POPULAR`, `GH_TOKEN_TOPICS`). `GitHubResourceClient` (powering `/repo`/`/releases`/`/readme`/`/user`) shares the same pool via the provider injected in `AppModule.kt`. **Rate-limit retry**: when an upstream call returns 429 — or 403 with `x-ratelimit-remaining: 0` or a `retry-after` header — the call is retried once with a different pool token. A bare 403 (auth scope, blocked repo) is NOT retried. **Quiet-window guarantee**: during UTC hours `TOKEN_QUIET_START_UTC`–`TOKEN_QUIET_END_UTC` (default 1–4) the rotation pool is bypassed (only `GITHUB_TOKEN` is used) and rate-limit retry is skipped — so the Python fetcher's daily run gets the full pool. Inside that window a rate-limited user token surfaces verbatim. Any change here must preserve the quiet-window guarantee.
- **Resource cache key segmentation.** `GitHubResourceClient.fetchCached` appends `|authed` or `|anon` to every cache key based on whether the request carried `X-GitHub-Token`. Without this, one user's bad-PAT 403 (or the anon path's IP-rate-limit 403) would poison the slot for every other caller until the negative TTL elapsed. Separate slots are filled and read independently.
- **Upstream 401 → backend 502.** `GitHubResourceClient` remaps a 401 from GitHub to `Result.UpstreamError` (handler returns 502), instead of a `NegativeHit` that would forward 401 verbatim. 401 in our own response surface is reserved for backend-auth (e.g. the S2S `/v1/oauth/state` and `/v1/oauth/exchange` routes return 401 on bad service auth); the convention prevents the client from misreading "your PAT was rejected upstream" as "your session with us expired".
- **Auth is a stateless proxy, not a session.** `/v1/auth/device/*` forwards to `github.com/login/*` with the backend's `GITHUB_OAUTH_CLIENT_ID` injected. The backend must **never** log, cache, or persist the access token returned by a successful poll — it passes through the suspending handler and out to the HTTP response, nothing else. No database table, no in-memory map, no breadcrumb. The client is the only place the token lives. Client is backend-first on these two calls and falls back to direct-to-github.com on 5xx / network errors (only — not on valid-but-negative responses like `authorization_pending` or `access_denied`, which are GitHub's real answer and `github.com` direct would say the same thing).
- **Auth is a stateless proxy, not a session.** `/v1/auth/device/*` forwards to `github.com/login/*` with the backend's `OAUTH_CLIENT_ID` injected. The backend must **never** log, cache, or persist the access token returned by a successful poll — it passes through the suspending handler and out to the HTTP response, nothing else. No database table, no in-memory map, no breadcrumb. The client is the only place the token lives. Client is backend-first on these two calls and falls back to direct-to-github.com on 5xx / network errors (only — not on valid-but-negative responses like `authorization_pending` or `access_denied`, which are GitHub's real answer and `github.com` direct would say the same thing).
- **OAuth web flow with PKCE + handoff** (`oauth/` package, `OAuthRoutes.kt`). Separate path from the device flow above — used by Desktop and any browser-initiated login. App generates `state` (32 random bytes, base64url) + `code_verifier` (43-128-char base64url) + `code_challenge = base64url(SHA256(verifier))`. Website posts state+challenge to `POST /v1/oauth/state`, redirects user to `github.com/login/oauth/authorize` with PKCE-S256, receives the callback at `/auth/callback`, posts code+state+verifier to `POST /v1/oauth/exchange`. Backend verifies PKCE, hits GitHub's token endpoint with `client_id`+`client_secret`, mints a 32-byte `handoff_id`, stores the access_token in `oauth:handoff:<id>` for 60s, deletes the state row (one-shot), returns `{handoff_id}`. App receives `handoff_id` via the website's deep-link redirect, calls `POST /v1/oauth/handoff/{id}` for the token. The handoff row is `DELETE...RETURNING value` — atomic single-use, second call always 404. Storage is `oauth_ephemeral` table (V16); 60s TTL is enforced both by `expires_at > NOW()` on every read and by `OAuthCleanupWorker` reaping expired rows every 5min. The `/state` and `/exchange` routes are S2S (shared-secret + Host allowlist); `/handoff/{id}` is public, brute-force protected by per-IP rate limit + 256-bit ID entropy. The backend never logs `code`, `code_verifier`, or `access_token` at all, and never a full `state` or `handoff_id` — only first-8-char prefixes of `state` and `handoff_id` plus status codes and latency.
- **Deep-link contract: `githubstore://auth?handoff=<id>&state=<state>`.** Emitted by the website (NOT the backend). State must round-trip: the app generated it at the start of the flow, validates the value coming back in the deep link against its locally-stored copy, and only then calls `POST /v1/oauth/handoff/<id>`. State on the deep link comes from the website's own KV (the value it received from the app's `/auth/start` call), not from the backend's `oauth_ephemeral` row. Earlier drafts proposed `githubstore://oauth-done?handoff=<id>` — that contract is superseded.
- **Unified ranking via `SearchScore.compute()`** (`ranking/SearchScore.kt`). Formula: `0.40·log₁₀(stars+1)/6 + 0.30·ctr + 0.20·install_success_rate + 0.10·exp(-days_since_release/90)`. Two callers: `SignalAggregationWorker` (hourly, with real signals) and `GitHubSearchClient` at ingest time (cold-start, signals = 0 — still gives passthrough repos a non-null score so they sort). Weights live in the object only; never inline the formula elsewhere.
- **Meilisearch partial-update gotcha — PUT, never POST.** `MeilisearchClient.addDocuments()` is POST, which on Meili *replaces* the document with whatever fields you send (everything else becomes null). `MeilisearchClient.updateScores()` is PUT, which merges. Pushing just `{id, search_score}` with POST will wipe every other field on 3000+ docs. If you add a new "partial update" path, verify the HTTP verb before deploying.
- **Dynamic category/topic ordering.** `RepoRepository.findByCategory()` picks a category-specific primary sort column (`trending_score` for trending, `popularity_score` for most-popular, `latest_release_date` for new-releases), falls back to global `searchScore`, then static `rank` as final tie-breaker. Without category-specific primary, both trending and most-popular collapse onto the same global score — the bug fix in PR #12. `findByTopicBucket()` keeps the simpler `searchScore DESC NULLS LAST, rank ASC` order because topics are flat lists, not flavour-segmented like the categories.
Expand All @@ -133,7 +138,7 @@ RepoRefreshWorker (hourly) — re-fetches passthrough repos by oldest indexed_a

## Deployment notes

- `.env` lives only on the VPS at `/opt/github-store-backend/.env`, never in git. Under `APP_ENV=production` the app refuses to start without these: `DATABASE_URL`, `DATABASE_PASSWORD`, `MEILI_URL`, `MEILI_MASTER_KEY`, `GITHUB_OAUTH_CLIENT_ID`, `DEVICE_ID_PEPPER`. Also relied on: `GITHUB_TOKEN` (quiet-window fallback), `GH_TOKEN_TRENDING` / `GH_TOKEN_NEW_RELEASES` / `GH_TOKEN_MOST_POPULAR` / `GH_TOKEN_TOPICS` (rotation pool), `ADMIN_TOKEN`, `SENTRY_DSN`, `TOKEN_QUIET_START_UTC` / `TOKEN_QUIET_END_UTC` (default 1 / 4), `BADGE_USER_COUNT`, `DATABASE_POOL_SIZE` (default 20), `DISABLE_LIVE_GITHUB_PASSTHROUGH` and `DISABLE_BADGE_FETCH` kill switches. `.env.example` is the canonical list.
- `.env` lives only on the VPS at `/opt/github-store-backend/.env`, never in git. Under `APP_ENV=production` the app refuses to start without these: `DATABASE_URL`, `DATABASE_PASSWORD`, `MEILI_URL`, `MEILI_MASTER_KEY`, `OAUTH_CLIENT_ID` (renamed from the legacy `GITHUB_OAUTH_CLIENT_ID` — old name no longer read), `OAUTH_CLIENT_SECRET`, `OAUTH_SERVICE_TOKEN`, `OAUTH_SERVICE_ALLOWED_HOSTS`, `OAUTH_WEB_CALLBACK_URL`, `DEVICE_ID_PEPPER`. Also relied on: `GITHUB_TOKEN` (quiet-window fallback), `GH_TOKEN_TRENDING` / `GH_TOKEN_NEW_RELEASES` / `GH_TOKEN_MOST_POPULAR` / `GH_TOKEN_TOPICS` (rotation pool), `ADMIN_TOKEN`, `SENTRY_DSN`, `TOKEN_QUIET_START_UTC` / `TOKEN_QUIET_END_UTC` (default 1 / 4), `BADGE_USER_COUNT`, `DATABASE_POOL_SIZE` (default 20), `DISABLE_LIVE_GITHUB_PASSTHROUGH` / `DISABLE_BADGE_FETCH` / `OAUTH_CLEANUP_DISABLED` kill switches. `.env.example` is the canonical list.
- `docker-compose.override.yml` is local-dev only (exposes DB/search ports to host). Never deploy it — `.dockerignore` excludes it.
- Production uses `docker-compose.prod.yml` which does NOT expose Postgres/Meilisearch ports (only Caddy on 80/443). Postgres binds `127.0.0.1:5432` for the SSH tunnel from laptop.
- `Caddyfile.prod` defines two site blocks: `api.github-store.org` (Cloudflare-fronted primary) and `api-direct.github-store.org` (direct origin fallback). Both reverse-proxy to `app:8080` and both issue Let's Encrypt certs. The CDN vhost passes `CF-Connecting-IP` and `X-Forwarded-For` through unchanged (Cloudflare overwrites the former; the latter still has the real client IP appended by Cloudflare). The api-direct vhost overrides XFF with the TCP source and strips `CF-Connecting-IP` to defeat client forgery on the un-fronted path. Cloudflare's origin pull protocol must be **HTTPS**, not HTTP — HTTP triggers Caddy's auto-redirect and leaks the direct hostname.
Expand Down
33 changes: 33 additions & 0 deletions src/main/kotlin/zed/rainxch/githubstore/AppModule.kt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ import zed.rainxch.githubstore.match.FdroidSeedWorker
import zed.rainxch.githubstore.match.SigningFingerprintRepository
import zed.rainxch.githubstore.mirrors.MirrorStatusRegistry
import zed.rainxch.githubstore.mirrors.MirrorStatusWorker
import zed.rainxch.githubstore.oauth.OAuthCleanupWorker
import zed.rainxch.githubstore.oauth.OAuthEphemeralStore
import zed.rainxch.githubstore.oauth.OAuthExchangeService
import zed.rainxch.githubstore.oauth.OAuthServiceAuth
import zed.rainxch.githubstore.oauth.PostgresOAuthEphemeralStore

val appModule = module {
single { EventRepository() }
Expand Down Expand Up @@ -67,4 +72,32 @@ val appModule = module {
persistFn = sc::persist,
)
}

// OAuth web flow wiring.
// Storage: PostgresOAuthEphemeralStore is registered as the OAuthEphemeralStore singleton.
single<OAuthEphemeralStore> { PostgresOAuthEphemeralStore() }

// Token-exchange service, configured from the environment:
//  - OAUTH_CLIENT_ID: safe to embed (matches the KMP client's BuildKonfig value).
//  - OAUTH_CLIENT_SECRET: comes from the OAuth App settings and must be set in
//    the production .env.
//  - OAUTH_WEB_CALLBACK_URL: the website's callback handler. The redirect_uri
//    must match what's registered with GitHub for the OAuth app, otherwise
//    GitHub rejects with redirect_uri_mismatch.
// An unset or blank variable falls back to a non-empty, dev-only placeholder.
// validateProductionEnv refuses to start prod without all three variables set,
// so the placeholders never run through to a real exchange under
// APP_ENV=production.
single {
    val clientId = System.getenv("OAUTH_CLIENT_ID").orEmpty()
        .ifBlank { "missing-client-id" }
    val clientSecret = System.getenv("OAUTH_CLIENT_SECRET").orEmpty()
        .ifBlank { "missing-client-secret" }
    val callbackUrl = System.getenv("OAUTH_WEB_CALLBACK_URL").orEmpty()
        .ifBlank { "https://localhost.invalid/missing-callback" }
    OAuthExchangeService(
        clientId = clientId,
        clientSecret = clientSecret,
        callbackUrl = callbackUrl,
    )
}
// Service-to-service auth for the OAuth endpoints. Both settings are handed
// over exactly as System.getenv returns them (null when the variable is
// unset) — unlike the exchange service there is no placeholder fallback here.
// NOTE(review): handling of null/blank values is presumably done inside
// OAuthServiceAuth — confirm.
single {
    val serviceToken = System.getenv("OAUTH_SERVICE_TOKEN")
    val allowedHosts = System.getenv("OAUTH_SERVICE_ALLOWED_HOSTS")
    OAuthServiceAuth(
        expectedToken = serviceToken,
        allowedHostsCsv = allowedHosts,
    )
}
// Registers the OAuthCleanupWorker singleton. Its `store` and `supervisor`
// dependencies are resolved from the DI container via get().
// NOTE(review): `store` presumably resolves to the OAuthEphemeralStore binding
// declared in this module — confirm against the constructor signature.
single { OAuthCleanupWorker(store = get(), supervisor = get()) }
}
Loading
Loading