diff --git a/.gitignore b/.gitignore index c88dde8..2163093 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ node_modules/ dist/ +.compute-build/ +.compute-demo-build/ ds-data/ ds-data-loadtest/ ds-data-loadtest-*/ diff --git a/docs/aggregation-rollups.md b/docs/aggregation-rollups.md index 2ab4cea..4386935 100644 --- a/docs/aggregation-rollups.md +++ b/docs/aggregation-rollups.md @@ -199,6 +199,16 @@ Current response shape: Current coverage fields: +- `mode` +- `complete` +- `stream_head_offset` +- `visible_through_offset` +- `visible_through_primary_timestamp_max` +- `oldest_omitted_append_at` +- `possible_missing_events_upper_bound` +- `possible_missing_uploaded_segments` +- `possible_missing_sealed_rows` +- `possible_missing_wal_rows` - `used_rollups` - `indexed_segments` - `scanned_segments` diff --git a/docs/alternative-metrics-approach.md b/docs/alternative-metrics-approach.md index 06db7ad..e678b45 100644 --- a/docs/alternative-metrics-approach.md +++ b/docs/alternative-metrics-approach.md @@ -17,10 +17,10 @@ Primary Axiom sources used here: Repository sources used for the current Prisma Streams behavior: -- [docs/metrics.md](/Users/sorenschmidt/code/streams/docs/metrics.md) -- [docs/aggregation-rollups.md](/Users/sorenschmidt/code/streams/docs/aggregation-rollups.md) -- [src/metrics.ts](/Users/sorenschmidt/code/streams/src/metrics.ts) -- [src/metrics_emitter.ts](/Users/sorenschmidt/code/streams/src/metrics_emitter.ts) +- [metrics.md](./metrics.md) +- [aggregation-rollups.md](./aggregation-rollups.md) +- [src/metrics.ts](../src/metrics.ts) +- [src/metrics_emitter.ts](../src/metrics_emitter.ts) ## Summary @@ -156,7 +156,7 @@ model: Prisma Streams does **not** bill by active time series, but the current internal metrics path still has active-series pressure in the runtime. -The sharpest example is [src/metrics.ts](/Users/sorenschmidt/code/streams/src/metrics.ts): +The sharpest example is [src/metrics.ts](../src/metrics.ts): - every distinct metric + tag set becomes a `MetricSeries` in memory - that map lives for the whole flush interval diff --git a/docs/better-result-adoption.md b/docs/better-result-adoption.md index 21c8bb7..47573c1 100644 --- a/docs/better-result-adoption.md +++ b/docs/better-result-adoption.md @@ -212,69 +212,23 @@ Exit criteria: - Test suite reflects the Result-first standard. - Policy checks are enforced by default in CI. 
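+For orientation, this is the shape the Result-first standard points at. The
+`Result` type and `parseOffset` helper below are an illustrative sketch, not
+the repository's actual error-handling utilities:
+
+```ts
+// Minimal Result-first sketch: callers branch on `ok` instead of catching.
+type Result<T, E> = { ok: true; value: T } | { ok: false; error: E };
+
+function parseOffset(raw: string): Result<bigint, string> {
+  try {
+    const value = BigInt(raw);
+    if (value < 0n) return { ok: false, error: `negative offset: ${raw}` };
+    return { ok: true, value };
+  } catch {
+    return { ok: false, error: `not an integer offset: ${raw}` };
+  }
+}
+
+const parsed = parseOffset("42");
+if (!parsed.ok) console.error(parsed.error);
+else console.log(parsed.value.toString());
+```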
-## Full Repository Scope (Current Throw/Catch Inventory) +## Current Guardrail -The following files currently contain `throw new Error` and/or `catch (...)` and are in-scope for migration: +The current enforced inventory is generated by the repository check rather than +kept as a static list in this document: -- `src/app.ts` -- `experiments/bench/routing_key_perf.ts` -- `experiments/bench/segment_cache_perf.ts` -- `experiments/bench/synth.ts` -- `src/bootstrap.ts` -- `src/config.ts` -- `src/db/db.ts` -- `src/db/schema.ts` -- `experiments/demo/common.ts` -- `experiments/demo/live_fields_app.ts` -- `experiments/demo/wal_demo_ingest.ts` -- `experiments/demo/wal_demo_subscribe.ts` -- `src/index/binary_fuse.ts` -- `src/index/indexer.ts` -- `src/index/run_format.ts` -- `src/ingest.ts` -- `src/lens/lens.ts` -- `experiments/loadtests/live/common.ts` -- `experiments/loadtests/live/read_path.ts` -- `experiments/loadtests/live/selective_shedding.ts` -- `experiments/loadtests/live/write_path.ts` -- `src/local/cli.ts` -- `src/local/daemon.ts` -- `src/local/http.ts` -- `src/local/paths.ts` -- `src/local/server.ts` -- `src/local/state.ts` -- `src/memory.ts` -- `src/objectstore/mock_r2.ts` -- `src/objectstore/null.ts` -- `src/objectstore/r2.ts` -- `src/offset.ts` -- `src/reader.ts` -- `src/runtime/hash.ts` -- `src/schema/proof.ts` -- `src/schema/registry.ts` -- `src/segment/format.ts` -- `src/segment/segmenter.ts` -- `src/sqlite/adapter.ts` -- `src/touch/processor_worker.ts` -- `src/touch/live_metrics.ts` -- `src/touch/manager.ts` -- `src/touch/spec.ts` -- `src/touch/worker_pool.ts` -- `src/uploader.ts` -- `src/util/base32_crockford.ts` -- `src/util/bloom256.ts` -- `src/util/duration.ts` -- `src/util/json_pointer.ts` -- `src/util/lru.ts` -- `src/util/retry.ts` -- `src/util/siphash.ts` -- `src/util/time.ts` -- `test/chaos_restart_bootstrap.test.ts` -- `test/ingest_queue_drain.test.ts` -- `test/segmenter_throughput.test.ts` -- `test/touch_processor.test.ts` -- `test/touch_memory_journal.test.ts` -- `test/touch_wait_timeout_reliability.test.ts` +```bash +bun run check:result-policy +``` + +That check scans `src/**/*.ts` and fails on: + +- `throw new Error(...)` +- `.unwrap(...)` + +Use the command output as the source of truth for current policy violations. +Tests, scripts, demos, and load-test utilities may still use ordinary thrown +test/process failures where they are not part of runtime error handling. ## Operational Constraints During Migration diff --git a/docs/bun-memory-risk.md b/docs/bun-memory-risk.md new file mode 100644 index 0000000..0665e0c --- /dev/null +++ b/docs/bun-memory-risk.md @@ -0,0 +1,155 @@ +# Bun Memory Risk Policy + +This document is repository policy for Bun APIs that have shown native memory +retention under sustained Streams workloads. Treat it as authoritative when +changing object-store, fetch-body, file-body, ingest, or background indexing +code. + +## Rule + +Do not introduce high-volume use of Bun APIs that materialize native-backed +`Blob`, `File`, `S3File`, `ArrayBuffer`, or request/response bodies without a +specific memory investigation. + +Prefer explicit streaming APIs and bounded byte budgets. If a code path must +materialize bytes, it must be protected by all of the following: + +- a documented size bound +- bounded concurrency +- memory-sampler coverage for RSS, anon RSS, heap, external, and arrayBuffers +- a local 1 GiB Linux-container stress test when the path can run in production + +Forced `Bun.gc(true)` is not an acceptable mitigation by itself. 
The observed +failure mode is RSS, especially anon RSS, staying high while JS heap and +`arrayBuffers` are much lower. + +## Risky APIs + +Avoid these APIs in long-lived, high-volume production paths: + +- `Bun.S3Client` +- `Bun.S3File` +- `Bun.S3File.arrayBuffer()` +- `Response.arrayBuffer()` over repeated remote downloads +- `Response.blob()` over repeated remote downloads +- `Bun.file(path).arrayBuffer()` +- `Bun.file(path).bytes()` +- `Bun.file(path).text()` for repeated large files +- `Bun.file(path)` as a fetch upload body in sustained object-store upload paths + +Small tests, CLI utilities, and bounded local-only helpers may still use these +APIs, but production server paths must use extra care. If in doubt, treat the +API as unsafe until a memory sampler run proves otherwise. + +## Preferred Patterns + +For object-store uploads: + +- use signed `fetch()` requests instead of `Bun.S3Client` +- stream file uploads with `node:fs.createReadStream()` converted through + `node:stream.Readable.toWeb()` +- keep upload concurrency bounded by the memory preset +- avoid hidden follow-up reads or stats unless required for correctness + +For object-store reads: + +- prefer ranged reads or streaming reads +- use `Response.body.getReader()` rather than `Response.arrayBuffer()` +- if the object-store interface must return `Uint8Array`, collect chunks from + the stream reader under a known object/range size limit + +For local files: + +- prefer `node:fs` streams for large or repeated reads +- use `Bun.mmap()` only for immutable cache files whose pinned mapping is + intentionally tracked as a cache/leak-candidate budget +- avoid repeated `Bun.file().arrayBuffer()`, `bytes()`, or `text()` loops over + large files in the server + +For HTTP request bodies: + +- keep append bodies capped by `DS_APPEND_MAX_BODY_BYTES` +- keep ingest concurrency and queue bytes bounded +- on low-memory presets, close append keep-alive connections and keep the + post-append GC path throttled and observable + +## Streams Evidence + +The production symptom was repeated Compute OOM kills during external event +ingestion. The generator was not colocated with the Streams server, so the +memory pressure belonged to the Streams server process and its background +segment/upload/index work. + +Production failures had this shape: + +- process killed around `809 MiB` anon RSS plus about `51 MiB` shmem RSS +- JS heap, external memory, and tracked application counters did not explain + the RSS high water +- the host clamped a nominal `1024 MB` preset to about `684.9 MiB` of internal + pressure headroom before the kernel killed `bun` + +Local reproduction and fixes: + +- MockR2 with `300ms` operation latency alone did not reproduce the OOM shape. + A 500k-event run peaked around `340 MB` RSS and `290 MB` anon RSS. +- The R2-compatible path using Bun's native S3 implementation against MinIO did + reproduce production-shaped pressure. A 900k-event, 1 GiB Linux-container run + reached about `850.9 MB` RSS and `834.4 MB` anon RSS during background + companion catch-up. +- Replacing `Bun.S3Client` / `S3File` with signed `fetch()` R2 requests dropped + the same class of run to about `533.8 MB` RSS and `506.1 MB` anon RSS. +- Removing the remaining `Bun.file(path)` upload body and + `Response.arrayBuffer()` R2 reads reduced the streamed R2 path further. 
A + 900k-event, 1 GiB Linux-container run peaked at about `474.8 MB` RSS and + `461.2 MB` anon RSS, with cgroup `memory.peak` about `637.1 MB`, and settled + near `204.7 MB` RSS and `188.9 MB` anon RSS. + +Interpretation: R2 latency can increase overlap between upload and background +work, but the decisive local reproduction came from Bun native S3/body +materialization. Avoiding the Bun S3 API and avoiding remaining Blob/File +`arrayBuffer` paths materially reduced anon RSS. + +## Linked Bun Issues + +These issues were open or still relevant when this document was written on +2026-04-25. Re-check status before removing any guardrail. + +- [oven-sh/bun#29083](https://github.com/oven-sh/bun/issues/29083): + `Bun.S3File.arrayBuffer()` retains RSS and reaches OOM in a 1 GiB Linux + container despite forced GC. This is the closest public repro to the Streams + R2 failure. +- [oven-sh/bun#28741](https://github.com/oven-sh/bun/issues/28741): + fetch `Blob` / `ArrayBuffer` memory is not reclaimed after references are + cleared and GC is forced. +- [oven-sh/bun#20487](https://github.com/oven-sh/bun/issues/20487): + large file downloads through `@google-cloud/storage` and Bun S3 keep RSS high + after GC; the reporter observed Node returning closer to baseline while Bun + accumulated RSS. +- [oven-sh/bun#28427](https://github.com/oven-sh/bun/issues/28427): + simple repeated fetch polling report marked as a Bun memory leak / needs + triage. +- [oven-sh/bun#15020](https://github.com/oven-sh/bun/issues/15020): + repeated file reads with `node:fs` and `Bun.File` reported as memory not being + freed. +- [oven-sh/bun#12941](https://github.com/oven-sh/bun/issues/12941): + earlier Blob/ArrayBuffer GC-retention report. This one was closed as not + planned, but it is relevant history because later open reports describe the + same retention class. + +## Review Checklist + +Before merging a change that touches body, file, fetch, or object-store code, +check: + +- Does the change add `Bun.S3Client`, `Bun.S3File`, `Bun.file()`, `.blob()`, or + `.arrayBuffer()` to a hot path? +- If bytes are materialized, what is the maximum size and concurrency? +- Is the memory visible in `GET /v1/server/_mem` or + `DS_MEMORY_SAMPLER_PATH` output? +- Has the path been tested in a memory-limited Linux container when it can run + on Compute? +- If RSS/anon RSS remains high after work completes, did heap, external, + `arrayBuffers`, SQLite stats, active jobs, ingest queue bytes, and index or + companion phases explain it? + +If the answer is unclear, use the streaming alternative first. diff --git a/docs/bundled-companion-and-backfill.md b/docs/bundled-companion-and-backfill.md index 4a703bd..55719db 100644 --- a/docs/bundled-companion-and-backfill.md +++ b/docs/bundled-companion-and-backfill.md @@ -23,13 +23,20 @@ For a sealed uploaded segment, the steady-state published objects are: The `.cix` may contain any subset of: +- `exact` - `col` - `fts` - `agg` - `mblk` -The exact secondary index family remains separate because it is a compacted -cross-segment accelerator, not a per-segment section family. +The exact secondary index family remains separate from `.exact`: secondary +exact runs are compacted cross-segment accelerators, while `.exact` is the +per-segment doc-level postings section. + +Decoded section views are cached in memory by companion object key, plan +generation, and section kind. 
The cache is bounded by +`DS_SEARCH_COMPANION_SECTION_CACHE_BYTES`; raw immutable `.cix` objects remain +managed by the local companion file cache. ## Why Bundle Companions diff --git a/docs/compute-demo.md b/docs/compute-demo.md new file mode 100644 index 0000000..3b2e96d --- /dev/null +++ b/docs/compute-demo.md @@ -0,0 +1,124 @@ +# Compute Demo Deployment + +This document describes the supported Prisma Compute deployment that layers two +browser entrypoints on top of the normal Streams server: + +- `/studio` serves the Prisma Studio streams-only UI against the colocated + Streams server. +- `/generate` serves a write generator with `1k`, `10k`, and `100k` buttons. + The page includes a stream-name field that defaults to `demo-app`. Each run + ensures that selected `application/json` stream has the `evlog` profile and + appends canonical evlog records in server-side chunks with live progress + polling. + +The normal Streams HTTP surface remains available on `/v1/*`. + +By default, this entrypoint can still run a colocated Streams app. For the +Studio/generator demo, prefer external-streams mode by setting +`COMPUTE_DEMO_STREAMS_SERVER_URL`. In that mode, the deployment does not create +or use a local Streams store; it proxies Studio and generator writes to the +configured Streams server. + +## Entrypoint + +Use [`src/compute/demo_entry.ts`](../src/compute/demo_entry.ts) for this +deployment shape. It: + +- applies the normal Compute argv defaults from + [`src/compute/entry.ts`](../src/compute/entry.ts), including `--object-store r2` + and `--auto-tune` when `DS_MEMORY_LIMIT_MB` is set, before the colocated + Streams server config is loaded +- starts the regular Streams app in-process +- fronts it with the `/studio`, `/studio/api/streams/*`, `/generate`, and + `/api/generate/jobs*` routes +- defaults the generator stream field to `demo-app` + +When `COMPUTE_DEMO_STREAMS_SERVER_URL` or `STREAMS_SERVER_URL` is set, the +entrypoint skips the colocated Streams app and proxies all Streams requests to +that external server instead. + +For the colocated 1 GiB Compute demo, the `1024 MiB` auto-tune preset keeps +segmenting in the main process, seals smaller 8 MiB / 50k-row segments, +disables the segment disk cache, uses one upload lane, and limits +bundled-companion work to one segment / one yield block per pass. During a +generate job, the colocated demo pauses segmenter and indexer loops, appends +directly to the in-process ingest queue instead of serializing batches through +Bun's `Request` body path, then resumes background cutting and indexing for the +generated stream. That avoids overlapping the generator, append path, segment +cutting, uploading, and companion building too aggressively on Compute hosts +whose effective RSS headroom is lower than the nominal memory preset. + +When the generator runs on a separate server and writes over HTTP, the Streams +server cannot use the colocated direct-append pause hooks. The low-memory +server preset still keeps append responses non-keep-alive and defers immediate +index/companion enqueue wakeups until the server has had a short +foreground-quiet window, with a bounded deferral cap so a continuous trickle of +writes cannot starve background catch-up. Uploaded segments therefore avoid +starting companion backfill while the external generator is still bursting +writes, but continue to make progress after the burst or during low-rate ingest. 
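+A rough sketch of that quiet-window deferral, with illustrative names and
+thresholds rather than the server's actual scheduler:
+
+```ts
+// Wait for a short foreground-quiet window before running queued
+// index/companion work, but never defer past a bounded deadline so a
+// continuous trickle of writes cannot starve background catch-up.
+function scheduleBackgroundCatchUp(opts: {
+  isQuiet: () => boolean;   // true when no foreground work ran recently
+  run: () => Promise<void>; // the queued index/companion catch-up work
+  quietCheckMs?: number;
+  maxDeferralMs?: number;
+}): void {
+  const quietCheckMs = opts.quietCheckMs ?? 250;
+  const deadline = Date.now() + (opts.maxDeferralMs ?? 30_000);
+  const tick = () => {
+    if (opts.isQuiet() || Date.now() >= deadline) {
+      void opts.run();
+      return;
+    }
+    setTimeout(tick, quietCheckMs);
+  };
+  setTimeout(tick, quietCheckMs);
+}
+```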
+ +## Bundle Build + +Studio assets are not part of this repository, so the Compute bundle must be +prebuilt with a local Studio checkout available. + +Default Studio root discovery: + +- `PRISMA_STUDIO_ROOT`, if set +- otherwise the sibling repository path `../studio` + +Build the bundle: + +```bash +bun run build:compute-demo-bundle +``` + +That script: + +- prebuilds the Studio demo client and CSS +- injects them into the bundle through a virtual module +- bundles `src/compute/demo_entry.ts` +- bundles the Streams worker entrypoints required on Compute + +## Deploy + +Example Compute deploy: + +```bash +PRISMA_API_TOKEN=... \ + bunx @prisma/compute-cli deploy \ + --service your-service-id \ + --skip-build \ + --path .compute-demo-build/bundle \ + --entrypoint compute/demo_entry.js +``` + +Required runtime env is the same as the normal R2-backed Compute server: + +- `DS_HOST=0.0.0.0` +- `DS_ROOT=/mnt/app/prisma-streams` +- `DS_MEMORY_LIMIT_MB=1024` +- `DURABLE_STREAMS_R2_BUCKET` +- `DURABLE_STREAMS_R2_ACCOUNT_ID` +- `DURABLE_STREAMS_R2_ACCESS_KEY_ID` +- `DURABLE_STREAMS_R2_SECRET_ACCESS_KEY` + +On Prisma Compute, keep `DS_ROOT` under `/mnt/app`. Paths such as `/tmp` are +ephemeral and lose the colocated Streams SQLite state on restart. + +External-streams demo mode only needs the HTTP bind settings plus the target +Streams server: + +```bash +DS_HOST=0.0.0.0 \ +COMPUTE_DEMO_STREAMS_SERVER_URL=https://cmoa45nql0u6bzycn7dwdpxe0.cdg.prisma.build \ + bun run src/compute/demo_entry.ts +``` + +## Studio Integration Shape + +The Studio browser app still reads config from `/api/config`, but that config +points its Streams integration at `/studio/api/streams`. + +That proxy path forwards to the configured Streams target, so Studio is always +inspecting the same server that `/generate` writes to. diff --git a/docs/daily-ingest-report-with-more-fts.md b/docs/daily-ingest-report-with-more-fts.md index 223713a..0fe81f4 100644 --- a/docs/daily-ingest-report-with-more-fts.md +++ b/docs/daily-ingest-report-with-more-fts.md @@ -34,7 +34,7 @@ much work per segment in one uninterrupted turn: ## Fix The bundled companion builder in -[src/search/companion_manager.ts](/Users/sorenschmidt/code/streams/src/search/companion_manager.ts) +[src/search/companion_manager.ts](../src/search/companion_manager.ts) was changed in two important ways: 1. Single-pass section building @@ -61,7 +61,7 @@ Additional observability was also added: ## Targeted Regression Coverage The new targeted regression test is in -[test/companion_backfill.test.ts](/Users/sorenschmidt/code/streams/test/companion_backfill.test.ts). +[test/companion_backfill.test.ts](../test/companion_backfill.test.ts). 
It exercises a GH Archive-like large-text schema and verifies that a bundled companion build yields back to the event loop before it finishes: diff --git a/docs/durable-streams-spec.md b/docs/durable-streams-spec.md index 75c1120..25da70b 100644 --- a/docs/durable-streams-spec.md +++ b/docs/durable-streams-spec.md @@ -507,8 +507,8 @@ Rules: changed and the exact family has not rebuilt for that config yet - `bundled_companions` reports current `.cix` coverage for the desired companion plan generation -- `search_families` covers bundled companion sections such as `col`, `fts`, - `agg`, and `mblk` +- `search_families` covers bundled companion sections such as `exact`, `col`, + `fts`, `agg`, and `mblk` - `manifest.last_uploaded_size_bytes` is the uploaded manifest object size as a string when known - `routing_key_index`, `routing_key_lexicon`, each `exact_indexes[*]`, @@ -554,7 +554,7 @@ Rules: - segment and index caches are seeded on first read of a remote object; they may increase even when the request was initiated by a read-only UI flow - `storage.companion_families` splits bundled companion bytes by section family - (`col`, `fts`, `agg`, `mblk`) + (`exact`, `col`, `fts`, `agg`, `mblk`) - `object_store_requests` reports node-local per-stream object-store request counters, split into puts and reads, plus a per-artifact breakdown - this is the supported combined descriptor endpoint for stream-management UIs @@ -757,6 +757,10 @@ Current request fields: - `size` - `search_after` - `sort` + - optional sort list + - when omitted, non-scoring filter queries sort by `offset:desc` + - when omitted, scoring text queries sort by `_score:desc`, then the + primary timestamp field, then `offset:desc` - `timeout_ms` - optional lower per-request budget - server-side effective timeout is always clamped to `<= 3000 ms` @@ -804,6 +808,12 @@ Current search response headers: - `search-scanned-tail-docs` - `search-scanned-tail-time-ms` - `search-exact-candidate-time-ms` +- `search-candidate-doc-ids` +- `search-decoded-records` +- `search-json-parse-time-ms` +- `search-segment-payload-bytes-fetched` +- `search-sort-time-ms` +- `search-peak-hits-held` - `search-index-families-used` Current search coverage fields: @@ -828,6 +838,12 @@ Current search coverage fields: - `scanned_tail_docs` - `scanned_tail_time_ms` - `exact_candidate_time_ms` +- `candidate_doc_ids` +- `decoded_records` +- `json_parse_time_ms` +- `segment_payload_bytes_fetched` +- `sort_time_ms` +- `peak_hits_held` - `index_families_used` Current query support: diff --git a/docs/gharchive-demo.md b/docs/gharchive-demo.md index 3005379..09183cc 100644 --- a/docs/gharchive-demo.md +++ b/docs/gharchive-demo.md @@ -132,9 +132,9 @@ For the `all` range, the demo intentionally uses smaller append batches by default so the workload does not amplify the server's append-path JSON materialization cost on low-memory hosts. On 1–2 GiB auto-tuned servers, the runtime also clamps upload and bundled-companion backfill to single-lane -settings. Segment geometry stays at the normal `16 MiB` / `100,000`-row seal -thresholds so long-range ingest is not dominated by many tiny uploaded segment -objects. +settings. The `1024 MiB` and smaller presets also use `8 MiB` / `50,000`-row +segment geometry so one cut does not transiently hold a large encoded working +set on memory-clamped hosts. The `all` range now starts at `2020-01-01 10:00 UTC`, not at the earliest GH Archive history. 
This is intentional: diff --git a/docs/index.md b/docs/index.md index 874f6cb..c9d39d3 100644 --- a/docs/index.md +++ b/docs/index.md @@ -8,6 +8,8 @@ and tooling. - [overview.md](./overview.md) — product overview, quick start, package surfaces, and the main documentation map +- [compute-demo.md](./compute-demo.md) — Compute deployment that layers `/studio` and `/generate` + on top of the Streams server - [live.md](./live.md) — end-to-end guide for the live / touch system and the `/touch/*` APIs - [live-query-invalidation.md](./live-query-invalidation.md) — SQL query-family matrix for @@ -30,7 +32,8 @@ and tooling. - [sqlite-schema.md](./sqlite-schema.md) — SQLite schema, invariants, and migration expectations - [architecture.md](./architecture.md) — system architecture and data flow - [tiered-index.md](./tiered-index.md) — tiered routing-key index design -- [indexing-architecture.md](./indexing-architecture.md) — current exact + `.col` + `.fts` search indexing model +- [indexing-architecture.md](./indexing-architecture.md) — current exact + `.col` + `.fts` + `.agg` + `.mblk` search indexing model +- [aspirational-indexing-architecture.md](./aspirational-indexing-architecture.md) — long-term target indexing architecture, not the shipped model - [storage-layout-architecture.md](./storage-layout-architecture.md) — `PSCIX2` bundled companion storage layout and per-family binary section codecs - [bundled-companion-and-backfill.md](./bundled-companion-and-backfill.md) — bundled `.cix` companions and async backfill for existing streams - [low-latency-reads-under-ingest.md](./low-latency-reads-under-ingest.md) — future architecture for stable `/_search` and `/_aggregate` latency under heavy ingest @@ -45,6 +48,8 @@ and tooling. ## Operations - [operational-notes.md](./operational-notes.md) — tuning knobs and stall diagnosis +- [bun-memory-risk.md](./bun-memory-risk.md) — repository policy for risky Bun + body, file, and S3 APIs that can retain anon RSS - [memory-assumption.md](./memory-assumption.md) — ranked working assumptions for explaining RSS growth from the current memory observability surfaces - [recovery-integrity-runbook.md](./recovery-integrity-runbook.md) — recovery steps and correctness checks - [segment-performance.md](./segment-performance.md) — segment read-path performance notes diff --git a/docs/indexing-architecture.md b/docs/indexing-architecture.md index 0bd236a..ca3da3d 100644 --- a/docs/indexing-architecture.md +++ b/docs/indexing-architecture.md @@ -18,6 +18,8 @@ Prisma Streams now ships these indexing layers: - the existing exact-match secondary index family, now treated as an internal accelerator derived from schema `search.fields` - a bundled per-segment `PSCIX2` companion container (`.cix`) +- a plan-relative binary `exact` section family inside `.cix` for doc-level + exact-value postings - a plan-relative binary `col` section family inside `.cix` for typed equality, range, and existence - a plan-relative binary `fts` section family inside `.cix` for keyword exact/prefix and text @@ -29,7 +31,14 @@ Prisma Streams now ships these indexing layers: Bundled companion reads now use a local immutable `.cix` cache plus `Bun.mmap()` over the cached file. The object-store read unit is therefore the full bundled companion on first access, while the runtime decode unit remains -the requested family section. +the requested family section. 
Decoded companion sections are also cached in +memory by object key, plan generation, and section kind within the +`DS_SEARCH_COMPANION_SECTION_CACHE_BYTES` budget. + +Explicit primary timestamp sorts use companion segment timestamp bounds for a +top-k plan: sealed segments with known bounds are visited in likely result +order, only the requested page of best hits is retained, and remaining segments +are skipped once their timestamp range cannot beat the current kth hit. The public schema model is **`search`**, not `indexes[]`. @@ -520,13 +529,13 @@ bundled companion plan changes. Current bundled-companion rules: - each uploaded segment may have one current `.cix` -- the `.cix` may contain any subset of `col`, `fts`, `agg`, and `mblk` +- the `.cix` may contain any subset of `exact`, `col`, `fts`, `agg`, and `mblk` - the desired bundled companion plan is hashed and versioned per stream - bundled companions use the binary `PSCIX2` container with a fixed header and fixed section table - each bundled companion build loads one segment and builds enabled families - sequentially, so `col`, `fts`, `agg`, and `mblk` do not keep their heaviest - in-memory state live at the same time + sequentially, so `exact`, `col`, `fts`, `agg`, and `mblk` do not keep their + heaviest in-memory state live at the same time - family payloads are plan-relative and do not repeat field or rollup names - query-time companion reads cache raw `.cix` bytes plus the parsed section table and decode only the requested section family on demand @@ -630,11 +639,20 @@ Current search coverage fields: - `possible_missing_wal_rows` - `indexed_segments` - `indexed_segment_time_ms` +- `fts_section_get_ms` +- `fts_decode_ms` +- `fts_clause_estimate_ms` - `scanned_segments` - `scanned_segment_time_ms` - `scanned_tail_docs` - `scanned_tail_time_ms` - `exact_candidate_time_ms` +- `candidate_doc_ids` +- `decoded_records` +- `json_parse_time_ms` +- `segment_payload_bytes_fetched` +- `sort_time_ms` +- `peak_hits_held` - `index_families_used` Current query support: @@ -651,6 +669,14 @@ Current query support: Current candidate-planning behavior: +- exact-equality clauses use bundled `.exact` doc-id postings when available + before intersecting with `.col` and `.fts` candidates +- for append-order reverse search, a non-empty per-segment candidate doc-id set + is walked directly in offset order, and only blocks containing candidate hits + are decoded +- remote candidate-doc searches range-read the segment footer and matching + compressed blocks instead of fetching the full segment object when the DSB3 + footer is available - fielded exact keyword clauses still use the internal exact family first for sealed-history segment pruning when that family is available - if a keyword field is also present in bundled `.fts` because it enables @@ -659,6 +685,9 @@ Current candidate-planning behavior: - positive `.fts` clauses are evaluated in estimated-selectivity order, and later clauses are checked against the current candidate doc-id set instead of materializing every clause against the whole segment +- quiet WAL-tail exact clauses use a per-reader in-memory exact postings cache + for the visible tail range, so repeated exact tail lookups fetch only matching + WAL rows instead of scanning the whole tail again Current non-support: @@ -749,7 +778,7 @@ endpoints: - routing-key lexicon status - internal exact-index status, including stale-config detection - bundled companion object coverage -- `col`, `fts`, `agg`, and `mblk` family progress derived from 
bundled +- `exact`, `col`, `fts`, `agg`, and `mblk` family progress derived from bundled companion sections Current exact-index scheduling: @@ -840,8 +869,8 @@ The intended planner order for metrics streams is: The long-term design doc is still directionally correct, but the current system ships a smaller subset: -- `.col` and `.fts` are per-segment companions only; there are no compacted - `.col`, `.fts`, or `.agg` runs yet +- `.exact`, `.col`, and `.fts` are per-segment companions only; there are no + compacted `.exact`, `.col`, `.fts`, or `.agg` runs yet - `.sub` is not implemented - `_search` does not ship snippets - current text scoring is query-time text scoring over the source records; it is diff --git a/docs/metrics.md b/docs/metrics.md index c7685de..a36c86b 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -221,7 +221,7 @@ series tables. ### Important Current Limitation The internal emitter still maintains an in-memory per-series map for the flush -interval in [src/metrics.ts](/Users/sorenschmidt/code/streams/src/metrics.ts). +interval in [src/metrics.ts](../src/metrics.ts). So the shipped system improves **storage and query-path cardinality behavior** more than **ingest-path cardinality behavior**. @@ -346,6 +346,17 @@ This implementation emits interval summaries for: - `tieredstore.auto_tune.preset_mb` - `tieredstore.auto_tune.effective_memory_limit_mb` +### Object store + +- `tieredstore.objectstore.put.latency` +- `tieredstore.objectstore.get.latency` +- `tieredstore.objectstore.head.latency` +- `tieredstore.objectstore.delete.latency` +- `tieredstore.objectstore.list.latency` + - tags: + - `artifact=manifest|schema_registry|routing_index|routing_key_lexicon|exact_index|segment|bundled_companion|stream_catalog|meta|unknown` + - `outcome=ok|miss|error` + ### Append and read throughput - `tieredstore.append.bytes` diff --git a/docs/operational-notes.md b/docs/operational-notes.md index 182d0c6..ed5e263 100644 --- a/docs/operational-notes.md +++ b/docs/operational-notes.md @@ -9,27 +9,33 @@ runtime overview and command surface, see `overview.md`. 
- `DS_ROOT`: data directory (default `./ds-data`) - `DS_DB_PATH`: SQLite file path (default `${DS_ROOT}/wal.sqlite`) -- `DS_SEGMENT_MAX_BYTES`: segment seal threshold (default 16 MiB; auto-tune preserves 16 MiB on every preset) +- `DS_SEGMENT_MAX_BYTES`: segment seal threshold (default 16 MiB; auto-tune uses 8 MiB through the `1024 MiB` preset and 16 MiB above that) - `DS_BLOCK_MAX_BYTES`: max uncompressed bytes per DSB3 block (default 256 KiB) -- `DS_SEGMENT_TARGET_ROWS`: segment seal threshold by row count (default 100k; auto-tune preserves 100k rows on every preset) +- `DS_SEGMENT_TARGET_ROWS`: segment seal threshold by row count (default 100k; auto-tune uses 50k rows through the `1024 MiB` preset and 100k rows above that) - `DS_SEGMENT_MAX_INTERVAL_MS`: max time between segment cuts (default 0; 0 disables time-based sealing) - `DS_SEGMENT_CHECK_MS`: segmenter tick interval (default 250ms) -- `DS_SEGMENTER_WORKERS`: background segmenter worker threads (default 0; auto-tune uses `1` on 1–2 GiB presets) +- `DS_SEGMENTER_WORKERS`: background segmenter worker threads (default 0; auto-tune keeps segmenting in-process through `1024 MiB`, then uses `1` at `2048 MiB`, `2` at `4096 MiB`, and `4` at `8192 MiB`) - `DS_UPLOAD_CHECK_MS`: uploader tick interval (default 250ms) -- `DS_UPLOAD_CONCURRENCY`: max concurrent uploads (default 4; auto-tune uses `2` on 1–2 GiB presets) +- `DS_UPLOAD_CONCURRENCY`: max concurrent uploads (default 4; auto-tune uses `1` through `1024 MiB`, `2` at `2048 MiB`, `4` at `4096 MiB`, and `8` at `8192 MiB`) - `DS_BASE_WAL_GC_CHUNK_OFFSETS`: max base-WAL rows deleted per GC sweep/manifest commit transaction (default 1,000,000) - `DS_BASE_WAL_GC_INTERVAL_MS`: minimum delay between touch-manager base-WAL GC sweeps per stream (default 1000ms) -- `DS_SEGMENT_CACHE_MAX_BYTES`: on-disk segment cache cap (default 256 MiB) +- `DS_SEGMENT_CACHE_MAX_BYTES`: on-disk segment cache cap (default 256 MiB; auto-tune disables it through the `1024 MiB` preset, then uses `256 MiB` at `2048 MiB` and above) - `DS_INDEX_L0_SPAN`: segments per L0 index run (default 16) - `DS_INDEX_BUILD_CONCURRENCY`: max parallel async segment-processing tasks inside one exact-family run build (default 4; in-process, not worker threads; auto-tune uses `1` on 1–2 GiB presets) -- `DS_INDEX_CHECK_MS`: in-process tick interval for the routing-key, exact secondary, `.col`, `.fts`, and `.agg` index managers (default 1000ms) +- `DS_INDEX_CHECK_MS`: in-process periodic sweep interval for the routing-key, + exact secondary, `.exact`, `.col`, `.fts`, and `.agg` index managers + (default 1000ms; auto-tune defers periodic sweeps to `3600000ms` through the + `1024 MiB` preset, then uses `1000ms` at `2048 MiB` and above). Segment + uploads and schema/profile changes also enqueue the affected stream and wake + the background managers promptly, so the sweep interval is not the normal + freshness target for known changed streams. 
- `DS_SEARCH_COMPANION_BATCH_SEGMENTS`: uploaded stale segments rebuilt per bundled-companion pass before the manager yields and republishes the manifest (default 4; auto-tune uses `1` on 1–2 GiB presets) - `DS_SEARCH_COMPANION_YIELD_BLOCKS`: decoded segment blocks processed by one bundled-companion build before it yields back to the event loop (default 4; auto-tune uses `1` on 1–2 GiB presets) -- `DS_SEARCH_COMPANION_FILE_CACHE_MAX_BYTES`: on-disk bundled-companion cache cap for local immutable `.cix` files under `${DS_ROOT}/cache/companions` (default 512 MiB, scaled up on larger backlog settings and capped at 4 GiB) +- `DS_SEARCH_COMPANION_FILE_CACHE_MAX_BYTES`: on-disk bundled-companion cache cap for local immutable `.cix` files under `${DS_ROOT}/cache/companions` (default is derived from `DS_LOCAL_BACKLOG_MAX_BYTES`: 10% of backlog, at least 512 MiB, clamped to 256 MiB..4 GiB; with the default 10 GiB backlog this is 1 GiB) - `DS_SEARCH_COMPANION_FILE_CACHE_MAX_AGE_MS`: maximum age for cached `.cix` files before startup/admission pruning retires them (default 24h) - `DS_SEARCH_COMPANION_MMAP_CACHE_ENTRIES`: hot mmap-backed companion bundles retained by the process (default 64) - `DS_SEARCH_COMPANION_TOC_CACHE_BYTES`: in-memory TOC cache for bundled companions (default 1 MiB unless auto-tune raises it) -- `DS_SEARCH_COMPANION_SECTION_CACHE_BYTES`: in-memory raw section-byte cache for bundled companions (default 16 MiB unless auto-tune raises it) +- `DS_SEARCH_COMPANION_SECTION_CACHE_BYTES`: in-memory raw section-byte cache for bundled companions (default 32 MiB when no memory limit is set; otherwise 2% of memory limit, clamped to 8 MiB..128 MiB; auto-tune sets this from the selected preset) - `DS_INDEX_RUN_CACHE_MAX_BYTES`: on-disk index-run cache cap (default 256 MiB) - `DS_INDEX_RUN_MEM_CACHE_BYTES`: in-memory index-run cache cap (default 64 MiB, auto-tuned when memory limit is set) - `DS_LEXICON_INDEX_CACHE_MAX_BYTES`: on-disk lexicon-run cache cap for local immutable `.lex` files under `${DS_ROOT}/cache/lexicon` (default derived from memory limit; auto-tune uses 8–64 MiB on 256–2048 MiB presets) @@ -67,18 +73,29 @@ Concurrency/load-shedding note: - when over the configured threshold, it reduces `DS_SEARCH_CONCURRENCY` and `DS_ASYNC_INDEX_CONCURRENCY` to `max(1, ceil(base/2))` - it never reduces ingest or read concurrency - While over the threshold, the sampler also rate-limits best-effort `Bun.gc()` calls and optional heap snapshots. +- On `1024 MiB` and smaller memory limits, append responses include + `Connection: close` and the append path performs throttled post-append GC + after request-body buffers become unreachable. This avoids Bun retaining + native request-body buffers across a long keep-alive ingestion connection. +- On those same low-memory presets, segment upload/schema/profile enqueue + signals do not immediately wake routing, secondary, lexicon, or companion + index builders when the index check interval is long. The queued work waits + for a short foreground-quiet window, then runs after a bounded deferral even + if a continuous trickle of writes keeps the server from going fully quiet. + This avoids companion/index backfill overlap with sustained ingest bursts + without starving background catch-up on always-active streams. - `GET /v1/server/_details` exposes the configured cache / concurrency budgets, selected auto-tune preset, and the node's current effective runtime state. 
- When `DS_MEMORY_SAMPLER_PATH` is enabled, each sampler record now also - includes `memory_subsystems`, which mirrors the grouped runtime memory - breakdown exposed by `GET /v1/server/_details`. + includes `linux_status_rss` on Linux and `memory_subsystems`, which mirrors + the grouped runtime memory breakdown exposed by `GET /v1/server/_details`. Companion-cache note: - Bundled companion reads now fetch the full remote `.cix` object once, store it locally, and mmap the local cached file. - Because Bun does not currently expose an explicit unmap primitive, a companion file that has been mmapped by the running process is treated as pinned until process restart. - Startup pruning and new cache admissions retire stale or oldest unmmapped companion files first; if the hot mapped set alone exceeds the disk budget, the process may temporarily sit above the configured cache cap until restart. - The auto-tuned 1–2 GiB presets also force companion rebuilding into one-segment / one-yield-block passes so aggregate-heavy `.cix` generation does not overlap too aggressively with append, segment cut, and upload work. -- The auto-tuned 1–2 GiB presets keep the same 16 MiB / 100k-row segment geometry as larger hosts. Only concurrency and cache budgets shrink on those presets. +- The auto-tuned presets through `1024 MiB` use 8 MiB / 50k-row segment geometry so segment build/compression does not transiently hold one large encoded cut unit on memory-clamped hosts. - Routing-key lexicon reads now use the same local immutable-file pattern: - freshly built `.lex` runs are seeded into `${DS_ROOT}/cache/lexicon` - first read of an uncached `.lex` downloads the full object once, stores it locally, and then serves it from `Bun.mmap()` @@ -108,6 +125,20 @@ Companion-cache note: MockR2 env vars (only when using `--object-store local`): - `DS_MOCK_R2_MAX_INMEM_BYTES` / `DS_MOCK_R2_MAX_INMEM_MB` - `DS_MOCK_R2_SPILL_DIR` +- `DS_MOCK_R2_PUT_DELAY_MS`, `DS_MOCK_R2_GET_DELAY_MS`, + `DS_MOCK_R2_HEAD_DELAY_MS`, and `DS_MOCK_R2_LIST_DELAY_MS`: inject + operation latency for local object-store stress tests. + +R2-compatible local stress tests can set `DURABLE_STREAMS_R2_ENDPOINT` and +`DURABLE_STREAMS_R2_REGION` to point the R2 object-store implementation at a +local S3-compatible service such as MinIO. + +The R2 object-store path uses signed `fetch()` requests instead of Bun's native +`S3Client`. It streams file uploads from Node file streams and reads response +bodies through the stream reader instead of `Response.arrayBuffer()`. Local +1 GiB stress tests showed the native S3 path retaining high anon RSS during +ingest and companion upload, while the fetch path returned near baseline after +the same work completed. Indexing note: - Full mode runs indexing in the server process via background timer loops. @@ -159,21 +190,25 @@ If you need to cap memory, set SQLite `cache_size` manually at startup. - `DS_READ_MAX_BYTES=1–4MiB` - SQLite `cache_size` around 128–256 MiB - Worker SQLite caches around 16–32 MiB each -- On hosts near the low end of this range, prefer `DS_SEGMENTER_WORKERS=1`, - `DS_UPLOAD_CONCURRENCY=2`, and `DS_SEARCH_COMPANION_BATCH_SEGMENTS=1` while - keeping the default 16 MiB / 100k-row segment geometry. +- On hosts near the low end of this range, prefer `DS_SEGMENTER_WORKERS=0`, + `DS_UPLOAD_CONCURRENCY=2`, and `DS_SEARCH_COMPANION_BATCH_SEGMENTS=1`. 
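+As a sanity check on the derived cache defaults listed above, this sketch
+mirrors the documented clamp arithmetic; the function names are illustrative,
+not the server's internals:
+
+```ts
+const MiB = 1024 * 1024;
+const GiB = 1024 * MiB;
+const clamp = (v: number, lo: number, hi: number) => Math.min(hi, Math.max(lo, v));
+
+// DS_SEARCH_COMPANION_SECTION_CACHE_BYTES:
+// 32 MiB with no memory limit, otherwise 2% of the limit clamped to 8..128 MiB.
+function sectionCacheBytes(memoryLimitMb?: number): number {
+  if (memoryLimitMb === undefined) return 32 * MiB;
+  return clamp(Math.floor(memoryLimitMb * MiB * 0.02), 8 * MiB, 128 * MiB);
+}
+
+// DS_SEARCH_COMPANION_FILE_CACHE_MAX_BYTES:
+// 10% of DS_LOCAL_BACKLOG_MAX_BYTES, at least 512 MiB, clamped to 256 MiB..4 GiB.
+function companionFileCacheBytes(backlogBytes: number): number {
+  return clamp(Math.max(Math.floor(backlogBytes * 0.1), 512 * MiB), 256 * MiB, 4 * GiB);
+}
+
+console.log(companionFileCacheBytes(10 * GiB) / GiB); // 1 — the documented 1 GiB default
+```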
Segment geometry across presets: -- all auto-tune presets seal at `16 MiB` or `100,000` rows, whichever is reached first +- auto-tune presets through `1024 MiB` seal at `8 MiB` or `50,000` rows, + whichever is reached first +- auto-tune presets at `2048 MiB` and above seal at `16 MiB` or `100,000` + rows, whichever is reached first - smaller presets reduce overlap and memory pressure by lowering queue sizes, - worker counts, and background concurrency instead of emitting many more - smaller segment objects + worker counts, background concurrency, and cut-unit size - segmenting also uses a cheap trailing compression heuristic over the latest `8` sealed segments for the same stream: - it sums each segment's stored `payload_bytes` and compressed `size_bytes` - if recent compression is stronger than `2:1`, it raises the logical byte seal target so the next segment aims for at least `50%` of `DS_SEGMENT_MAX_BYTES` after compression + - the boost is capped at `5x` the base byte target, which is equivalent to + treating anything stronger than `10:1` compression as `10:1` for seal + sizing - this never lowers the target below `DS_SEGMENT_MAX_BYTES` - cut eligibility uses the same raised byte target, so the segmenter waits for enough logical backlog instead of starting immediately at the base @@ -186,23 +221,29 @@ Segment geometry across presets: presets: - `256 MiB`: - - caches: SQLite `16 MiB`, worker SQLite `8 MiB`, index-run memory `4 MiB`, lexicon cache `8 MiB`, companion TOC `1 MiB`, companion section `8 MiB` - - concurrency: ingest `1`, read `2`, search `1`, async index `1`, uploads `1`, segmenter workers `1` + - segment geometry: `8 MiB` / `50,000` rows + - caches: SQLite `16 MiB`, worker SQLite `8 MiB`, segment cache `0`, index-run memory `4 MiB`, lexicon cache `8 MiB`, companion TOC `1 MiB`, companion section `8 MiB` + - concurrency: ingest `1`, read `2`, search `1`, async index `1`, uploads `1`, segmenter workers `0`; index checks every `3600000ms` - `512 MiB`: - - caches: SQLite `32 MiB`, worker SQLite `8 MiB`, index-run memory `8 MiB`, lexicon cache `16 MiB`, companion TOC `1 MiB`, companion section `8 MiB` - - concurrency: ingest `1`, read `2`, search `1`, async index `1`, uploads `1`, segmenter workers `1` + - segment geometry: `8 MiB` / `50,000` rows + - caches: SQLite `32 MiB`, worker SQLite `8 MiB`, segment cache `0`, index-run memory `8 MiB`, lexicon cache `16 MiB`, companion TOC `1 MiB`, companion section `8 MiB` + - concurrency: ingest `1`, read `2`, search `1`, async index `1`, uploads `1`, segmenter workers `0`; index checks every `3600000ms` - `1024 MiB`: - - caches: SQLite `64 MiB`, worker SQLite `8 MiB`, index-run memory `16 MiB`, lexicon cache `32 MiB`, companion TOC `1 MiB`, companion section `16 MiB` - - concurrency: ingest `2`, read `4`, search `2`, async index `1`, uploads `2`, segmenter workers `1` + - segment geometry: `8 MiB` / `50,000` rows + - caches: SQLite `64 MiB`, worker SQLite `8 MiB`, segment cache `0`, index-run memory `16 MiB`, lexicon cache `32 MiB`, companion TOC `1 MiB`, companion section `16 MiB` + - concurrency: ingest `2`, read `4`, search `2`, async index `1`, uploads `1`, segmenter workers `0`; index checks every `3600000ms` - `2048 MiB`: - - caches: SQLite `128 MiB`, worker SQLite `16 MiB`, index-run memory `32 MiB`, lexicon cache `64 MiB`, companion TOC `1 MiB`, companion section `32 MiB` - - concurrency: ingest `2`, read `4`, search `2`, async index `1`, uploads `2`, segmenter workers `1` + - segment geometry: `16 MiB` / `100,000` rows + - 
caches: SQLite `128 MiB`, worker SQLite `16 MiB`, segment cache `256 MiB`, index-run memory `32 MiB`, lexicon cache `64 MiB`, companion TOC `1 MiB`, companion section `32 MiB` + - concurrency: ingest `2`, read `4`, search `2`, async index `1`, uploads `2`, segmenter workers `1`; index checks every `1000ms` - `4096 MiB`: - - caches: SQLite `256 MiB`, worker SQLite `32 MiB`, index-run memory `64 MiB`, lexicon cache `128 MiB`, companion TOC `2 MiB`, companion section `64 MiB` - - concurrency: ingest `4`, read `8`, search `4`, async index `2`, uploads `4`, segmenter workers `2` + - segment geometry: `16 MiB` / `100,000` rows + - caches: SQLite `256 MiB`, worker SQLite `32 MiB`, segment cache `256 MiB`, index-run memory `64 MiB`, lexicon cache `128 MiB`, companion TOC `2 MiB`, companion section `64 MiB` + - concurrency: ingest `4`, read `8`, search `4`, async index `2`, uploads `4`, segmenter workers `2`; index checks every `1000ms` - `8192 MiB`: - - caches: SQLite `512 MiB`, worker SQLite `32 MiB`, index-run memory `128 MiB`, lexicon cache `256 MiB`, companion TOC `4 MiB`, companion section `128 MiB` - - concurrency: ingest `8`, read `16`, search `8`, async index `4`, uploads `8`, segmenter workers `4` + - segment geometry: `16 MiB` / `100,000` rows + - caches: SQLite `512 MiB`, worker SQLite `32 MiB`, segment cache `256 MiB`, index-run memory `128 MiB`, lexicon cache `256 MiB`, companion TOC `4 MiB`, companion section `128 MiB` + - concurrency: ingest `8`, read `16`, search `8`, async index `4`, uploads `8`, segmenter workers `4`; index checks every `1000ms` ## Diagnosing stalls @@ -220,7 +261,9 @@ When throughput drops, check in this order: 3) Upload backlog (segments stuck locally) - Increase `DS_UPLOAD_CONCURRENCY` if network allows. -- Check object store latency and error rates. +- Check `tieredstore.objectstore.put.latency`, `tieredstore.objectstore.get.latency`, + and the related `head` / `delete` / `list` latency metrics in + `__stream_metrics__`, plus object-store error rates. - Inspect `GET /v1/server/_details` and `tieredstore.upload.pending_segments`. - Remember the uploader preserves a contiguous uploaded prefix per stream: - it always retries the earliest missing segment for that stream first @@ -236,9 +279,9 @@ When throughput drops, check in this order: `DS_SEARCH_COMPANION_YIELD_BLOCKS` if backfill is making the server feel sluggish under large `.fts` fields. - The internal `__stream_metrics__` system stream no longer builds routing, - lexicon, exact, `.col`, `.fts`, `.agg`, or `.mblk` families. If you still - see heavy bundled companion work after restart, look for user streams rather - than self-indexing on the internal metrics stream. + lexicon, exact secondary, `.exact`, `.col`, `.fts`, `.agg`, or `.mblk` + families. If you still see heavy bundled companion work after restart, look + for user streams rather than self-indexing on the internal metrics stream. 5) SQLite write stalls - Ensure the DB is on fast local SSD. diff --git a/docs/overview.md b/docs/overview.md index 738fac0..eea2f44 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -70,7 +70,7 @@ See [security.md](./security.md) and [auth.md](./auth.md). 
## Prerequisites -- Bun `>=1.3.11` for the full self-hosted server and repository workflows +- Bun `>=1.3.6` for the full self-hosted server and repository workflows - Bun `>=1.2.0` or Node.js `>=22` for the published `@prisma/streams-local` package ## Quick Start @@ -93,6 +93,8 @@ bun run src/local/cli.ts reset --name default Notes: - Full server startup requires `--object-store local|r2`. +- Prisma Compute deployments should use `src/compute/entry.ts`, which injects `--object-store r2` for the server entrypoint and `--auto-tune` when `DS_MEMORY_LIMIT_MB` is set. +- If Compute needs worker-thread entrypoints such as `segmenter_worker.ts` or `processor_worker.ts`, prebuild the bundle with `bun run build:compute-bundle` and deploy that artifact with `--skip-build`. The Compute CLI's built-in Bun strategy only bundles the explicit entrypoint file. - Full mode binds to `127.0.0.1` by default. Set `DS_HOST=0.0.0.0` if you intentionally want a non-loopback bind inside a trusted network boundary. - Local mode is designed for development and Prisma CLI integration, not hostile-network deployment. - The default local data root remains under `envPaths("prisma-dev").data/durable-streams/` for compatibility with the Prisma development workflow. @@ -178,6 +180,72 @@ DURABLE_STREAMS_R2_SECRET_ACCESS_KEY=your-secret \ bun run src/server.ts --object-store r2 ``` +Prisma Compute: + +```bash +DS_HOST=0.0.0.0 \ +DS_ROOT=/mnt/app/prisma-streams \ + DS_MEMORY_LIMIT_MB=1024 \ +DURABLE_STREAMS_R2_BUCKET=your-bucket \ +DURABLE_STREAMS_R2_ACCOUNT_ID=your-account-id \ +DURABLE_STREAMS_R2_ACCESS_KEY_ID=your-access-key \ +DURABLE_STREAMS_R2_SECRET_ACCESS_KEY=your-secret \ + bun run src/compute/entry.ts +``` + +Use an explicit `DS_ROOT` under `/mnt/app` on Prisma Compute. Paths under +`/tmp` or other ephemeral filesystem locations are lost on crash, restart, and +VM replacement, which drops the local SQLite catalog and caches. + +Prebuild for Prisma Compute deployments that need worker threads: + +```bash +bun run build:compute-bundle + +PRISMA_API_TOKEN=... \ + bunx @prisma/compute-cli compute deploy \ + --service your-service-id \ + --path .compute-build/bundle \ + --entrypoint compute/entry.js \ + --skip-build +``` + +`src/compute/entry.ts` always injects `--object-store r2`. When +`DS_MEMORY_LIMIT_MB` is set, it also injects `--auto-tune`, so Compute +deployments can use the standard memory preset without a separate argv channel. + +Compute demo deployment with Studio and the evlog generator: + +```bash +bun run build:compute-demo-bundle + +PRISMA_API_TOKEN=... \ + bunx @prisma/compute-cli deploy \ + --service your-service-id \ + --skip-build \ + --path .compute-demo-build/bundle \ + --entrypoint compute/demo_entry.js +``` + +That artifact starts the normal Streams server plus: + +- `/studio` for the streams-only Prisma Studio UI +- `/generate` for a bulk evlog ingest page with a stream-name field defaulting + to `demo-app` and `1k`, `10k`, and `100k` actions + +See [compute-demo.md](./compute-demo.md). + +Compute verification demo: + +```bash +bun run demo:compute-verify --url https://your-service.cdg.prisma.build +``` + +The Compute verification workload uses large mixed-entropy binary rows so +segmenting still cuts by `DS_SEGMENT_MAX_BYTES` under the compression-aware +seal heuristic. That avoids needing a tiny `DS_SEGMENT_TARGET_ROWS` override +just to force cuts for an overly compressible demo payload. 
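+The idea behind that payload, shown as an illustrative sketch rather than the
+demo's actual `buildComputeVerifyPayload` implementation (the half/half mix is
+an assumption for illustration):
+
+```ts
+import { randomBytes } from "node:crypto";
+
+// Mixed-entropy row: half random bytes (incompressible), half repeated ASCII
+// filler (compressible), so the payload stays incompressible enough that
+// byte-based sealing remains the active threshold under the compression-aware
+// seal heuristic.
+function buildMixedEntropyRow(totalBytes: number, seq: number): Uint8Array {
+  const randomPart = randomBytes(Math.floor(totalBytes / 2));
+  const filler = Buffer.alloc(totalBytes - randomPart.length, 0x61 + (seq % 16));
+  return Buffer.concat([randomPart, filler]);
+}
+
+console.log(buildMixedEntropyRow(1024 * 1024, 0).byteLength); // 1048576
+```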
+ ## Development Commands ```bash diff --git a/docs/schemas.md b/docs/schemas.md index ba85785..9793f22 100644 --- a/docs/schemas.md +++ b/docs/schemas.md @@ -168,8 +168,7 @@ If `routingKey` is configured: Schemas do not define: -- whether a stream is `generic`, `queue`, `evlog`, or `state-protocol` -- profile-owned endpoints +- whether a stream is `generic`, `evlog`, `metrics`, or `state-protocol` - profile-owned endpoints or runtime hooks Schemas do define payload-owned field extraction, including routing keys and diff --git a/docs/segment-performance.md b/docs/segment-performance.md index 6b34703..ba16019 100644 --- a/docs/segment-performance.md +++ b/docs/segment-performance.md @@ -4,14 +4,18 @@ Status: **informational**. This repo does not ship a dedicated segment perf tool Current behavior: -- Segment sealing uses a fixed `16 MiB` / `100,000`-row geometry across - auto-tune presets. +- Segment sealing defaults to `16 MiB` / `100,000` rows. Auto-tune presets + through `1024 MiB` use `8 MiB` / `50,000` rows to keep segment build memory + bounded on small hosts. - The segmenter keeps a trailing window over the latest `8` sealed segments for the same stream and computes a cheap compressed/logical ratio from stored metadata (`size_bytes` / `payload_bytes`). - If recent segments compressed below `50%` of the logical target, the next segment's logical byte target is raised so the expected compressed segment reaches at least `50%` of `DS_SEGMENT_MAX_BYTES`. +- The boost is capped at `5x` the base logical target, which is equivalent to + treating anything stronger than `10:1` compression as `10:1` for this + heuristic. - This heuristic is best-effort: - it never reduces the logical byte target below `DS_SEGMENT_MAX_BYTES` - cut eligibility uses the same raised logical-byte target, so the segmenter @@ -55,6 +59,18 @@ Current behavior: - `_search` uses the same planning idea when exact clauses provide a candidate segment set: it scans only candidate indexed segments plus any uncovered tail, instead of iterating the full indexed sealed prefix one segment at a time. +- `_search` append-order reverse scans (`sort: ["offset:desc"]`) use the + segment footer's block index to decode blocks newest-to-oldest and can stop + after the requested page is full. They no longer decode every block in the + segment before walking records backward. +- When bundled companions produce per-segment candidate doc IDs for an + append-order reverse search, `_search` walks those doc IDs newest-to-oldest + and decodes only the blocks containing candidate hits. Broad candidates still + behave like normal newest-first scans, but rare exact candidates no longer + force every intervening block to decode. +- For remote segments on that candidate-doc path, `_search` range-reads the + segment footer and only the needed compressed data blocks. It falls back to + the full segment source only when the footer is unavailable. - Background routing, lexicon, exact, and bundled-companion builders now yield at bounded per-record or per-block intervals and slow down further while a foreground read or search is active. 
That keeps hot keyed reads fast even diff --git a/docs/sqlite-schema.md b/docs/sqlite-schema.md index b633bc2..855262d 100644 --- a/docs/sqlite-schema.md +++ b/docs/sqlite-schema.md @@ -90,11 +90,10 @@ Indexes: - `(segment_in_progress, pending_bytes, last_segment_cut_ms)` Invariants: -- `0 <= uploaded_through <= sealed_through <= next_offset` +- `uploaded_through <= sealed_through < next_offset` for non-empty streams; new empty streams start with `sealed_through=-1`, `uploaded_through=-1`, and `next_offset=0` - `0 <= uploaded_segment_count <= segment_count` (see `stream_segment_meta`) -- `profile IS NULL` means the stream has no explicit declaration and is treated - as a `generic` stream -- `pending_bytes` and `pending_rows` reflect WAL rows with `offset >= sealed_through` (or `>= uploaded_through`, depending on design); pick one and enforce consistently. +- `profile IS NULL` or `profile='generic'` is treated as a `generic` stream; current stream creation stores `generic` +- `pending_bytes` and `pending_rows` reflect unsealed WAL rows with `offset > sealed_through`; sealing a segment decrements these counters - `logical_size_bytes` is the logical payload-byte size exposed by `/_details`; it is updated on append, restored from manifests for published history, and can be repaired asynchronously after bootstrap if missing. diff --git a/docs/stream-profiles.md b/docs/stream-profiles.md index 88cf238..5807ca5 100644 --- a/docs/stream-profiles.md +++ b/docs/stream-profiles.md @@ -47,10 +47,6 @@ Current built-ins: - `metrics` - `state-protocol` -Planned next built-ins: - -- `queue` - ## `generic` `generic` is the baseline meaning of “plain durable stream”. @@ -327,7 +323,8 @@ cache. The stream metadata stores the profile metadata. -- `NULL` means “no explicit declaration” +- `NULL` means “no explicit declaration”; current stream creation stores + `generic`, but readers still treat `NULL` as `generic` - `streams.profile` stores the profile kind - `stream_profiles.profile_json` stores non-generic profile configuration - if no profile is explicitly declared, the stream is treated as `generic` @@ -336,7 +333,7 @@ This keeps storage simple and avoids inventing a second metadata layer. ## Future Profiles -Additional profiles such as `queue` should follow the same rules: +Additional future profiles should follow the same rules: - the stream remains the same durable append-only storage object - the profile defines semantic meaning and profile-owned behavior diff --git a/docs/ui-search-integration.md b/docs/ui-search-integration.md index ce51028..8ea231c 100644 --- a/docs/ui-search-integration.md +++ b/docs/ui-search-integration.md @@ -122,6 +122,12 @@ Timed-out search responses include: - `search-scanned-tail-docs` - `search-scanned-tail-time-ms` - `search-exact-candidate-time-ms` + - `search-candidate-doc-ids` + - `search-decoded-records` + - `search-json-parse-time-ms` + - `search-segment-payload-bytes-fetched` + - `search-sort-time-ms` + - `search-peak-hits-held` - `search-index-families-used` Recommended UI treatment on timeout: @@ -345,7 +351,8 @@ supported source of truth: - the local lexicon cache under `${DS_ROOT}/cache/lexicon` - the local bundled-companion cache under `${DS_ROOT}/cache/companions` - `storage.companion_families` - Bundled companion byte breakdown for `col`, `fts`, `agg`, and `mblk`. + Bundled companion byte breakdown for `exact`, `col`, `fts`, `agg`, and + `mblk`. 
- `index_status.routing_key_index`, `index_status.exact_indexes[*]`, and `index_status.search_families[*]` Per-family progress, lag, and bytes-at-rest for index surfaces. @@ -421,8 +428,9 @@ scan uncovered published ranges or the WAL tail to preserve correctness. 1. Call `GET /v1/stream/{name}/_details`. 2. If `schema.search` is absent, hide the advanced filter/search UI. 3. Build search controls from `schema.search.fields`. -4. Choose the default chronological sort from `primaryTimestampField`, with - `offset` as the tie-breaker. +4. For filter-only event-list queries, use append-order sorting + (`["offset:desc"]` for newest first). Use `primaryTimestampField` plus + `offset` only when the UI explicitly needs event-time ordering. 5. Issue `POST /v1/stream/{name}/_search` for the event list. 6. Use `next_search_after` for infinite scroll. 7. Use `index_status` to show indexing progress or freshness indicators. @@ -432,7 +440,7 @@ scan uncovered published ranges or the WAL tail to preserve correctness. For a filtered, chronologically ordered, infinitely scrolling event list: - use `/_search` -- sort by the primary timestamp field plus `offset` +- sort by `offset` for the efficient append-order path - paginate with `search_after` - inspect `/_details` to determine whether search is available and which query controls to render diff --git a/experiments/demo/compute/verify.ts b/experiments/demo/compute/verify.ts new file mode 100644 index 0000000..04e0025 --- /dev/null +++ b/experiments/demo/compute/verify.ts @@ -0,0 +1,183 @@ +/** + * Demo: append a large binary stream and wait until all sealed segments are + * uploaded. The payload is deliberately mixed-entropy so DS_SEGMENT_MAX_BYTES + * remains the active seal threshold without needing a tiny DS_SEGMENT_TARGET_ROWS. + * + * Usage: + * bun run experiments/demo/compute/verify.ts --url https://service.example + * bun run experiments/demo/compute/verify.ts --url https://service.example --total-bytes 134217728 + * bun run experiments/demo/compute/verify.ts --url https://service.example --stream compute-demo-fixed + */ + +import { randomUUID } from "node:crypto"; +import { DEFAULT_BASE_URL, parseIntArg, parseStringArg, sleep } from "../common"; +import { buildComputeVerifyPayload } from "./verify_payload"; +import { streamHash16Hex } from "../../../src/util/stream_paths"; +import { dsError } from "../../../src/util/ds_error.ts"; + +const ARGS = process.argv.slice(2); +const baseUrl = parseStringArg(ARGS, "--url", DEFAULT_BASE_URL).replace(/\/+$/, ""); +const streamPrefix = parseStringArg(ARGS, "--stream-prefix", "compute-demo"); +const explicitStream = parseStringArg(ARGS, "--stream", ""); +const totalBytes = parseIntArg(ARGS, "--total-bytes", 1024 * 1024 * 1024); +const chunkBytes = parseIntArg(ARGS, "--chunk-bytes", 1024 * 1024); +const retryMax = parseIntArg(ARGS, "--retry-max", 20); +const retryBaseMs = parseIntArg(ARGS, "--retry-base-ms", 250); +const pollMs = parseIntArg(ARGS, "--poll-ms", 5000); + +if (chunkBytes <= 0) throw dsError("--chunk-bytes must be > 0"); +if (totalBytes <= 0) throw dsError("--total-bytes must be > 0"); +if (totalBytes % chunkBytes !== 0) throw dsError("--total-bytes must be an exact multiple of --chunk-bytes"); + +const totalChunks = totalBytes / chunkBytes; +const stream = + explicitStream.trim() !== "" + ? 
explicitStream.trim() + : `${streamPrefix}-${new Date().toISOString().replace(/[-:.TZ]/g, "").slice(0, 12)}`; +const producerId = `producer-${randomUUID()}`; + +async function request(path: string, init: RequestInit): Promise<{ status: number; text: string }> { + const res = await fetch(`${baseUrl}${path}`, init); + return { status: res.status, text: await res.text() }; +} + +async function ensureStream(): Promise { + const res = await request(`/v1/stream/${encodeURIComponent(stream)}`, { + method: "PUT", + headers: { "content-type": "application/octet-stream" }, + }); + if (res.status !== 200 && res.status !== 201) throw dsError(`create failed: ${res.status} ${res.text}`); +} + +async function appendChunk(seq: number): Promise { + const payload = buildComputeVerifyPayload(chunkBytes, seq); + for (let attempt = 1; attempt <= retryMax; attempt++) { + const res = await request(`/v1/stream/${encodeURIComponent(stream)}`, { + method: "POST", + headers: { + "content-type": "application/octet-stream", + "producer-id": producerId, + "producer-epoch": "0", + "producer-seq": String(seq), + }, + body: payload, + }); + if (res.status === 200 || res.status === 204) return; + if (![408, 429, 500, 502, 503, 504].includes(res.status) || attempt === retryMax) { + throw dsError(`append seq=${seq} failed: ${res.status} ${res.text}`); + } + const waitMs = Math.min(5000, retryBaseMs * attempt); + console.log(`retry seq=${seq} attempt=${attempt} status=${res.status} wait_ms=${waitMs}`); + await sleep(waitMs); + } +} + +async function closeStream(): Promise { + const res = await request(`/v1/stream/${encodeURIComponent(stream)}`, { + method: "POST", + headers: { "stream-closed": "true" }, + }); + if (res.status !== 200 && res.status !== 204) throw dsError(`close failed: ${res.status} ${res.text}`); +} + +type Details = { + stream?: { + next_offset: string; + sealed_through: string; + uploaded_through: string; + segment_count: number; + uploaded_segment_count: number; + pending_rows: string; + pending_bytes: string; + total_size_bytes: string; + closed: boolean; + }; + storage?: { + object_storage?: { + total_bytes: string; + segments_bytes: string; + indexes_bytes: string; + manifest_and_meta_bytes: string; + manifest_bytes: string; + schema_registry_bytes: string; + segment_object_count: number; + routing_index_object_count: number; + routing_lexicon_object_count: number; + exact_index_object_count: number; + bundled_companion_object_count: number; + }; + }; + object_store_requests?: unknown; +}; + +async function fetchDetails(): Promise
<Details> {
+  const res = await request(`/v1/stream/${encodeURIComponent(stream)}/_details`, { method: "GET" });
+  if (res.status !== 200) throw dsError(`details failed: ${res.status} ${res.text}`);
+  return JSON.parse(res.text);
+}
+
+async function waitForUploaded(): Promise<Details>
{ + for (;;) { + const details = await fetchDetails(); + const row = details.stream; + if (!row) throw dsError("details response missing stream"); + const nextOffset = Number(row.next_offset); + const uploadedThrough = Number(row.uploaded_through); + if ( + nextOffset === totalChunks && + uploadedThrough === totalChunks - 1 && + row.pending_rows === "0" && + row.pending_bytes === "0" + ) { + return details; + } + console.log( + "wait_upload", + JSON.stringify({ + next_offset: row.next_offset, + sealed_through: row.sealed_through, + uploaded_through: row.uploaded_through, + pending_rows: row.pending_rows, + pending_bytes: row.pending_bytes, + }) + ); + await sleep(pollMs); + } +} + +async function main(): Promise { + await ensureStream(); + console.log(`created ${stream}`); + const startMs = Date.now(); + for (let seq = 0; seq < totalChunks; seq++) { + await appendChunk(seq); + if ((seq + 1) % 64 === 0 || seq + 1 === totalChunks) { + const mib = ((seq + 1) * chunkBytes) / (1024 * 1024); + const rate = mib / ((Date.now() - startMs) / 1000); + console.log(`chunks=${seq + 1}/${totalChunks} mib=${mib} rate_mib_s=${rate.toFixed(2)}`); + } + } + await closeStream(); + console.log("closed stream"); + const details = await waitForUploaded(); + console.log( + "upload_complete", + JSON.stringify({ + stream, + streamHash: streamHash16Hex(stream), + producerId, + next_offset: details.stream?.next_offset, + sealed_through: details.stream?.sealed_through, + uploaded_through: details.stream?.uploaded_through, + segment_count: details.stream?.segment_count, + uploaded_segment_count: details.stream?.uploaded_segment_count, + pending_rows: details.stream?.pending_rows, + pending_bytes: details.stream?.pending_bytes, + total_size_bytes: details.stream?.total_size_bytes, + storage: details.storage?.object_storage, + object_store_requests: details.object_store_requests, + }) + ); +} + +await main(); diff --git a/experiments/demo/compute/verify_payload.ts b/experiments/demo/compute/verify_payload.ts new file mode 100644 index 0000000..221500e --- /dev/null +++ b/experiments/demo/compute/verify_payload.ts @@ -0,0 +1,39 @@ +const PATTERN = new TextEncoder().encode("prisma-streams-compute-demo:"); + +function mix32(seed: number): number { + let x = seed | 0; + x ^= x >>> 16; + x = Math.imul(x, 0x7feb352d); + x ^= x >>> 15; + x = Math.imul(x, 0x846ca68b); + x ^= x >>> 16; + return x >>> 0; +} + +function fillPattern(view: Uint8Array, seq: number): void { + const seqTag = `${seq.toString(16).padStart(8, "0")}:`; + const tagBytes = new TextEncoder().encode(seqTag); + let off = 0; + while (off < view.byteLength) { + for (let i = 0; i < PATTERN.byteLength && off < view.byteLength; i++) view[off++] = PATTERN[i]!; + for (let i = 0; i < tagBytes.byteLength && off < view.byteLength; i++) view[off++] = tagBytes[i]!; + } +} + +function fillDeterministicNoise(view: Uint8Array, seq: number): void { + let state = mix32(seq + 1); + for (let i = 0; i < view.byteLength; i++) { + state ^= state << 13; + state ^= state >>> 17; + state ^= state << 5; + view[i] = state & 0xff; + } +} + +export function buildComputeVerifyPayload(sizeBytes: number, seq: number): Uint8Array { + const out = new Uint8Array(sizeBytes); + const split = Math.floor(sizeBytes / 2); + fillPattern(out.subarray(0, split), seq); + fillDeterministicNoise(out.subarray(split), seq); + return out; +} diff --git a/package.json b/package.json index 4aee11f..e31e154 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,8 @@ "build": "bun run build:npm-packages", 
"build:local:node": "node scripts/build-local-node.mjs", "build:npm-packages": "node scripts/build-npm-packages.mjs", + "build:compute-bundle": "bun run scripts/compute/build-bundle.mjs", + "build:compute-demo-bundle": "bun run scripts/compute/build-demo-bundle.mjs", "prepack": "bun run build:npm-packages", "start": "bun run src/server.ts", "start:full": "bun run src/server.ts", @@ -59,7 +61,8 @@ "demo:ingest": "bun run experiments/demo/wal_demo_ingest.ts", "demo:subscribe": "bun run experiments/demo/wal_demo_subscribe.ts", "demo:live-fields": "bun run experiments/demo/live_fields_app.ts", - "demo:gharchive": "bun run experiments/demo/gharchive_demo.ts" + "demo:gharchive": "bun run experiments/demo/gharchive_demo.ts", + "demo:compute-verify": "bun run experiments/demo/compute/verify.ts" }, "exports": { "./local": { diff --git a/scripts/compute/build-bundle.mjs b/scripts/compute/build-bundle.mjs new file mode 100644 index 0000000..0cd3bd1 --- /dev/null +++ b/scripts/compute/build-bundle.mjs @@ -0,0 +1,65 @@ +import { execFile } from "node:child_process"; +import { access, mkdir, rm } from "node:fs/promises"; +import path from "node:path"; +import process from "node:process"; +import { fileURLToPath } from "node:url"; +import { parseArgs, promisify } from "node:util"; + +const execFileAsync = promisify(execFile); + +export const DEFAULT_COMPUTE_BUNDLE_OUTDIR = ".compute-build/bundle"; +export const COMPUTE_BUNDLE_ENTRYPOINTS = [ + "src/compute/entry.ts", + "src/segment/segmenter_worker.ts", + "src/touch/processor_worker.ts", +]; + +export async function buildComputeBundle(options = {}) { + const cwd = path.resolve(options.cwd ?? process.cwd()); + const outDir = path.resolve(cwd, options.outDir ?? DEFAULT_COMPUTE_BUNDLE_OUTDIR); + const entrypoints = COMPUTE_BUNDLE_ENTRYPOINTS.map((entrypoint) => path.resolve(cwd, entrypoint)); + + await rm(outDir, { recursive: true, force: true }); + await mkdir(outDir, { recursive: true }); + + try { + await execFileAsync( + "bun", + [ + "build", + ...entrypoints, + "--outdir", + outDir, + "--target", + "bun", + "--sourcemap=external", + ], + { cwd } + ); + } catch (error) { + const stderr = error?.stderr?.trim?.(); + const message = stderr && stderr.length > 0 ? stderr : error instanceof Error ? 
error.message : String(error); + throw new Error(`bun build failed:\n${message}`); + } + + await access(path.join(outDir, "compute", "entry.js")); + await access(path.join(outDir, "segment", "segmenter_worker.js")); + await access(path.join(outDir, "touch", "processor_worker.js")); + + return { bundleDir: outDir, entrypoint: "compute/entry.js" }; +} + +const isMain = process.argv[1] != null && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url); + +if (isMain) { + const { values } = parseArgs({ + args: process.argv.slice(2), + options: { + outdir: { type: "string" }, + }, + allowPositionals: false, + }); + + const { bundleDir, entrypoint } = await buildComputeBundle({ outDir: values.outdir }); + console.log(`Built Compute bundle at ${bundleDir} (entrypoint ${entrypoint})`); +} diff --git a/scripts/compute/build-demo-bundle.mjs b/scripts/compute/build-demo-bundle.mjs new file mode 100644 index 0000000..bff67b8 --- /dev/null +++ b/scripts/compute/build-demo-bundle.mjs @@ -0,0 +1,227 @@ +import { access, mkdir, readFile, rm } from "node:fs/promises"; +import { createRequire } from "node:module"; +import path from "node:path"; +import process from "node:process"; +import { fileURLToPath, pathToFileURL } from "node:url"; +import { parseArgs } from "node:util"; + +export const DEFAULT_COMPUTE_DEMO_BUNDLE_OUTDIR = ".compute-demo-build/bundle"; +export const COMPUTE_DEMO_ENTRYPOINTS = [ + "src/compute/demo_entry.ts", + "src/segment/segmenter_worker.ts", + "src/touch/processor_worker.ts", +]; + +function contentTypeForExt(ext) { + const types = { + ".css": "text/css; charset=utf-8", + ".gif": "image/gif", + ".html": "text/html; charset=utf-8", + ".jpeg": "image/jpeg", + ".jpg": "image/jpeg", + ".js": "application/javascript; charset=utf-8", + ".json": "application/json; charset=utf-8", + ".png": "image/png", + ".svg": "image/svg+xml; charset=utf-8", + ".woff": "font/woff", + ".woff2": "font/woff2", + }; + return types[ext] ?? "application/octet-stream"; +} + +function assertStudioRoot(studioRoot) { + const requiredPaths = [ + "package.json", + "demo/ppg-dev/client.tsx", + "ui/index.css", + "postcss.config.mjs", + ]; + for (const requiredPath of requiredPaths) { + const candidate = path.join(studioRoot, requiredPath); + if (!Bun.file(candidate).exists()) { + throw new Error( + `missing Studio asset source at ${candidate}; set PRISMA_STUDIO_ROOT to the Studio repository root`, + ); + } + } +} + +function resolveStudioRoot(cwd, explicitStudioRoot) { + const candidate = path.resolve( + explicitStudioRoot ?? process.env.PRISMA_STUDIO_ROOT ?? 
path.join(cwd, "..", "studio"), + ); + assertStudioRoot(candidate); + return candidate; +} + +async function buildStudioAssets(studioRoot) { + const studioPackageJson = path.join(studioRoot, "package.json"); + const studioPackage = await Bun.file(studioPackageJson).json(); + const clientBuild = await Bun.build({ + define: { + VERSION_INJECTED_AT_BUILD_TIME: JSON.stringify(studioPackage.version), + }, + entrypoints: [path.join(studioRoot, "demo/ppg-dev/client.tsx")], + format: "esm", + minify: true, + sourcemap: "inline", + splitting: false, + target: "browser", + write: false, + }); + + if (!clientBuild.success) { + const details = clientBuild.logs.map((log) => log.message).join("\n"); + throw new Error(`Studio client build failed:\n${details}`); + } + + const jsOutput = clientBuild.outputs.find((output) => output.path.endsWith(".js")); + if (!jsOutput) { + throw new Error("Studio client build produced no JavaScript output"); + } + + const assetEntries = []; + for (const output of clientBuild.outputs) { + if (output === jsOutput) continue; + assetEntries.push({ + base64: Buffer.from(await output.arrayBuffer()).toString("base64"), + contentType: contentTypeForExt(path.extname(output.path)), + path: `/${path.basename(output.path)}`, + }); + } + + const studioRequire = createRequire(studioPackageJson); + const postcssModuleUrl = pathToFileURL(studioRequire.resolve("postcss")).href; + const postcssConfigUrl = pathToFileURL(path.join(studioRoot, "postcss.config.mjs")).href; + const postcssModule = await import(postcssModuleUrl); + const postcss = postcssModule.default; + const postcssConfig = await import(postcssConfigUrl); + const cssEntrypoint = path.join(studioRoot, "ui/index.css"); + const cssSource = await Bun.file(cssEntrypoint).text(); + const cssResult = await postcss(postcssConfig.default.plugins).process(cssSource, { + from: cssEntrypoint, + }); + const sharedFontDir = path.resolve( + studioRoot, + "../web/packages/eclipse/dist/static/fonts", + ); + const sharedFontNames = [ + "MonaSansMonoVF[wght].woff2", + "MonaSansVF[wdth,wght,opsz,ital].woff2", + ]; + + for (const fontName of sharedFontNames) { + const absolutePath = path.join(sharedFontDir, fontName); + try { + const bytes = await readFile(absolutePath); + assetEntries.push({ + base64: bytes.toString("base64"), + contentType: "font/woff2", + path: `/web/packages/eclipse/dist/static/fonts/${fontName}`, + }); + } catch { + // Keep the bundle build working when the sibling web repo is absent. + } + } + + return { + appScript: await jsOutput.text(), + appStyles: cssResult.css, + assetEntries, + }; +} + +function generateAssetsModule(studioAssets) { + const mapEntries = []; + + for (const asset of studioAssets.assetEntries) { + const entrySource = `{ bytes: new Uint8Array(Buffer.from(${JSON.stringify(asset.base64)}, "base64")), contentType: ${JSON.stringify(asset.contentType)} }`; + mapEntries.push(` [${JSON.stringify(asset.path)}, ${entrySource}]`); + mapEntries.push( + ` [${JSON.stringify(`/studio${asset.path}`)}, ${entrySource}]`, + ); + } + + return [ + `export const appScript = ${JSON.stringify(studioAssets.appScript)};`, + `export const appStyles = ${JSON.stringify(studioAssets.appStyles)};`, + `export const builtAssets = new Map([\n${mapEntries.join(",\n")}\n]);`, + ].join("\n"); +} + +export async function buildComputeDemoBundle(options = {}) { + const cwd = path.resolve(options.cwd ?? process.cwd()); + const studioRoot = resolveStudioRoot(cwd, options.studioRoot); + const outDir = path.resolve( + cwd, + options.outDir ?? 
DEFAULT_COMPUTE_DEMO_BUNDLE_OUTDIR, + ); + const studioAssets = await buildStudioAssets(studioRoot); + const assetsModuleSource = generateAssetsModule(studioAssets); + const entrypoints = COMPUTE_DEMO_ENTRYPOINTS.map((entrypoint) => + path.resolve(cwd, entrypoint), + ); + + await rm(outDir, { force: true, recursive: true }); + await mkdir(outDir, { recursive: true }); + + const buildResult = await Bun.build({ + entrypoints, + outdir: outDir, + plugins: [ + { + name: "prebuilt-studio-assets", + setup(build) { + build.onResolve({ filter: /^virtual:prebuilt-studio-assets$/ }, () => ({ + namespace: "prebuilt-studio-assets", + path: "virtual:prebuilt-studio-assets", + })); + build.onLoad({ filter: /.*/, namespace: "prebuilt-studio-assets" }, () => ({ + contents: assetsModuleSource, + loader: "js", + })); + }, + }, + ], + sourcemap: "external", + target: "bun", + }); + + if (!buildResult.success) { + const details = buildResult.logs.map((log) => log.message).join("\n"); + throw new Error(`Compute demo bundle build failed:\n${details}`); + } + + await access(path.join(outDir, "compute", "demo_entry.js")); + await access(path.join(outDir, "segment", "segmenter_worker.js")); + await access(path.join(outDir, "touch", "processor_worker.js")); + + return { + bundleDir: outDir, + entrypoint: "compute/demo_entry.js", + studioRoot, + }; +} + +const isMain = + process.argv[1] != null && + path.resolve(process.argv[1]) === fileURLToPath(import.meta.url); + +if (isMain) { + const { values } = parseArgs({ + args: process.argv.slice(2), + allowPositionals: false, + options: { + outdir: { type: "string" }, + "studio-root": { type: "string" }, + }, + }); + + const result = await buildComputeDemoBundle({ + outDir: values.outdir, + studioRoot: values["studio-root"], + }); + console.log( + `Built Compute demo bundle at ${result.bundleDir} (entrypoint ${result.entrypoint}, studio root ${result.studioRoot})`, + ); +} diff --git a/src/app.ts b/src/app.ts index 73f8a1c..2041ee0 100644 --- a/src/app.ts +++ b/src/app.ts @@ -70,6 +70,10 @@ class CombinedIndexController implements StreamIndexLookup { return this.companionIndex.getColSegmentCompanion(stream, segmentIndex); } + getExactSegmentCompanion(stream: string, segmentIndex: number) { + return this.companionIndex.getExactSegmentCompanion(stream, segmentIndex); + } + getFtsSegmentCompanion(stream: string, segmentIndex: number) { return this.companionIndex.getFtsSegmentCompanion(stream, segmentIndex); } @@ -101,7 +105,7 @@ export function createApp(cfg: Config, os?: ObjectStore, opts: CreateAppOptions stats: opts.stats, createRuntime: ({ config, db, ingest, registry, notifier, stats, backpressure, metrics, memorySampler, memory, asyncIndexGate, foregroundActivity }) => { const rawStore = os ?? 
new MockR2Store(); - const store = new AccountingObjectStore(rawStore, db); + const store = new AccountingObjectStore(rawStore, db, metrics); const segmenterHooks: SegmenterHooks = { onSegmentSealed: (stream, payloadBytes, segmentBytes) => { if (stats) stats.recordSegmentSealed(payloadBytes, segmentBytes); diff --git a/src/app_core.ts b/src/app_core.ts index 46c7f5a..36c1604 100644 --- a/src/app_core.ts +++ b/src/app_core.ts @@ -132,6 +132,12 @@ function searchResponseHeaders(search: SearchResultBatch): HeadersInit { "search-scanned-tail-docs": String(search.coverage.scannedTailDocs), "search-scanned-tail-time-ms": String(search.coverage.scannedTailTimeMs), "search-exact-candidate-time-ms": String(search.coverage.exactCandidateTimeMs), + "search-candidate-doc-ids": String(search.coverage.candidateDocIds), + "search-decoded-records": String(search.coverage.decodedRecords), + "search-json-parse-time-ms": String(search.coverage.jsonParseTimeMs), + "search-segment-payload-bytes-fetched": String(search.coverage.segmentPayloadBytesFetched), + "search-sort-time-ms": String(search.coverage.sortTimeMs), + "search-peak-hits-held": String(search.coverage.peakHitsHeld), "search-index-families-used": search.coverage.indexFamiliesUsed.join(","), }; } @@ -311,9 +317,12 @@ function bodyBufferFromBytes(bytes: Uint8Array): ArrayBuffer { return buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength) as ArrayBuffer; } +const JSON_TEXT_DECODER = new TextDecoder(); +const JSON_TEXT_ENCODER = new TextEncoder(); + function keyBytesFromString(s: string | null): Uint8Array | null { if (s == null) return null; - return new TextEncoder().encode(s); + return JSON_TEXT_ENCODER.encode(s); } function extractRoutingKey(reg: SchemaRegistry, value: any): Result { @@ -351,9 +360,14 @@ function configuredExactIndexes(search: SearchConfig | undefined): Array<{ name: .sort((a, b) => a.name.localeCompare(b.name)); } -function configuredSearchFamilies(search: SearchConfig | undefined): Array<{ family: "col" | "fts" | "agg" | "mblk"; fields: string[] }> { +function configuredSearchFamilies(search: SearchConfig | undefined): Array<{ family: "exact" | "col" | "fts" | "agg" | "mblk"; fields: string[] }> { if (!search) return []; - const out: Array<{ family: "col" | "fts" | "agg" | "mblk"; fields: string[] }> = []; + const out: Array<{ family: "exact" | "col" | "fts" | "agg" | "mblk"; fields: string[] }> = []; + const exactFields = Object.entries(search.fields) + .filter(([, field]) => field.exact === true && field.kind !== "text") + .map(([name]) => name) + .sort((a, b) => a.localeCompare(b)); + if (exactFields.length > 0) out.push({ family: "exact", fields: exactFields }); const colFields = Object.entries(search.fields) .filter(([, field]) => field.column === true) .map(([name]) => name) @@ -524,6 +538,45 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { heapSnapshotPath: cfg.heapSnapshotPath ?? undefined, }); memory.start(); + let httpAppendGcBytesSinceLast = 0; + let httpAppendGcLastMs = 0; + const maybeCollectAfterHttpAppend = (bodyBytes: number): void => { + if (cfg.memoryLimitBytes <= 0 || bodyBytes <= 0) return; + const limit = cfg.memoryLimitBytes; + httpAppendGcBytesSinceLast += Math.max(0, Math.floor(bodyBytes)); + const usage = process.memoryUsage(); + const smallMemoryPreset = limit <= 1024 * 1024 * 1024; + const byteCadence = smallMemoryPreset ? 
8 * 1024 * 1024 : 64 * 1024 * 1024; + const abovePressureBand = + usage.rss > limit * 0.55 || + usage.external > limit * 0.2 || + usage.arrayBuffers > limit * 0.15; + if (!abovePressureBand && httpAppendGcBytesSinceLast < byteCadence) return; + const now = Date.now(); + if (now - httpAppendGcLastMs < 1_000) return; + const gc = (globalThis as { Bun?: { gc?: (force?: boolean) => void } }).Bun?.gc; + if (typeof gc !== "function") return; + httpAppendGcLastMs = now; + httpAppendGcBytesSinceLast = 0; + try { + gc(true); + } catch { + try { + gc(); + } catch { + return; + } + } + }; + const appendResponseHeaders = (headers: HeadersInit = {}): HeadersInit => { + if (cfg.memoryLimitBytes > 0 && cfg.memoryLimitBytes <= 1024 * 1024 * 1024) { + return withNosniff({ + ...headers, + connection: "close", + }); + } + return withNosniff(headers); + }; const ingest = new IngestQueue(cfg, db, stats, backpressure, metrics); const notifier = new StreamNotifier(); const registry = new SchemaRegistryStore(db); @@ -1068,7 +1121,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { } const reg = regRes.value; const jsonIngest = resolveJsonIngestCapability(profileRes.value); - const text = new TextDecoder().decode(bodyBytes); + const text = JSON_TEXT_DECODER.decode(bodyBytes); let arr: any; try { arr = JSON.parse(text); @@ -1107,7 +1160,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { rows.push({ routingKey: rkRes.value, contentType: "application/json", - payload: new TextEncoder().encode(JSON.stringify(value)), + payload: JSON_TEXT_ENCODER.encode(JSON.stringify(value)), }); } return Result.ok({ rows }); @@ -1331,6 +1384,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { sqlite_shared_total_bytes: sqliteSharedBytes.toString(), }, companion_families: { + exact_bytes: String(familyBytes.get("exact") ?? 0n), col_bytes: String(familyBytes.get("col") ?? 0n), fts_bytes: String(familyBytes.get("fts") ?? 0n), agg_bytes: String(familyBytes.get("agg") ?? 0n), @@ -1866,7 +1920,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { if (regRes.value.routingKey == null) return badRequest("routing key not configured"); const limitRaw = url.searchParams.get("limit"); const limit = limitRaw == null ? 100 : Number(limitRaw); - if (!Number.isFinite(limit) || limit <= 0 || !Number.isInteger(limit)) return badRequest("invalid limit"); + if (!Number.isFinite(limit) || limit <= 0 || !Number.isInteger(limit) || limit > 500) return badRequest("invalid limit"); const after = url.searchParams.get("after"); const listRes = indexer?.listRoutingKeysResult ? 
await runForeground(() => indexer.listRoutingKeysResult!(stream, after, limit)) @@ -1950,6 +2004,12 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { scanned_tail_docs: searchRes.value.coverage.scannedTailDocs, scanned_tail_time_ms: searchRes.value.coverage.scannedTailTimeMs, exact_candidate_time_ms: searchRes.value.coverage.exactCandidateTimeMs, + candidate_doc_ids: searchRes.value.coverage.candidateDocIds, + decoded_records: searchRes.value.coverage.decodedRecords, + json_parse_time_ms: searchRes.value.coverage.jsonParseTimeMs, + segment_payload_bytes_fetched: searchRes.value.coverage.segmentPayloadBytesFetched, + sort_time_ms: searchRes.value.coverage.sortTimeMs, + peak_hits_held: searchRes.value.coverage.peakHitsHeld, index_families_used: searchRes.value.coverage.indexFamiliesUsed, }, total: searchRes.value.total, @@ -2106,7 +2166,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { }; if (existingClosed) headers["stream-closed"] = "true"; if (srow.expires_at_ms != null) headers["stream-expires-at"] = new Date(Number(srow.expires_at_ms)).toISOString(); - return new Response(null, { status: 200, headers: withNosniff(headers) }); + return new Response(null, { status: 200, headers: appendResponseHeaders(headers) }); } db.ensureStream(stream, { contentType, expiresAtMs, ttlSeconds, closed: false }); @@ -2177,7 +2237,7 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { }; if (streamClosed || closedNow) headers["stream-closed"] = "true"; if (createdRow.expires_at_ms != null) headers["stream-expires-at"] = new Date(Number(createdRow.expires_at_ms)).toISOString(); - return new Response(null, { status: 201, headers: withNosniff(headers) }); + return new Response(null, { status: 201, headers: appendResponseHeaders(headers) }); }); } finally { leaveAppendPhase?.(); @@ -2253,11 +2313,13 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { stream, stream_content_type: streamContentType, }); + let appendBodyBytesForGc = 0; try { - return await runWithGate(ingestGate, async () => { + const response = await runWithGate(ingestGate, async () => { const ab = await req.arrayBuffer(); if (ab.byteLength > cfg.appendMaxBodyBytes) return tooLarge(`body too large (max ${cfg.appendMaxBodyBytes})`); const bodyBytes = new Uint8Array(ab); + appendBodyBytesForGc = bodyBytes.byteLength; const isCloseOnly = streamClosed && bodyBytes.byteLength === 0; if (bodyBytes.byteLength === 0 && !streamClosed) return badRequest("empty body"); @@ -2305,18 +2367,18 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { "stream-next-offset": encodeOffset(srow.epoch, err.lastOffset), "stream-closed": "true", }; - return new Response(null, { status: 409, headers: withNosniff(headers) }); + return new Response(null, { status: 409, headers: appendResponseHeaders(headers) }); } if (err.kind === "producer_stale_epoch") { return new Response(null, { status: 403, - headers: withNosniff({ "producer-epoch": String(err.producerEpoch) }), + headers: appendResponseHeaders({ "producer-epoch": String(err.producerEpoch) }), }); } if (err.kind === "producer_gap") { return new Response(null, { status: 409, - headers: withNosniff({ + headers: appendResponseHeaders({ "producer-expected-seq": String(err.expected), "producer-received-seq": String(err.received), }), @@ -2348,8 +2410,10 @@ export function createAppCore(cfg: Config, opts: CreateAppCoreOptions): App { } const status = producer && res.appendedRows > 
0 ? 200 : 204; - return new Response(null, { status, headers: withNosniff(headers) }); + return new Response(null, { status, headers: appendResponseHeaders(headers) }); }); + maybeCollectAfterHttpAppend(appendBodyBytesForGc); + return response; } finally { leaveAppendPhase?.(); } diff --git a/src/app_local.ts b/src/app_local.ts index 3b3d855..8b7927c 100644 --- a/src/app_local.ts +++ b/src/app_local.ts @@ -53,6 +53,10 @@ class LocalIndexLookup implements StreamIndexLookup { return null; } + async getExactSegmentCompanion(_stream: string, _segmentIndex: number): Promise { + return null; + } + async getFtsSegmentCompanion(_stream: string, _segmentIndex: number): Promise { return null; } diff --git a/src/auto_tune.ts b/src/auto_tune.ts index 705a944..3f2f596 100644 --- a/src/auto_tune.ts +++ b/src/auto_tune.ts @@ -1,6 +1,8 @@ export type AutoTuneConfig = { segmentMaxMiB: number; segmentTargetRows: number; + segmentCacheMb: number; + indexCheckMs: number; sqliteCacheMb: number; workerSqliteCacheMb: number; indexMemMb: number; @@ -29,12 +31,17 @@ export function memoryLimitForPreset(preset: number): number { export function tuneForPreset(p: number): AutoTuneConfig { return { - // Segment geometry is fixed across presets. Smaller hosts still scale - // concurrency and cache budgets down, but they keep the same 16 MiB / - // 100k-row seal thresholds so upload throughput is not dominated by many - // tiny compressed segment objects. - segmentMaxMiB: 16, - segmentTargetRows: 100_000, + // <=1 GiB hosts need smaller cut units because segment build/compression + // can transiently hold several encoded copies of the candidate rows. + segmentMaxMiB: p <= 1024 ? 8 : 16, + segmentTargetRows: p <= 1024 ? 50_000 : 100_000, + // The 1 GiB Compute host only has about 685 MiB of usable RSS after the + // platform clamp, so it cannot afford a persistent 256 MiB local segment + // cache on top of active ingest and background reads. + segmentCacheMb: p >= 2048 ? 256 : 0, + // Small hosts defer background sweeps so routing/exact backfill does not + // immediately start re-reading uploaded history during a large ingest burst. + indexCheckMs: p >= 2048 ? 1_000 : 3_600_000, sqliteCacheMb: Math.max(8, Math.floor(p / 16)), workerSqliteCacheMb: Math.max(8, Math.min(32, Math.floor(p / 128))), indexMemMb: Math.max(4, Math.floor(p / 64)), @@ -54,8 +61,8 @@ export function tuneForPreset(p: number): AutoTuneConfig { // to overlap aggressively under the GH Archive "all" workload. indexBuildConcurrency: p >= 8192 ? 4 : p >= 4096 ? 2 : 1, indexCompactConcurrency: p >= 8192 ? 4 : p >= 4096 ? 2 : 1, - segmenterWorkers: p >= 8192 ? 4 : p >= 4096 ? 2 : 1, - uploadConcurrency: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 1024 ? 2 : 1, + segmenterWorkers: p >= 8192 ? 4 : p >= 4096 ? 2 : p >= 2048 ? 1 : 0, + uploadConcurrency: p >= 8192 ? 8 : p >= 4096 ? 4 : p >= 2048 ? 2 : 1, searchCompanionBatchSegments: p >= 8192 ? 4 : p >= 4096 ? 2 : 1, searchCompanionYieldBlocks: p >= 8192 ? 4 : p >= 4096 ? 
2 : 1,
   };
 }
diff --git a/src/compute/demo_entry.ts b/src/compute/demo_entry.ts
new file mode 100644
index 0000000..fd767a0
--- /dev/null
+++ b/src/compute/demo_entry.ts
@@ -0,0 +1,406 @@
+import { bootstrapFromR2 } from "../bootstrap";
+import { createApp, type App } from "../app";
+import { loadConfig } from "../config";
+import type { AppendRow } from "../ingest";
+import { MockR2Store } from "../objectstore/mock_r2";
+import { R2ObjectStore } from "../objectstore/r2";
+import { resolveJsonIngestCapability } from "../profiles";
+import type { SchemaRegistry } from "../schema/registry";
+import { dsError } from "../util/ds_error.ts";
+import { resolvePointerResult } from "../util/json_pointer";
+import { initConsoleLogging } from "../util/log";
+import { ensureComputeArgv } from "./entry";
+import { createComputeDemoSite, type PrebuiltStudioAssets } from "./demo_site";
+import { applyAutoTune, AutoTuneApplyError, parseAutoTuneArg } from "../server_auto_tune";
+import { Result } from "better-result";
+
+initConsoleLogging();
+
+export type StreamsFetchTarget = {
+  appendGenerateBatch?: (stream: string, events: Array<Record<string, unknown>>) => Promise<void>;
+  beginGenerateJob?: (stream: string) => void;
+  endGenerateJob?: (stream: string) => void;
+  ensureGenerateStream?: (stream: string) => Promise<void>;
+  fetch(request: Request): Promise<Response>;
+};
+
+const EXTERNAL_STREAMS_URL_ENVS = [
+  "COMPUTE_DEMO_STREAMS_SERVER_URL",
+  "STREAMS_SERVER_URL",
+] as const;
+
+function fallbackStudioAssets(): PrebuiltStudioAssets {
+  const message =
+    "Studio assets were not bundled. Build this entrypoint with bun run build:compute-demo-bundle.";
+
+  return {
+    appScript: `const root = document.getElementById("root"); if (root) root.innerHTML = "
${message}
";`, + appStyles: + "html,body{margin:0;background:#08111b;color:#e9f3fb;font-family:ui-sans-serif,system-ui,sans-serif;}", + builtAssets: new Map(), + }; +} + +async function loadStudioAssets(): Promise { + try { + return (await import("virtual:prebuilt-studio-assets")) as PrebuiltStudioAssets; + } catch { + return fallbackStudioAssets(); + } +} + +function loadIdleTimeoutSeconds(): number { + const raw = process.env.DS_HTTP_IDLE_TIMEOUT_SECONDS; + if (raw == null || raw.trim() === "") return 180; + const value = Number(raw); + if (!Number.isFinite(value) || value <= 0) { + console.error(`invalid DS_HTTP_IDLE_TIMEOUT_SECONDS: ${raw}`); + process.exit(1); + } + return value; +} + +function normalizeExternalStreamsServerUrl(value: string): string { + const trimmed = value.trim(); + if (trimmed === "") { + throw dsError("external Streams server URL must not be empty"); + } + const withScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed) + ? trimmed + : `https://${trimmed}`; + return withScheme.endsWith("/") ? withScheme.slice(0, -1) : withScheme; +} + +export function resolveExternalStreamsServerUrl( + env: NodeJS.ProcessEnv = process.env, +): string | null { + for (const name of EXTERNAL_STREAMS_URL_ENVS) { + const raw = env[name]; + if (raw == null || raw.trim() === "") continue; + return normalizeExternalStreamsServerUrl(raw); + } + return null; +} + +export function createExternalStreamsTarget(baseUrl: string): StreamsFetchTarget { + const normalizedBaseUrl = normalizeExternalStreamsServerUrl(baseUrl); + + return { + async fetch(request: Request): Promise { + const requestUrl = new URL(request.url); + const upstreamUrl = new URL( + `${requestUrl.pathname}${requestUrl.search}`, + `${normalizedBaseUrl}/`, + ); + const headers = new Headers(request.headers); + headers.delete("host"); + const body = + request.method === "GET" || request.method === "HEAD" + ? undefined + : await request.arrayBuffer(); + + const response = await fetch(upstreamUrl, { + body, + headers, + method: request.method, + redirect: "manual", + signal: request.signal, + }); + + return new Response(response.body, { + headers: response.headers, + status: response.status, + statusText: response.statusText, + }); + }, + }; +} + +export function applyColocatedComputeDemoArgv( + argv: string[], + env: NodeJS.ProcessEnv = process.env, + opts: { log?: (message: string) => void } = {}, +): string[] { + const next = ensureComputeArgv(argv, env); + const args = next.slice(2); + const autoTune = parseAutoTuneArg(args); + if (autoTune.enabled) { + applyAutoTune(autoTune.valueMb, { env, log: opts.log }); + } + return next; +} + +const DIRECT_APPEND_TEXT_ENCODER = new TextEncoder(); + +function keyBytesFromString(value: string | null): Uint8Array | null { + return value == null ? 
null : DIRECT_APPEND_TEXT_ENCODER.encode(value); +} + +function extractRoutingKey(reg: SchemaRegistry, value: unknown): Uint8Array | null { + if (!reg.routingKey) return null; + const resolvedRes = resolvePointerResult(value, reg.routingKey.jsonPointer); + if (Result.isError(resolvedRes)) { + throw dsError(resolvedRes.error.message); + } + const resolved = resolvedRes.value; + if (!resolved.exists) { + if (reg.routingKey.required) throw dsError("routing key missing"); + return null; + } + if (typeof resolved.value !== "string") throw dsError("routing key must be string"); + return keyBytesFromString(resolved.value); +} + +function appendErrorMessage(kind: string): string { + if (kind === "not_found") return "stream not found"; + if (kind === "gone") return "stream expired"; + if (kind === "content_type_mismatch") return "content-type mismatch"; + if (kind === "overloaded") return "ingest queue full"; + if (kind === "closed") return "stream is closed"; + return "append failed"; +} + +function createColocatedStreamsTarget(streamsApp: App): StreamsFetchTarget { + let activeGenerateJobs = 0; + const generateStreamsToEnqueue = new Set(); + + return { + appendGenerateBatch: async (stream, events) => { + const { db, ingest, metrics, notifier, profiles, registry, stats, touch } = streamsApp.deps; + const streamRow = db.getStream(stream); + if (!streamRow || db.isDeleted(streamRow)) throw dsError("stream not found"); + + const regRes = registry.getRegistryResult(stream); + if (Result.isError(regRes)) throw dsError(regRes.error.message); + const profileRes = profiles.getProfileResult(stream, streamRow); + if (Result.isError(profileRes)) throw dsError(profileRes.error.message); + const jsonIngest = resolveJsonIngestCapability(profileRes.value); + const reg = regRes.value; + const validator = reg.currentVersion > 0 ? registry.getValidatorForVersion(reg, reg.currentVersion) : null; + if (reg.currentVersion > 0 && !validator) throw dsError("schema validator missing"); + + const rows: AppendRow[] = []; + let encodedBytes = 0; + for (const event of events) { + let value: unknown = event; + let profileRoutingKey: Uint8Array | null = null; + if (jsonIngest) { + const preparedRes = jsonIngest.prepareRecordResult({ stream, profile: profileRes.value, value: event }); + if (Result.isError(preparedRes)) throw dsError(preparedRes.error.message); + value = preparedRes.value.value; + profileRoutingKey = keyBytesFromString(preparedRes.value.routingKey); + } + if (validator && !validator(value)) { + const message = validator.errors ? validator.errors.map((error) => error.message).join("; ") : "schema validation failed"; + throw dsError(message); + } + const payload = DIRECT_APPEND_TEXT_ENCODER.encode(JSON.stringify(value)); + encodedBytes += payload.byteLength; + rows.push({ + routingKey: reg.routingKey ? 
extractRoutingKey(reg, value) : profileRoutingKey, + contentType: "application/json", + payload, + }); + } + + const appendRes = await ingest.append({ + stream, + baseAppendMs: db.nowMs(), + rows, + contentType: "application/json", + }); + if (Result.isError(appendRes)) { + throw dsError(appendErrorMessage(appendRes.error.kind)); + } + if (appendRes.value.appendedRows > 0) { + metrics.recordAppend(encodedBytes, appendRes.value.appendedRows); + notifier.notify(stream, appendRes.value.lastOffset); + notifier.notifyDetailsChanged(stream); + touch.notify(stream); + stats?.recordStreamTouched(stream); + stats?.recordIngested(encodedBytes); + } + }, + beginGenerateJob: (stream) => { + activeGenerateJobs += 1; + generateStreamsToEnqueue.add(stream); + if (activeGenerateJobs !== 1) return; + streamsApp.deps.indexer?.stop(); + streamsApp.deps.segmenter.stop(true); + }, + endGenerateJob: (stream) => { + generateStreamsToEnqueue.add(stream); + activeGenerateJobs = Math.max(0, activeGenerateJobs - 1); + if (activeGenerateJobs !== 0) return; + streamsApp.deps.segmenter.start(); + streamsApp.deps.indexer?.start(); + for (const pendingStream of generateStreamsToEnqueue) { + streamsApp.deps.indexer?.enqueue(pendingStream); + } + generateStreamsToEnqueue.clear(); + }, + fetch: (request) => streamsApp.fetch(request), + }; +} + +async function main(): Promise { + const studioAssets = await loadStudioAssets(); + const externalStreamsServerUrl = resolveExternalStreamsServerUrl(); + let streamsTarget: StreamsFetchTarget; + let closeStreamsTarget: (() => void) | null = null; + let cfg: ReturnType; + + if (externalStreamsServerUrl) { + cfg = loadConfig(); + streamsTarget = createExternalStreamsTarget(externalStreamsServerUrl); + console.log( + `prisma-streams compute demo using external Streams server ${externalStreamsServerUrl}`, + ); + } else { + try { + process.argv = applyColocatedComputeDemoArgv(process.argv); + } catch (error) { + if (error instanceof AutoTuneApplyError) { + console.error(error.message); + process.exit(1); + } + throw error; + } + cfg = loadConfig(); + const args = process.argv.slice(2); + + const storeIdx = args.indexOf("--object-store"); + const storeChoice = storeIdx >= 0 ? args[storeIdx + 1] : null; + if (!storeChoice || (storeChoice !== "r2" && storeChoice !== "local")) { + console.error("missing or invalid --object-store (expected: r2 | local)"); + process.exit(1); + } + const bootstrapEnabled = args.includes("--bootstrap-from-r2"); + + let store; + if (storeChoice === "local") { + const memBytesRaw = process.env.DS_MOCK_R2_MAX_INMEM_BYTES; + const memMbRaw = process.env.DS_MOCK_R2_MAX_INMEM_MB; + const putDelayRaw = process.env.DS_MOCK_R2_PUT_DELAY_MS; + const getDelayRaw = process.env.DS_MOCK_R2_GET_DELAY_MS; + const headDelayRaw = process.env.DS_MOCK_R2_HEAD_DELAY_MS; + const listDelayRaw = process.env.DS_MOCK_R2_LIST_DELAY_MS; + const memBytes = memBytesRaw + ? Number(memBytesRaw) + : memMbRaw + ? Number(memMbRaw) * 1024 * 1024 + : null; + const putDelayMs = putDelayRaw ? Number(putDelayRaw) : 0; + const getDelayMs = getDelayRaw ? Number(getDelayRaw) : 0; + const headDelayMs = headDelayRaw ? Number(headDelayRaw) : 0; + const listDelayMs = listDelayRaw ? 
Number(listDelayRaw) : 0; + if (memBytesRaw && !Number.isFinite(memBytes)) { + console.error(`invalid DS_MOCK_R2_MAX_INMEM_BYTES: ${memBytesRaw}`); + process.exit(1); + } + if (memMbRaw && !Number.isFinite(Number(memMbRaw))) { + console.error(`invalid DS_MOCK_R2_MAX_INMEM_MB: ${memMbRaw}`); + process.exit(1); + } + for (const [name, value] of [ + ["DS_MOCK_R2_PUT_DELAY_MS", putDelayMs], + ["DS_MOCK_R2_GET_DELAY_MS", getDelayMs], + ["DS_MOCK_R2_HEAD_DELAY_MS", headDelayMs], + ["DS_MOCK_R2_LIST_DELAY_MS", listDelayMs], + ] as const) { + if (!Number.isFinite(value) || value < 0) { + console.error(`invalid ${name}: ${process.env[name]}`); + process.exit(1); + } + } + const spillDir = process.env.DS_MOCK_R2_SPILL_DIR; + store = new MockR2Store({ + maxInMemoryBytes: memBytes ?? undefined, + spillDir, + faults: { + putDelayMs, + getDelayMs, + headDelayMs, + listDelayMs, + }, + }); + } else { + const bucket = process.env.DURABLE_STREAMS_R2_BUCKET; + const accountId = process.env.DURABLE_STREAMS_R2_ACCOUNT_ID; + const accessKeyId = process.env.DURABLE_STREAMS_R2_ACCESS_KEY_ID; + const secretAccessKey = process.env.DURABLE_STREAMS_R2_SECRET_ACCESS_KEY; + const endpoint = process.env.DURABLE_STREAMS_R2_ENDPOINT; + const region = process.env.DURABLE_STREAMS_R2_REGION; + if (!bucket || !accountId || !accessKeyId || !secretAccessKey) { + console.error( + "missing R2 env vars: DURABLE_STREAMS_R2_BUCKET, DURABLE_STREAMS_R2_ACCOUNT_ID, DURABLE_STREAMS_R2_ACCESS_KEY_ID, DURABLE_STREAMS_R2_SECRET_ACCESS_KEY", + ); + process.exit(1); + } + store = new R2ObjectStore({ + accessKeyId, + accountId, + bucket, + secretAccessKey, + endpoint, + region, + }); + } + + if (bootstrapEnabled) { + await bootstrapFromR2(cfg, store, { clearLocal: true }); + } + + const streamsApp = createApp(cfg, store); + streamsTarget = createColocatedStreamsTarget(streamsApp); + closeStreamsTarget = () => streamsApp.close(); + } + const demoSite = createComputeDemoSite({ + studioAssets, + streamsApp: streamsTarget, + }); + + const server = Bun.serve({ + fetch: (request) => demoSite.fetch(request), + hostname: cfg.host, + idleTimeout: loadIdleTimeoutSeconds(), + port: cfg.port, + }); + + let shuttingDown = false; + const shutdown = (signal: NodeJS.Signals): void => { + if (shuttingDown) return; + shuttingDown = true; + console.log(`received ${signal}, shutting down prisma-streams compute demo`); + try { + server.stop(true); + } catch (error) { + console.error("failed to stop HTTP server cleanly", error); + } + try { + demoSite.close(); + } catch (error) { + console.error("failed to close compute demo cleanly", error); + } + if (closeStreamsTarget) { + try { + closeStreamsTarget(); + } catch (error) { + console.error("failed to close streams application cleanly", error); + process.exitCode = 1; + } + } + }; + + process.once("SIGINT", () => shutdown("SIGINT")); + process.once("SIGTERM", () => shutdown("SIGTERM")); + + const listenTarget = cfg.host.includes(":") + ? 
`[${cfg.host}]:${server.port}` + : `${cfg.host}:${server.port}`; + console.log(`prisma-streams compute demo listening on ${listenTarget}`); +} + +if (import.meta.main) { + await main(); +} diff --git a/src/compute/demo_site.ts b/src/compute/demo_site.ts new file mode 100644 index 0000000..91c1301 --- /dev/null +++ b/src/compute/demo_site.ts @@ -0,0 +1,1242 @@ +import { dsError } from "../util/ds_error.ts"; + +type BuiltAsset = { + bytes: Uint8Array | ArrayBuffer; + contentType: string; +}; + +export type PrebuiltStudioAssets = { + appScript: string; + appStyles: string; + builtAssets: Map; +}; + +type GenerateJobStatus = "pending" | "running" | "succeeded" | "failed"; + +type GenerateJobState = { + id: string; + stream: string; + total: number; + inserted: number; + batchSize: number; + status: GenerateJobStatus; + error: string | null; + startedAt: string; + finishedAt: string | null; +}; + +export type ComputeDemoSite = { + close(): void; + fetch(request: Request): Promise; +}; + +type GenerateStreamsTarget = { + appendGenerateBatch?: (stream: string, events: Array>) => Promise; + beginGenerateJob?: (stream: string) => void; + endGenerateJob?: (stream: string) => void; + ensureGenerateStream?: (stream: string) => Promise; + fetch(request: Request): Promise; +}; + +type CreateComputeDemoSiteOptions = { + bootId?: string; + studioAssets: PrebuiltStudioAssets; + streamsApp: GenerateStreamsTarget; +}; + +const APP_CACHE_CONTROL = "public, max-age=31536000, immutable"; +const NO_STORE_CACHE_CONTROL = "no-cache, no-store, must-revalidate"; +const STUDIO_ROOT_PATH = "/studio"; +const STUDIO_STREAMS_PROXY_BASE_PATH = "/studio/api/streams"; +const STUDIO_CONFIG_PATH = "/api/config"; +const STUDIO_QUERY_PATH = "/api/query"; +const STUDIO_AI_PATH = "/api/ai"; +const GENERATE_ROOT_PATH = "/generate"; +const GENERATE_JOBS_BASE_PATH = "/api/generate/jobs"; +const JOB_RETENTION_MS = 10 * 60_000; +const GENERATE_BUTTON_COUNTS = new Set([1_000, 10_000, 100_000]); +const GENERATE_BATCH_TARGET_BYTES = 256 * 1024; +const GENERATE_BATCH_MIN_EVENTS = 100; +const GENERATE_BATCH_MAX_EVENTS = 500; +const GENERATE_GC_MIN_TOTAL_EVENTS = 100_000; +const GENERATE_GC_ROW_INTERVAL = 50_000; +const DEFAULT_GENERATE_STREAM = "demo-app"; +const GENERATE_STREAM_NAME_MAX_LENGTH = 128; +const GENERATE_STREAM_NAME_PATTERN = /^[A-Za-z0-9][A-Za-z0-9._/-]*$/; + +const GENERATE_METHODS = ["GET", "POST", "PUT", "PATCH"] as const; +const GENERATE_PATHS = [ + "/api/orders", + "/api/orders/:id", + "/api/checkout", + "/api/invoices", + "/api/shipments/:id", + "/api/catalog/search", +] as const; +const GENERATE_SERVICES = [ + "billing", + "checkout", + "fulfillment", + "identity", + "inventory", + "search", +] as const; +const GENERATE_ENVIRONMENTS = ["prod", "staging"] as const; +const GENERATE_REGIONS = ["cdg", "fra", "iad"] as const; + +function clamp(value: number, min: number, max: number): number { + return Math.max(min, Math.min(max, value)); +} + +function jsonResponse( + status: number, + payload: unknown, + headers?: HeadersInit, +): Response { + return new Response(JSON.stringify(payload), { + headers: { + "cache-control": NO_STORE_CACHE_CONTROL, + "content-type": "application/json; charset=utf-8", + ...headers, + }, + status, + }); +} + +function methodNotAllowed(allow: string): Response { + return new Response("Method Not Allowed", { + headers: { + Allow: allow, + "cache-control": NO_STORE_CACHE_CONTROL, + }, + status: 405, + }); +} + +function collectGenerateBatchMemory(job: GenerateJobState, previousInserted: number): 
void { + if (job.total < GENERATE_GC_MIN_TOTAL_EVENTS) return; + const crossedBoundary = + Math.floor(previousInserted / GENERATE_GC_ROW_INTERVAL) !== + Math.floor(job.inserted / GENERATE_GC_ROW_INTERVAL); + if (!crossedBoundary && job.inserted < job.total) return; + + const gc = (globalThis as { Bun?: { gc?: (force?: boolean) => void } }).Bun?.gc; + if (typeof gc !== "function") return; + + try { + gc(true); + } catch { + return; + } +} + +function responseBodyForMethod( + request: Request, + body: BodyInit | null, +): BodyInit | null { + return request.method === "HEAD" ? null : body; +} + +function textResponse( + request: Request, + body: string, + init: ResponseInit & { contentType: string }, +): Response { + const headers = new Headers(init.headers); + if (!headers.has("cache-control")) { + headers.set("cache-control", NO_STORE_CACHE_CONTROL); + } + headers.set("content-type", init.contentType); + + return new Response(responseBodyForMethod(request, body), { + headers, + status: init.status, + }); +} + +function normalizeBuiltAssetPath(pathname: string): string[] { + const paths = [pathname]; + try { + const decoded = decodeURIComponent(pathname); + if (decoded !== pathname) { + paths.push(decoded); + } + } catch { + // Ignore malformed URI sequences and keep the raw pathname only. + } + + for (const candidate of [...paths]) { + if (candidate.startsWith(`${STUDIO_ROOT_PATH}/`)) { + paths.push(candidate.slice(STUDIO_ROOT_PATH.length)); + } else if (candidate.startsWith("/") && candidate !== STUDIO_ROOT_PATH) { + paths.push(`${STUDIO_ROOT_PATH}${candidate}`); + } + } + return [...new Set(paths)]; +} + +function createEntropyHex(stream: string, index: number, byteLength = 64): string { + let state = 0x811c9dc5; + for (let position = 0; position < stream.length; position += 1) { + state ^= stream.charCodeAt(position); + state = Math.imul(state, 0x01000193); + } + state ^= index >>> 0; + state = Math.imul(state, 0x9e3779b1); + + const alphabet = "0123456789abcdef"; + let out = ""; + for (let emitted = 0; emitted < byteLength; emitted += 1) { + state ^= state << 13; + state ^= state >>> 17; + state ^= state << 5; + const value = state >>> 0; + out += alphabet[(value >>> 4) & 0x0f]; + out += alphabet[value & 0x0f]; + } + return out; +} + +function createStudioHtmlDocument(): string { + return ` + + + + + Streams Studio + + + +
+ + +`; +} + +function createLandingHtmlDocument(): string { + return ` + + + + + Streams Compute Demo + + + +
+ Compute Demo +

Streams, with Studio and an evlog generator.

+

+ This deployment keeps the normal Streams API on /v1/*, serves Prisma Studio on /studio, + and adds a write generator on /generate for bulk evlog ingestion against the same server. +

+ +
+ +`; +} + +function createGenerateHtmlDocument(): string { + return ` + + + + + Streams Evlog Generator + + + +
+
Evlog Ingest
+

Generate canonical request events.

+

+ Each run uses the selected application/json stream, installs the evlog profile, + and appends the selected number of events in server-side chunks sized for steady progress updates. +

+ +
+ + +
Use letters, numbers, dot, underscore, slash, or hyphen. Default: ${DEFAULT_GENERATE_STREAM}.
+
+ +
+ + + +
+ +
+
Progress
+
Ready to start.
+
+
+
0 / 0
+
0%
+
+
+
+ Stream + Not started +
+
+ Batch Size + - +
+
+ Status + idle +
+
+ + +
+
+ + + +`; +} + +function createConfigPayload(bootId: string): { + ai: { enabled: boolean }; + bootId: string; + database: { enabled: boolean }; + streams: { url: string }; +} { + return { + ai: { enabled: false }, + bootId, + database: { enabled: false }, + streams: { url: STUDIO_STREAMS_PROXY_BASE_PATH }, + }; +} + +function buildGenerateEvent(stream: string, index: number, baseMs: number): Record { + const method = GENERATE_METHODS[index % GENERATE_METHODS.length]; + const service = GENERATE_SERVICES[index % GENERATE_SERVICES.length]; + const environment = GENERATE_ENVIRONMENTS[index % GENERATE_ENVIRONMENTS.length]; + const region = GENERATE_REGIONS[index % GENERATE_REGIONS.length]; + const path = GENERATE_PATHS[index % GENERATE_PATHS.length]; + const timestamp = new Date(baseMs + index * 1_000).toISOString(); + const status = + index % 41 === 0 ? 503 : index % 17 === 0 ? 429 : index % 7 === 0 ? 404 : 200; + const isError = status >= 500; + const isWarn = !isError && status >= 400; + + const entropyHex = createEntropyHex(stream, index, 96); + + return { + timestamp, + requestId: `${stream}-req-${index.toString().padStart(8, "0")}`, + traceContext: { + traceId: `${stream}-trace-${Math.floor(index / 4) + .toString() + .padStart(6, "0")}`, + spanId: `${service}-span-${index.toString().padStart(8, "0")}`, + }, + method, + path, + service, + environment, + version: "compute-demo-v1", + region, + status, + duration: 22 + (index % 900), + message: isError + ? "Upstream dependency timed out" + : isWarn + ? "Request rejected by policy" + : "Request completed", + why: isError + ? "Payment provider exceeded deadline budget" + : isWarn + ? "Concurrency limiter rejected this request" + : undefined, + fix: isError + ? "Retry this operation with exponential backoff" + : isWarn + ? "Reduce request rate or retry after the limiter window" + : undefined, + link: + status >= 400 + ? `https://example.internal/runbooks/${service}/${status}` + : undefined, + sampling: { + kept: true, + source: "compute-demo-generate", + }, + tenant: `tenant-${(index % 24).toString().padStart(2, "0")}`, + host: `${service}-${region}-${index % 6}`, + releaseChannel: environment === "prod" ? "stable" : "preview", + context: { + actor: { + id: `user-${(index % 5_000).toString().padStart(5, "0")}`, + plan: index % 13 === 0 ? "enterprise" : "pro", + }, + fingerprint: entropyHex, + request: { + bytes: 512 + (index % 8_192), + routeGroup: path.split("/")[2] ?? "root", + traceToken: entropyHex.slice(0, 48), + }, + }, + }; +} + +async function responseText(response: Response): Promise { + const text = await response.text(); + return text.trim(); +} + +function assertAllowedGenerateCount(value: unknown): number | null { + if (typeof value !== "number" || !Number.isInteger(value)) { + return null; + } + return GENERATE_BUTTON_COUNTS.has(value) ? value : null; +} + +function normalizeGenerateStreamName(value: unknown): string | null { + if (value == null) { + return DEFAULT_GENERATE_STREAM; + } + if (typeof value !== "string") { + return null; + } + + const stream = value.trim(); + if ( + stream.length === 0 || + stream.length > GENERATE_STREAM_NAME_MAX_LENGTH || + !GENERATE_STREAM_NAME_PATTERN.test(stream) + ) { + return null; + } + + return stream; +} + +export function createComputeDemoSite( + options: CreateComputeDemoSiteOptions, +): ComputeDemoSite { + const bootId = options.bootId ?? 
crypto.randomUUID(); + const jobs = new Map(); + const cleanupTimers = new Set(); + const studioAssets = options.studioAssets; + + const clearTimer = (timer: Timer): void => { + cleanupTimers.delete(timer); + clearTimeout(timer); + }; + + const scheduleCleanup = (jobId: string): void => { + const timer = setTimeout(() => { + jobs.delete(jobId); + clearTimer(timer); + }, JOB_RETENTION_MS); + (timer as { unref?: () => void }).unref?.(); + cleanupTimers.add(timer); + }; + + const writeHtml = (request: Request, html: string): Response => + textResponse(request, html, { + contentType: "text/html; charset=utf-8", + headers: { + "cache-control": NO_STORE_CACHE_CONTROL, + }, + status: 200, + }); + + const writeAsset = (request: Request, body: BodyInit, contentType: string): Response => + new Response(responseBodyForMethod(request, body), { + headers: { + "cache-control": APP_CACHE_CONTROL, + "content-type": contentType, + }, + status: 200, + }); + + const proxyRequestToStreamsApp = async ( + request: Request, + url: URL, + ): Promise => { + const proxyPathname = url.pathname.slice(STUDIO_STREAMS_PROXY_BASE_PATH.length); + const normalizedPathname = + proxyPathname.length > 0 ? proxyPathname : "/"; + const upstreamUrl = new URL( + `${normalizedPathname}${url.search}`, + "http://streams.internal/", + ); + const headers = new Headers(request.headers); + headers.delete("host"); + const body = + request.method === "GET" || request.method === "HEAD" + ? undefined + : await request.arrayBuffer(); + + const upstreamRequest = new Request(upstreamUrl, { + body, + headers, + method: request.method, + redirect: "manual", + signal: request.signal, + }); + const response = await options.streamsApp.fetch(upstreamRequest); + const responseHeaders = new Headers(response.headers); + + responseHeaders.set("cache-control", NO_STORE_CACHE_CONTROL); + + return new Response(response.body, { + headers: responseHeaders, + status: response.status, + statusText: response.statusText, + }); + }; + + const ensureGenerateStream = async (stream: string): Promise => { + if (options.streamsApp.ensureGenerateStream) { + await options.streamsApp.ensureGenerateStream(stream); + return; + } + + const createResponse = await options.streamsApp.fetch( + new Request(`http://streams.internal/v1/stream/${encodeURIComponent(stream)}`, { + headers: { + "content-type": "application/json", + }, + method: "PUT", + }), + ); + if (!createResponse.ok && createResponse.status !== 204) { + throw dsError( + `stream create failed (${createResponse.status}): ${await responseText(createResponse)}`, + ); + } + + const profileResponse = await options.streamsApp.fetch( + new Request( + `http://streams.internal/v1/stream/${encodeURIComponent(stream)}/_profile`, + { + body: JSON.stringify({ + apiVersion: "durable.streams/profile/v1", + profile: { + kind: "evlog", + }, + }), + headers: { + "content-type": "application/json", + }, + method: "POST", + }, + ), + ); + if (!profileResponse.ok) { + throw dsError( + `profile install failed (${profileResponse.status}): ${await responseText(profileResponse)}`, + ); + } + }; + + const appendGenerateBatch = async ( + stream: string, + events: Array>, + ): Promise => { + if (options.streamsApp.appendGenerateBatch) { + await options.streamsApp.appendGenerateBatch(stream, events); + return; + } + + const response = await options.streamsApp.fetch( + new Request(`http://streams.internal/v1/stream/${encodeURIComponent(stream)}`, { + body: JSON.stringify(events), + headers: { + "content-type": "application/json", + }, + method: 
"POST", + }), + ); + if (!response.ok && response.status !== 204) { + throw dsError( + `append failed (${response.status}): ${await responseText(response)}`, + ); + } + }; + + const runGenerateJob = async (job: GenerateJobState): Promise => { + const sampleEvent = buildGenerateEvent(job.stream, 0, Date.now()); + const estimatedBytes = new TextEncoder().encode( + JSON.stringify(sampleEvent), + ).byteLength; + const batchSize = clamp( + Math.floor(GENERATE_BATCH_TARGET_BYTES / Math.max(estimatedBytes, 1)), + GENERATE_BATCH_MIN_EVENTS, + GENERATE_BATCH_MAX_EVENTS, + ); + + job.batchSize = batchSize; + job.status = "running"; + + let beganGenerateJob = false; + try { + await ensureGenerateStream(job.stream); + options.streamsApp.beginGenerateJob?.(job.stream); + beganGenerateJob = true; + const baseMs = Date.now() - Math.max(0, job.total - 1) * 1_000; + + while (job.inserted < job.total) { + const remaining = job.total - job.inserted; + const nextBatchSize = Math.min(job.batchSize, remaining); + const events: Array> = []; + + for (let index = 0; index < nextBatchSize; index += 1) { + events.push( + buildGenerateEvent(job.stream, job.inserted + index, baseMs), + ); + } + + await appendGenerateBatch(job.stream, events); + const previousInserted = job.inserted; + job.inserted += events.length; + collectGenerateBatchMemory(job, previousInserted); + + await new Promise((resolve) => setTimeout(resolve, 0)); + } + + job.status = "succeeded"; + job.finishedAt = new Date().toISOString(); + scheduleCleanup(job.id); + } catch (error) { + job.error = error instanceof Error ? error.message : String(error); + job.status = "failed"; + job.finishedAt = new Date().toISOString(); + scheduleCleanup(job.id); + } finally { + if (beganGenerateJob) options.streamsApp.endGenerateJob?.(job.stream); + } + }; + + const handleGenerateJobCreate = async (request: Request): Promise => { + if (request.method === "OPTIONS") { + return new Response(null, { + headers: { + Allow: "POST,OPTIONS", + }, + status: 204, + }); + } + if (request.method !== "POST") { + return methodNotAllowed("POST,OPTIONS"); + } + + let payload: { count?: unknown; stream?: unknown }; + + try { + payload = (await request.json()) as { count?: unknown; stream?: unknown }; + } catch { + return jsonResponse(400, { error: "Invalid JSON payload." 
}); + } + + const count = assertAllowedGenerateCount(payload.count); + if (count == null) { + return jsonResponse(400, { + error: "count must be one of 1000, 10000, or 100000.", + }); + } + + const stream = normalizeGenerateStreamName(payload.stream); + if (stream == null) { + return jsonResponse(400, { + error: + "stream must be 1-128 characters, start with a letter or number, and contain only letters, numbers, dot, underscore, slash, or hyphen.", + }); + } + + const id = crypto.randomUUID(); + const job: GenerateJobState = { + batchSize: 0, + error: null, + finishedAt: null, + id, + inserted: 0, + startedAt: new Date().toISOString(), + status: "pending", + stream, + total: count, + }; + + jobs.set(id, job); + void runGenerateJob(job); + + return jsonResponse(202, { job }); + }; + + const handleGenerateJobRead = (request: Request, url: URL): Response => { + if (request.method === "OPTIONS") { + return new Response(null, { + headers: { + Allow: "GET,OPTIONS", + }, + status: 204, + }); + } + if (request.method !== "GET") { + return methodNotAllowed("GET,OPTIONS"); + } + + const suffix = url.pathname.slice(GENERATE_JOBS_BASE_PATH.length + 1); + const jobId = decodeURIComponent(suffix); + const job = jobs.get(jobId); + if (!job) { + return jsonResponse(404, { error: "Job not found." }); + } + return jsonResponse(200, { job }); + }; + + return { + close(): void { + for (const timer of cleanupTimers) { + clearTimer(timer); + } + }, + + async fetch(request: Request): Promise { + const url = new URL(request.url); + + if (url.pathname === "/") { + return writeHtml(request, createLandingHtmlDocument()); + } + + if (url.pathname === STUDIO_ROOT_PATH || url.pathname === `${STUDIO_ROOT_PATH}/`) { + return writeHtml(request, createStudioHtmlDocument()); + } + + if (url.pathname === `${STUDIO_ROOT_PATH}/app.js`) { + return writeAsset( + request, + studioAssets.appScript, + "application/javascript; charset=utf-8", + ); + } + + if (url.pathname === `${STUDIO_ROOT_PATH}/app.css`) { + return writeAsset(request, studioAssets.appStyles, "text/css; charset=utf-8"); + } + + if ( + url.pathname === STUDIO_STREAMS_PROXY_BASE_PATH || + url.pathname.startsWith(`${STUDIO_STREAMS_PROXY_BASE_PATH}/`) + ) { + return await proxyRequestToStreamsApp(request, url); + } + + if (url.pathname === STUDIO_CONFIG_PATH) { + return jsonResponse(200, createConfigPayload(bootId)); + } + + if (url.pathname === STUDIO_QUERY_PATH) { + if (request.method === "OPTIONS") { + return new Response(null, { + headers: { + Allow: "POST,OPTIONS", + }, + status: 204, + }); + } + if (request.method !== "POST") { + return methodNotAllowed("POST,OPTIONS"); + } + return jsonResponse(503, { + error: "Database access is disabled for this deployment.", + }); + } + + if (url.pathname === STUDIO_AI_PATH) { + if (request.method === "OPTIONS") { + return new Response(null, { + headers: { + Allow: "POST,OPTIONS", + }, + status: 204, + }); + } + if (request.method !== "POST") { + return methodNotAllowed("POST,OPTIONS"); + } + return jsonResponse(503, { + code: "llm_disabled", + message: "AI is disabled for this deployment.", + ok: false, + }); + } + + if (url.pathname === GENERATE_ROOT_PATH || url.pathname === `${GENERATE_ROOT_PATH}/`) { + return writeHtml(request, createGenerateHtmlDocument()); + } + + if (url.pathname === GENERATE_JOBS_BASE_PATH) { + return await handleGenerateJobCreate(request); + } + + if (url.pathname.startsWith(`${GENERATE_JOBS_BASE_PATH}/`)) { + return handleGenerateJobRead(request, url); + } + + for (const assetPath of 
normalizeBuiltAssetPath(url.pathname)) { + const asset = studioAssets.builtAssets.get(assetPath); + if (asset) { + const assetBytes = asset.bytes instanceof Uint8Array + ? new Uint8Array(asset.bytes) + : new Uint8Array(asset.bytes); + return writeAsset( + request, + new Blob([assetBytes]), + asset.contentType, + ); + } + } + + return await options.streamsApp.fetch(request); + }, + }; +} diff --git a/src/compute/entry.ts b/src/compute/entry.ts new file mode 100644 index 0000000..754dd71 --- /dev/null +++ b/src/compute/entry.ts @@ -0,0 +1,19 @@ +function hasFlag(argv: string[], flag: string): boolean { + return argv.includes(flag) || argv.some((arg) => arg.startsWith(`${flag}=`)); +} + +export function ensureComputeArgv(argv: string[], env: NodeJS.ProcessEnv = process.env): string[] { + const next = [...argv]; + if (!hasFlag(next, "--object-store")) { + next.push("--object-store", "r2"); + } + if (env.DS_MEMORY_LIMIT_MB != null && !hasFlag(next, "--auto-tune")) { + next.push("--auto-tune"); + } + return next; +} + +if (import.meta.main) { + process.argv = ensureComputeArgv(process.argv); + await import("../server"); +} diff --git a/src/compute/virtual-modules.d.ts b/src/compute/virtual-modules.d.ts new file mode 100644 index 0000000..dc3756b --- /dev/null +++ b/src/compute/virtual-modules.d.ts @@ -0,0 +1,15 @@ +declare module "virtual:prebuilt-studio-assets" { + const mod: { + appScript: string; + appStyles: string; + builtAssets: Map< + string, + { + bytes: Uint8Array; + contentType: string; + } + >; + }; + + export = mod; +} diff --git a/src/compute/worker_module_url.ts b/src/compute/worker_module_url.ts new file mode 100644 index 0000000..c75b6b1 --- /dev/null +++ b/src/compute/worker_module_url.ts @@ -0,0 +1,9 @@ +export function resolveWorkerModuleUrl( + currentModuleUrl: string, + sourceRelativePath: string, + builtRelativePath = sourceRelativePath.endsWith(".ts") + ? `${sourceRelativePath.slice(0, -3)}.js` + : sourceRelativePath +): URL { + return new URL(currentModuleUrl.endsWith(".js") ? 
builtRelativePath : sourceRelativePath, currentModuleUrl); +} diff --git a/src/config.ts b/src/config.ts index 10d92e4..873f254 100644 --- a/src/config.ts +++ b/src/config.ts @@ -83,6 +83,8 @@ const KNOWN_DS_ENVS = new Set([ "DS_SEGMENTER_WORKERS", "DS_UPLOAD_CHECK_MS", "DS_UPLOAD_CONCURRENCY", + "DS_BASE_WAL_GC_CHUNK_OFFSETS", + "DS_BASE_WAL_GC_INTERVAL_MS", "DS_SEGMENT_CACHE_MAX_BYTES", "DS_SEGMENT_FOOTER_CACHE_ENTRIES", "DS_INDEX_RUN_CACHE_MAX_BYTES", @@ -149,6 +151,10 @@ const KNOWN_DS_ENVS = new Set([ "DS_MOCK_R2_MAX_INMEM_BYTES", "DS_MOCK_R2_MAX_INMEM_MB", "DS_MOCK_R2_SPILL_DIR", + "DS_MOCK_R2_PUT_DELAY_MS", + "DS_MOCK_R2_GET_DELAY_MS", + "DS_MOCK_R2_HEAD_DELAY_MS", + "DS_MOCK_R2_LIST_DELAY_MS", "DS_BENCH_URL", "DS_BENCH_DURATION_MS", "DS_BENCH_INTERVAL_MS", diff --git a/src/index/indexer.ts b/src/index/indexer.ts index e54290d..c56b422 100644 --- a/src/index/indexer.ts +++ b/src/index/indexer.ts @@ -19,8 +19,10 @@ import { yieldToEventLoop } from "../util/yield"; import { RuntimeMemorySampler } from "../runtime_memory_sampler"; import { ConcurrencyGate } from "../concurrency_gate"; import type { ForegroundActivityTracker } from "../foreground_activity"; +import { LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS, shouldDeferEnqueuedIndexWork, shouldWaitForLowMemoryIndexQuiet } from "./schedule"; import type { AggSectionView } from "../search/agg_format"; import type { ColSectionView } from "../search/col_format"; +import type { ExactSectionView } from "../search/exact_format"; import type { FtsSectionView } from "../search/fts_format"; import type { MetricsBlockSectionView } from "../profiles/metrics/block_format"; import type { SchemaRegistryStore } from "../schema/registry"; @@ -41,6 +43,7 @@ export type StreamIndexLookup = { candidateSegmentsForSecondaryIndex(stream: string, indexName: string, keyBytes: Uint8Array): Promise; getAggSegmentCompanion(stream: string, segmentIndex: number): Promise; getColSegmentCompanion(stream: string, segmentIndex: number): Promise; + getExactSegmentCompanion(stream: string, segmentIndex: number): Promise; getFtsSegmentCompanion(stream: string, segmentIndex: number): Promise; getFtsSegmentCompanionWithStats?( stream: string, @@ -90,6 +93,7 @@ export class IndexManager { private lastDiskEvictions = 0; private lastDiskBytesAdded = 0; private timer: any | null = null; + private wakeTimer: any | null = null; private running = false; private readonly publishManifest?: (stream: string) => Promise; private readonly onMetadataChanged?: (stream: string) => void; @@ -97,6 +101,7 @@ export class IndexManager { private readonly registry?: SchemaRegistryStore; private readonly asyncGate: ConcurrencyGate; private readonly foregroundActivity?: ForegroundActivityTracker; + private firstQueuedAtMs: number | null = null; constructor( cfg: Config, @@ -151,12 +156,43 @@ export class IndexManager { stop(): void { if (this.timer) clearInterval(this.timer); + if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; + this.wakeTimer = null; } enqueue(stream: string): void { if (this.span <= 0) return; + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); + if (shouldDeferEnqueuedIndexWork(this.cfg)) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + this.scheduleTick(); + } + + private scheduleTick(delayMs = 0): void { + if (!this.timer || this.wakeTimer) return; + this.wakeTimer = setTimeout(() => { + this.wakeTimer = null; + if ( + shouldWaitForLowMemoryIndexQuiet( + this.cfg, + this.firstQueuedAtMs, + 
this.foregroundActivity?.wasActiveWithin(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS) ?? false + ) + ) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + if (this.running) { + this.scheduleTick(250); + return; + } + void this.tick(); + }, delayMs); + (this.wakeTimer as { unref?: () => void }).unref?.(); } async candidateSegmentsForRoutingKey(stream: string, keyBytes: Uint8Array): Promise { @@ -295,6 +331,12 @@ export class IndexManager { this.recordCacheStats(); } finally { this.running = false; + if (this.queue.size > 0) { + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); + this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); + } else { + this.firstQueuedAtMs = null; + } } } diff --git a/src/index/lexicon_indexer.ts b/src/index/lexicon_indexer.ts index e521f37..1ff732a 100644 --- a/src/index/lexicon_indexer.ts +++ b/src/index/lexicon_indexer.ts @@ -16,6 +16,7 @@ import { yieldToEventLoop } from "../util/yield"; import { ConcurrencyGate } from "../concurrency_gate"; import type { ForegroundActivityTracker } from "../foreground_activity"; import { LexiconFileCache } from "./lexicon_file_cache"; +import { LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS, shouldDeferEnqueuedIndexWork, shouldWaitForLowMemoryIndexQuiet } from "./schedule"; import { buildLexiconRunPayload, decodeLexiconRunResult, @@ -92,7 +93,9 @@ export class LexiconIndexManager { private readonly building = new Set(); private readonly compacting = new Set(); private timer: any | null = null; + private wakeTimer: any | null = null; private running = false; + private firstQueuedAtMs: number | null = null; constructor( private readonly cfg: Config, @@ -135,13 +138,44 @@ export class LexiconIndexManager { stop(): void { if (this.timer) clearInterval(this.timer); + if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; + this.wakeTimer = null; this.fileCache?.clearMapped(); } enqueue(stream: string): void { if (this.span <= 0) return; + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); + if (shouldDeferEnqueuedIndexWork(this.cfg)) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + this.scheduleTick(); + } + + private scheduleTick(delayMs = 0): void { + if (!this.timer || this.wakeTimer) return; + this.wakeTimer = setTimeout(() => { + this.wakeTimer = null; + if ( + shouldWaitForLowMemoryIndexQuiet( + this.cfg, + this.firstQueuedAtMs, + this.foregroundActivity?.wasActiveWithin(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS) ?? false + ) + ) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + if (this.running) { + this.scheduleTick(250); + return; + } + void this.tick(); + }, delayMs); + (this.wakeTimer as { unref?: () => void }).unref?.(); } getLocalCacheBytes(stream: string): number { @@ -253,6 +287,12 @@ export class LexiconIndexManager { } } finally { this.running = false; + if (this.queue.size > 0) { + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); + this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? 
LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); + } else { + this.firstQueuedAtMs = null; + } } } diff --git a/src/index/schedule.ts b/src/index/schedule.ts new file mode 100644 index 0000000..8ee9b7e --- /dev/null +++ b/src/index/schedule.ts @@ -0,0 +1,28 @@ +import type { Config } from "../config"; + +const LOW_MEMORY_INDEX_WAKE_LIMIT_BYTES = 1024 * 1024 * 1024; +const DEFERRED_INDEX_INTERVAL_MS = 60_000; +export const LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS = 30_000; +export const LOW_MEMORY_INDEX_ENQUEUE_MAX_DEFER_MS = LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS * 2; + +export function shouldDeferEnqueuedIndexWork( + cfg: Pick +): boolean { + return ( + cfg.memoryLimitBytes > 0 && + cfg.memoryLimitBytes <= LOW_MEMORY_INDEX_WAKE_LIMIT_BYTES && + cfg.indexCheckIntervalMs >= DEFERRED_INDEX_INTERVAL_MS + ); +} + +export function shouldWaitForLowMemoryIndexQuiet( + cfg: Pick, + firstQueuedAtMs: number | null, + recentForegroundActivity: boolean, + nowMs = Date.now() +): boolean { + if (!shouldDeferEnqueuedIndexWork(cfg)) return false; + if (!recentForegroundActivity) return false; + if (firstQueuedAtMs != null && nowMs - firstQueuedAtMs >= LOW_MEMORY_INDEX_ENQUEUE_MAX_DEFER_MS) return false; + return true; +} diff --git a/src/index/secondary_indexer.ts b/src/index/secondary_indexer.ts index 18b712c..de7f3d6 100644 --- a/src/index/secondary_indexer.ts +++ b/src/index/secondary_indexer.ts @@ -16,6 +16,7 @@ import { yieldToEventLoop } from "../util/yield"; import { RuntimeMemorySampler } from "../runtime_memory_sampler"; import { ConcurrencyGate } from "../concurrency_gate"; import type { ForegroundActivityTracker } from "../foreground_activity"; +import { LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS, shouldDeferEnqueuedIndexWork, shouldWaitForLowMemoryIndexQuiet } from "./schedule"; import { binaryFuseContains, buildBinaryFuseResult } from "./binary_fuse"; import { IndexRunCache } from "./run_cache"; import { @@ -74,12 +75,14 @@ export class SecondaryIndexManager { private readonly compacting = new Set(); private readonly streamIdleTicks = new Map(); private timer: any | null = null; + private wakeTimer: any | null = null; private running = false; private readonly publishManifest?: (stream: string) => Promise; private readonly onMetadataChanged?: (stream: string) => void; private readonly memorySampler?: RuntimeMemorySampler; private readonly asyncGate: ConcurrencyGate; private readonly foregroundActivity?: ForegroundActivityTracker; + private firstQueuedAtMs: number | null = null; constructor( cfg: Config, @@ -135,13 +138,44 @@ export class SecondaryIndexManager { stop(): void { if (this.timer) clearInterval(this.timer); + if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; + this.wakeTimer = null; this.streamIdleTicks.clear(); } enqueue(stream: string): void { if (this.span <= 0) return; + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); + if (shouldDeferEnqueuedIndexWork(this.cfg)) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + this.scheduleTick(); + } + + private scheduleTick(delayMs = 0): void { + if (!this.timer || this.wakeTimer) return; + this.wakeTimer = setTimeout(() => { + this.wakeTimer = null; + if ( + shouldWaitForLowMemoryIndexQuiet( + this.cfg, + this.firstQueuedAtMs, + this.foregroundActivity?.wasActiveWithin(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS) ?? 
false + ) + ) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + if (this.running) { + this.scheduleTick(250); + return; + } + void this.tick(); + }, delayMs); + (this.wakeTimer as { unref?: () => void }).unref?.(); } async candidateSegmentsForSecondaryIndex( @@ -271,6 +305,12 @@ export class SecondaryIndexManager { } } finally { this.running = false; + if (this.queue.size > 0) { + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); + this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); + } else { + this.firstQueuedAtMs = null; + } } } diff --git a/src/objectstore/accounting.ts b/src/objectstore/accounting.ts index 664df93..be82139 100644 --- a/src/objectstore/accounting.ts +++ b/src/objectstore/accounting.ts @@ -1,4 +1,5 @@ import type { SqliteDurableStore } from "../db/db"; +import type { Metrics } from "../metrics"; import type { GetOptions, ObjectStore, PutResult } from "./interface"; type ClassifiedRequest = { @@ -36,57 +37,115 @@ function classifyListPrefix(prefix: string): ClassifiedRequest | null { export class AccountingObjectStore implements ObjectStore { constructor( private readonly inner: ObjectStore, - private readonly db: SqliteDurableStore + private readonly db: SqliteDurableStore, + private readonly metrics?: Metrics ) {} + private recordLatency(op: "put" | "get" | "head" | "delete" | "list", artifact: string, startedNs: bigint, outcome: "ok" | "miss" | "error"): void { + if (!this.metrics) return; + const elapsedNs = Number(process.hrtime.bigint() - startedNs); + this.metrics.record(`tieredstore.objectstore.${op}.latency`, elapsedNs, "ns", { + artifact, + outcome, + }); + } + async put(key: string, data: Uint8Array, opts?: { contentType?: string; contentLength?: number }): Promise { - const res = await this.inner.put(key, data, opts); + const startedNs = process.hrtime.bigint(); const classified = classifyKey(key); - if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "put", data.byteLength); - return res; + const artifact = classified?.artifact ?? "unknown"; + try { + const res = await this.inner.put(key, data, opts); + if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "put", data.byteLength); + this.recordLatency("put", artifact, startedNs, "ok"); + return res; + } catch (error) { + this.recordLatency("put", artifact, startedNs, "error"); + throw error; + } } async putFile(key: string, path: string, size: number, opts?: { contentType?: string }): Promise { - if (!this.inner.putFile) { - const bytes = await Bun.file(path).bytes(); - const res = await this.inner.put(key, bytes, { - contentType: opts?.contentType, - contentLength: size, - }); - const classified = classifyKey(key); + const startedNs = process.hrtime.bigint(); + const classified = classifyKey(key); + const artifact = classified?.artifact ?? 
"unknown"; + try { + if (!this.inner.putFile) { + const bytes = await Bun.file(path).bytes(); + const res = await this.inner.put(key, bytes, { + contentType: opts?.contentType, + contentLength: size, + }); + if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "put", size); + this.recordLatency("put", artifact, startedNs, "ok"); + return res; + } + const res = await this.inner.putFile(key, path, size, opts); if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "put", size); + this.recordLatency("put", artifact, startedNs, "ok"); return res; + } catch (error) { + this.recordLatency("put", artifact, startedNs, "error"); + throw error; } - const res = await this.inner.putFile(key, path, size, opts); - const classified = classifyKey(key); - if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "put", size); - return res; } async get(key: string, opts?: GetOptions): Promise { - const res = await this.inner.get(key, opts); + const startedNs = process.hrtime.bigint(); const classified = classifyKey(key); - if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "get", res?.byteLength ?? 0); - return res; + const artifact = classified?.artifact ?? "unknown"; + try { + const res = await this.inner.get(key, opts); + if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "get", res?.byteLength ?? 0); + this.recordLatency("get", artifact, startedNs, res == null ? "miss" : "ok"); + return res; + } catch (error) { + this.recordLatency("get", artifact, startedNs, "error"); + throw error; + } } async head(key: string): Promise<{ etag: string; size: number } | null> { - const res = await this.inner.head(key); + const startedNs = process.hrtime.bigint(); const classified = classifyKey(key); - if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "head", res?.size ?? 0); - return res; + const artifact = classified?.artifact ?? "unknown"; + try { + const res = await this.inner.head(key); + if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "head", res?.size ?? 0); + this.recordLatency("head", artifact, startedNs, res == null ? "miss" : "ok"); + return res; + } catch (error) { + this.recordLatency("head", artifact, startedNs, "error"); + throw error; + } } async delete(key: string): Promise { - await this.inner.delete(key); + const startedNs = process.hrtime.bigint(); const classified = classifyKey(key); - if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "delete", 0); + const artifact = classified?.artifact ?? "unknown"; + try { + await this.inner.delete(key); + if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "delete", 0); + this.recordLatency("delete", artifact, startedNs, "ok"); + } catch (error) { + this.recordLatency("delete", artifact, startedNs, "error"); + throw error; + } } async list(prefix: string): Promise { - const res = await this.inner.list(prefix); + const startedNs = process.hrtime.bigint(); const classified = classifyListPrefix(prefix); - if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "list", 0); - return res; + const artifact = classified?.artifact ?? (prefix.replace(/\/+$/, "") === "streams" ? 
"stream_catalog" : "unknown"); + try { + const res = await this.inner.list(prefix); + if (classified) this.db.recordObjectStoreRequestByHash(classified.streamHash, classified.artifact, "list", 0); + this.recordLatency("list", artifact, startedNs, "ok"); + return res; + } catch (error) { + this.recordLatency("list", artifact, startedNs, "error"); + throw error; + } } } diff --git a/src/objectstore/r2.ts b/src/objectstore/r2.ts index 146ce4c..126c599 100644 --- a/src/objectstore/r2.ts +++ b/src/objectstore/r2.ts @@ -1,5 +1,6 @@ -import { createHash } from "node:crypto"; +import { createHash, createHmac } from "node:crypto"; import { createReadStream } from "node:fs"; +import { Readable } from "node:stream"; import type { GetOptions, ObjectStore, PutResult } from "./interface"; import { dsError } from "../util/ds_error.ts"; @@ -9,12 +10,50 @@ export type R2Config = { accessKeyId: string; secretAccessKey: string; region?: string; + endpoint?: string; }; +const EMPTY_SHA256 = sha256Hex(new Uint8Array(0)); +const XML_DECODER = new TextDecoder(); + function sha256Hex(data: Uint8Array | string): string { return createHash("sha256").update(data).digest("hex"); } +async function readResponseBytes(res: Response): Promise { + if (!res.body) return new Uint8Array(0); + const reader = res.body.getReader(); + const chunks: Uint8Array[] = []; + let total = 0; + try { + for (;;) { + const next = await reader.read(); + if (next.done) break; + const chunk = next.value; + chunks.push(chunk); + total += chunk.byteLength; + } + } finally { + reader.releaseLock(); + } + if (chunks.length === 1) return chunks[0]!; + const out = new Uint8Array(total); + let offset = 0; + for (const chunk of chunks) { + out.set(chunk, offset); + offset += chunk.byteLength; + } + return out; +} + +async function readResponseText(res: Response): Promise { + return XML_DECODER.decode(await readResponseBytes(res)); +} + +function fileStreamBody(path: string): BodyInit { + return Readable.toWeb(createReadStream(path)) as unknown as BodyInit; +} + async function sha256FileHex(path: string): Promise { const hash = createHash("sha256"); await new Promise((resolve, reject) => { @@ -31,37 +70,142 @@ function stripQuotes(value: string | null): string { return value.replace(/^\"|\"$/g, ""); } -function isMissingObjectError(err: unknown): boolean { - const record = err as Record | null | undefined; - const status = record?.status; - const statusCode = record?.statusCode; - const code = String(record?.code ?? ""); - const message = String(record?.message ?? err ?? 
"").toLowerCase(); - if (status === 404 || statusCode === 404) return true; - if (code === "NoSuchKey" || code === "NotFound") return true; - return ( - message.includes("not found") || - message.includes("no such key") || - message.includes("does not exist") || - message === "missing" - ); +function encodePathPart(part: string): string { + return encodeURIComponent(part).replace(/[!'()*]/g, (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`); +} + +function encodeKeyPath(key: string): string { + return key.split("/").map(encodePathPart).join("/"); +} + +function encodeQueryPart(part: string): string { + return encodePathPart(part).replace(/%7E/g, "~"); +} + +function hmac(key: string | Buffer, data: string): Buffer { + return createHmac("sha256", key).update(data).digest(); +} + +function xmlDecode(value: string): string { + return value + .replace(/</g, "<") + .replace(/>/g, ">") + .replace(/"/g, "\"") + .replace(/'/g, "'") + .replace(/&/g, "&"); +} + +function firstXmlTag(xml: string, tag: string): string | null { + const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)`)); + return match ? xmlDecode(match[1] ?? "") : null; +} + +function allXmlTags(xml: string, tag: string): string[] { + const out: string[] = []; + const re = new RegExp(`<${tag}>([\\s\\S]*?)`, "g"); + for (;;) { + const match = re.exec(xml); + if (!match) break; + out.push(xmlDecode(match[1] ?? "")); + } + return out; } export class R2ObjectStore implements ObjectStore { - private readonly client: Bun.S3Client; + private readonly endpoint: URL; + private readonly bucket: string; + private readonly accessKeyId: string; + private readonly secretAccessKey: string; + private readonly region: string; constructor(cfg: R2Config) { - this.client = new Bun.S3Client({ - bucket: cfg.bucket, - accessKeyId: cfg.accessKeyId, - secretAccessKey: cfg.secretAccessKey, - region: cfg.region ?? "auto", - endpoint: `https://${cfg.accountId}.r2.cloudflarestorage.com`, + this.endpoint = new URL(cfg.endpoint ?? `https://${cfg.accountId}.r2.cloudflarestorage.com`); + this.bucket = cfg.bucket; + this.accessKeyId = cfg.accessKeyId; + this.secretAccessKey = cfg.secretAccessKey; + this.region = cfg.region ?? "auto"; + } + + private objectPath(key: string): string { + return `/${encodePathPart(this.bucket)}/${encodeKeyPath(key)}`; + } + + private requestUrl(path: string, query: Array<[string, string]> = []): URL { + const url = new URL(this.endpoint.href); + const basePath = url.pathname.replace(/\/+$/, ""); + url.pathname = `${basePath}${path}`; + url.search = ""; + for (const [key, value] of query) url.searchParams.append(key, value); + return url; + } + + private authorization(method: string, url: URL, headers: Headers, payloadHash: string, amzDate: string): string { + const date = amzDate.slice(0, 8); + const host = url.host; + headers.set("host", host); + headers.set("x-amz-content-sha256", payloadHash); + headers.set("x-amz-date", amzDate); + + const signedHeaderNames = [...headers.keys()].map((h) => h.toLowerCase()).sort(); + const canonicalHeaders = signedHeaderNames + .map((name) => `${name}:${headers.get(name)?.trim().replace(/\s+/g, " ") ?? ""}\n`) + .join(""); + const signedHeaders = signedHeaderNames.join(";"); + const queryEntries = [...url.searchParams.entries()].sort(([ak, av], [bk, bv]) => { + if (ak === bk) return av < bv ? -1 : av > bv ? 1 : 0; + return ak < bk ? 
-1 : 1; }); + const canonicalQuery = queryEntries + .map(([key, value]) => `${encodeQueryPart(key)}=${encodeQueryPart(value)}`) + .join("&"); + const canonicalRequest = [ + method, + url.pathname, + canonicalQuery, + canonicalHeaders, + signedHeaders, + payloadHash, + ].join("\n"); + const scope = `${date}/${this.region}/s3/aws4_request`; + const stringToSign = [ + "AWS4-HMAC-SHA256", + amzDate, + scope, + sha256Hex(canonicalRequest), + ].join("\n"); + const kDate = hmac(`AWS4${this.secretAccessKey}`, date); + const kRegion = hmac(kDate, this.region); + const kService = hmac(kRegion, "s3"); + const kSigning = hmac(kService, "aws4_request"); + const signature = createHmac("sha256", kSigning).update(stringToSign).digest("hex"); + return `AWS4-HMAC-SHA256 Credential=${this.accessKeyId}/${scope}, SignedHeaders=${signedHeaders}, Signature=${signature}`; } - private file(key: string): Bun.S3File { - return this.client.file(key); + private async request( + method: string, + path: string, + opts: { + query?: Array<[string, string]>; + headers?: HeadersInit; + body?: BodyInit; + payloadHash?: string; + } = {} + ): Promise { + const url = this.requestUrl(path, opts.query ?? []); + const headers = new Headers(opts.headers); + const now = new Date(); + const amzDate = now.toISOString().replace(/[:-]|\.\d{3}/g, ""); + const payloadHash = opts.payloadHash ?? EMPTY_SHA256; + headers.set("authorization", this.authorization(method, url, headers, payloadHash, amzDate)); + return fetch(url, { + method, + headers, + body: opts.body, + }); + } + + private wrapStatus(op: string, key: string, res: Response): never { + throw dsError(`R2 ${op} failed for ${key}: HTTP ${res.status} ${res.statusText}`); } private wrapError(op: string, key: string, err: unknown): never { @@ -70,20 +214,38 @@ export class R2ObjectStore implements ObjectStore { } async put(key: string, data: Uint8Array, opts: { contentType?: string; contentLength?: number } = {}): Promise { + const payloadHash = sha256Hex(data); try { - await this.file(key).write(data, { type: opts.contentType }); - const stat = await this.file(key).stat(); - return { etag: stripQuotes(stat.etag) || sha256Hex(data) }; + const headers: Record = { + "content-length": String(opts.contentLength ?? 
data.byteLength), + }; + if (opts.contentType) headers["content-type"] = opts.contentType; + const res = await this.request("PUT", this.objectPath(key), { + headers, + body: data as unknown as BodyInit, + payloadHash, + }); + if (!res.ok) this.wrapStatus("PUT", key, res); + return { etag: payloadHash }; } catch (err) { this.wrapError("PUT", key, err); } } - async putFile(key: string, path: string, _size: number, opts: { contentType?: string } = {}): Promise { + async putFile(key: string, path: string, size: number, opts: { contentType?: string } = {}): Promise { + const payloadHash = await sha256FileHex(path); try { - await this.file(key).write(Bun.file(path), { type: opts.contentType }); - const stat = await this.file(key).stat(); - return { etag: stripQuotes(stat.etag) || (await sha256FileHex(path)) }; + const headers: Record = { + "content-length": String(size), + }; + if (opts.contentType) headers["content-type"] = opts.contentType; + const res = await this.request("PUT", this.objectPath(key), { + headers, + body: fileStreamBody(path), + payloadHash, + }); + if (!res.ok) this.wrapStatus("PUT", key, res); + return { etag: payloadHash }; } catch (err) { this.wrapError("PUT", key, err); } @@ -91,24 +253,29 @@ export class R2ObjectStore implements ObjectStore { async get(key: string, opts: GetOptions = {}): Promise { try { - const file = this.file(key); - const body = - opts.range == null - ? file - : file.slice(opts.range.start, opts.range.end == null ? undefined : opts.range.end + 1); - return new Uint8Array(await body.arrayBuffer()); + const headers: Record = {}; + if (opts.range) { + const end = opts.range.end == null ? "" : String(opts.range.end); + headers.range = `bytes=${opts.range.start}-${end}`; + } + const res = await this.request("GET", this.objectPath(key), { headers }); + if (res.status === 404) return null; + if (!res.ok && res.status !== 206) this.wrapStatus("GET", key, res); + return readResponseBytes(res); } catch (err) { - if (isMissingObjectError(err)) return null; this.wrapError("GET", key, err); } } async head(key: string): Promise<{ etag: string; size: number } | null> { try { - const file = this.file(key); - if (!(await file.exists())) return null; - const stat = await file.stat(); - return { etag: stripQuotes(stat.etag), size: stat.size }; + const res = await this.request("HEAD", this.objectPath(key)); + if (res.status === 404) return null; + if (!res.ok) this.wrapStatus("HEAD", key, res); + return { + etag: stripQuotes(res.headers.get("etag")), + size: Number(res.headers.get("content-length") ?? "0"), + }; } catch (err) { this.wrapError("HEAD", key, err); } @@ -116,9 +283,9 @@ export class R2ObjectStore implements ObjectStore { async delete(key: string): Promise { try { - const file = this.file(key); - if (!(await file.exists())) return; - await file.delete(); + const res = await this.request("DELETE", this.objectPath(key)); + if (res.status === 404) return; + if (!res.ok && res.status !== 204) this.wrapStatus("DELETE", key, res); } catch (err) { this.wrapError("DELETE", key, err); } @@ -127,14 +294,21 @@ export class R2ObjectStore implements ObjectStore { async list(prefix: string): Promise { try { const keys: string[] = []; - let continuationToken: string | undefined; + let continuationToken: string | null = null; for (;;) { - const res = await this.client.list({ prefix, continuationToken }); - for (const entry of res.contents ?? 
[]) { - keys.push(entry.key); - } - if (!res.isTruncated || !res.nextContinuationToken) break; - continuationToken = res.nextContinuationToken; + const query: Array<[string, string]> = [ + ["list-type", "2"], + ["prefix", prefix], + ]; + if (continuationToken) query.push(["continuation-token", continuationToken]); + const res = await this.request("GET", `/${encodePathPart(this.bucket)}`, { query }); + if (!res.ok) this.wrapStatus("LIST", prefix, res); + const xml = await readResponseText(res); + keys.push(...allXmlTags(xml, "Key")); + const truncated = firstXmlTag(xml, "IsTruncated") === "true"; + if (!truncated) break; + continuationToken = firstXmlTag(xml, "NextContinuationToken"); + if (!continuationToken) break; } return keys; } catch (err) { diff --git a/src/reader.ts b/src/reader.ts index 387cfbf..2071c58 100644 --- a/src/reader.ts +++ b/src/reader.ts @@ -1,5 +1,5 @@ import type { Config } from "./config"; -import type { SqliteDurableStore, SegmentRow } from "./db/db"; +import type { SearchSegmentCompanionRow, SqliteDurableStore, SegmentRow } from "./db/db"; import type { ObjectStore } from "./objectstore/interface"; import { type CompiledReadFilter, @@ -25,10 +25,13 @@ import { loadSegmentBytesCached, loadSegmentSource, readRangeFromSource, type Se import { Bloom256 } from "./util/bloom256"; import { readU32BE } from "./util/endian"; import { type RetryOptions } from "./util/retry"; +import { retry } from "./util/retry"; import type { IndexCandidate, StreamIndexLookup } from "./index/indexer"; +import { segmentObjectKey, streamHash16Hex } from "./util/stream_paths"; import { dsError } from "./util/ds_error.ts"; import { Result } from "better-result"; import { filterDocIdsByColumnResult } from "./search/col_runtime"; +import { filterDocIdsByExactClausesResult } from "./search/exact_runtime"; import { type AggregateRequest, cloneAggMeasureState, @@ -45,6 +48,7 @@ import { type SearchFtsClause, type SearchRequest, type SearchSortSpec, + buildSearchDocumentResult, collectPositiveSearchColumnClauses, collectPositiveSearchExactClauses, collectPositiveSearchFtsClauses, @@ -54,7 +58,7 @@ import { import { filterDocIdsByFtsClausesResult } from "./search/fts_runtime"; import { canonicalizeColumnValue, canonicalizeExactValue } from "./search/schema"; import { encodeSortableBool, encodeSortableFloat64, encodeSortableInt64 } from "./search/column_encoding"; -import type { SearchRollupConfig } from "./schema/registry"; +import type { SchemaRegistry, SearchRollupConfig } from "./schema/registry"; import type { AggMeasureState } from "./search/agg_format"; import type { MetricsBlockSectionView } from "./profiles/metrics/block_format"; import { materializeMetricsBlockRecord } from "./profiles/metrics/normalize"; @@ -114,6 +118,12 @@ export type SearchResultBatch = { scannedTailDocs: number; scannedTailTimeMs: number; exactCandidateTimeMs: number; + candidateDocIds: number; + decodedRecords: number; + jsonParseTimeMs: number; + segmentPayloadBytesFetched: number; + sortTimeMs: number; + peakHitsHeld: number; indexFamiliesUsed: string[]; }; total: { @@ -166,6 +176,17 @@ export type ReaderError = const READ_FILTER_SCAN_LIMIT_BYTES = 100 * 1024 * 1024; type SegmentCandidateInfo = { segments: Set | null; indexedThrough: number }; type SearchFamilyCandidateInfo = { docIds: Set | null; usedFamilies: Set }; +type HotWalExactCache = { + startSeq: bigint; + endSeq: bigint; + schemaKey: string; + values: Map>; +}; +type SegmentRangeBlockReader = { + blocks: BlockIndexEntry[]; + readBlock: (block: 
BlockIndexEntry) => Promise>; + fetchedBytes: () => number; +}; type SearchHitInternal = { offsetSeq: bigint; offset: string; @@ -207,6 +228,7 @@ type PlannedReadSegments = { sealedEndSeq: bigint; }; type PlannedReadOrder = "asc" | "desc"; +type PrimaryTimestampTopKSort = Extract; function errorMessage(e: unknown): string { return String((e as any)?.message ?? e); @@ -283,6 +305,7 @@ export class StreamReader { private readonly index?: StreamIndexLookup; private readonly memorySampler?: RuntimeMemorySampler; private readonly memory?: MemoryPressureMonitor; + private readonly hotWalExact = new Map(); constructor( config: Config, @@ -370,6 +393,50 @@ export class StreamReader { return { segments: plannedSegments, sealedEndSeq }; } + private planAllSealedReadSegments( + stream: string, + startSeq: bigint, + sealedEndSeq: bigint, + order: PlannedReadOrder = "asc" + ): PlannedReadSegments | null { + if (startSeq > sealedEndSeq) return { segments: [], sealedEndSeq }; + const startSeg = this.db.findSegmentForOffset(stream, startSeq); + const endSeg = this.db.findSegmentForOffset(stream, sealedEndSeq); + if (!startSeg || !endSeg) return null; + const plannedSegments: SegmentRow[] = []; + if (order === "asc") { + for (let segmentIndex = startSeg.segment_index; segmentIndex <= endSeg.segment_index; segmentIndex++) { + const seg = this.db.getSegmentByIndex(stream, segmentIndex); + if (!seg) return null; + plannedSegments.push(seg); + } + } else { + for (let segmentIndex = endSeg.segment_index; segmentIndex >= startSeg.segment_index; segmentIndex--) { + const seg = this.db.getSegmentByIndex(stream, segmentIndex); + if (!seg) return null; + plannedSegments.push(seg); + } + } + return { segments: plannedSegments, sealedEndSeq }; + } + + private currentSearchCompanionRowsBySegment(stream: string, registry: SchemaRegistry): Map { + const desiredPlan = buildDesiredSearchCompanionPlan(registry); + const desiredHash = hashSearchCompanionPlan(desiredPlan); + const companionPlanRow = this.db.getSearchCompanionPlan(stream); + const desiredGeneration = + companionPlanRow == null + ? 1 + : companionPlanRow.plan_hash === desiredHash + ? companionPlanRow.generation + : companionPlanRow.generation + 1; + const rowsBySegment = new Map(); + for (const row of this.db.listSearchSegmentCompanions(stream)) { + if (row.plan_generation === desiredGeneration) rowsBySegment.set(row.segment_index, row); + } + return rowsBySegment; + } + cacheStats(): SegmentCacheStats | null { return this.diskCache ? this.diskCache.stats() : null; } @@ -1123,6 +1190,9 @@ export class StreamReader { const offsetSearchAfter = request.searchAfter && leadingSort?.kind === "offset" ? normalizeSearchAfterValue(leadingSort, request.searchAfter[0]) : null; const cursorFieldBound = resolveSearchCursorFieldBound(request); + const primaryTimestampTopKSort = resolvePrimaryTimestampTopKSort(registry, request); + const primaryTimestampRowsBySegment = + primaryTimestampTopKSort && request.size > 0 ? 
this.currentSearchCompanionRowsBySegment(stream, registry) : null; const hits: SearchHitInternal[] = []; let timedOut = false; @@ -1140,7 +1210,14 @@ export class StreamReader { let scannedSegmentTimeMs = 0; let scannedTailDocs = 0; let scannedTailTimeMs = 0; + let candidateDocIds = 0; + let decodedRecords = 0; + let jsonParseTimeMs = 0; + let segmentPayloadBytesFetched = 0; + let sortTimeMs = 0; + let peakHitsHeld = 0; const indexFamiliesUsed = new Set(); + const exactClauses = collectPositiveSearchExactClauses(request.q); const columnClauses = collectPositiveSearchColumnClauses(request.q); const ftsClauses = collectPositiveSearchFtsClauses(request.q); let exactCandidateInfo: SegmentCandidateInfo = { segments: null, indexedThrough: 0 }; @@ -1156,7 +1233,9 @@ export class StreamReader { offsetSeq: bigint, payload: Uint8Array ): Result => { + const parseStartedAt = Date.now(); const parsedRes = decodeJsonPayloadResult(this.registry, stream, offsetSeq, payload); + jsonParseTimeMs += Date.now() - parseStartedAt; if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message }); const evalRes = evaluateSearchQueryResult(registry, offsetSeq, request.q, parsedRes.value); if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message }); @@ -1167,7 +1246,7 @@ export class StreamReader { if (request.searchAfter && compareSearchAfterValues(sortInternal, request.sort, request.searchAfter) <= 0) { return Result.ok(undefined); } - hits.push({ + const hit: SearchHitInternal = { offsetSeq, offset: encodeOffset(srow.epoch, offsetSeq), score: evalRes.value.score, @@ -1175,10 +1254,32 @@ export class StreamReader { sortResponse: buildSearchSortResponseValues(request.sort, sortInternal, encodeOffset(srow.epoch, offsetSeq)), fields: fieldsRes.value, source: parsedRes.value, - }); + }; + hits.push(hit); + if (primaryTimestampTopKSort && request.size > 0 && hits.length > request.size) { + hits.splice(worstSearchHitIndex(hits, request.sort), 1); + } + if (hits.length > peakHitsHeld) peakHitsHeld = hits.length; return Result.ok(undefined); }; + const primaryTimestampTopKCutoff = (): bigint | null => { + if (!primaryTimestampTopKSort || hits.length < request.size) return null; + const worstHit = hits[worstSearchHitIndex(hits, request.sort)]; + const value = worstHit?.sortInternal[0]; + return typeof value === "bigint" ? 
value : null; + }; + + const primaryTimestampSegmentMayBeatTopK = (seg: SegmentRow): boolean => { + if (!primaryTimestampTopKSort || !primaryTimestampRowsBySegment) return true; + const cutoff = primaryTimestampTopKCutoff(); + if (cutoff == null) return true; + const row = primaryTimestampRowsBySegment.get(seg.segment_index); + if (row?.primary_timestamp_min_ms == null || row.primary_timestamp_max_ms == null) return true; + if (primaryTimestampTopKSort.direction === "desc") return row.primary_timestamp_max_ms >= cutoff; + return row.primary_timestamp_min_ms <= cutoff; + }; + const scanSegmentForSearchResult = async ( seg: SegmentRow, allowedDocIds: Set | null, @@ -1187,10 +1288,12 @@ export class StreamReader { ): Promise> => { if (markTimedOutIfNeeded()) return Result.ok(undefined); const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts()); + segmentPayloadBytesFetched += seg.size_bytes; if (markTimedOutIfNeeded()) return Result.ok(undefined); let curOffset = seg.start_offset; for (const blockRes of iterateBlocksResult(segBytes)) { if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message }); + decodedRecords += blockRes.value.decoded.recordCount; for (const record of blockRes.value.decoded.records) { if (curOffset > rangeEndSeq) return Result.ok(undefined); if (curOffset < rangeStartSeq) { @@ -1237,6 +1340,7 @@ export class StreamReader { const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult( stream, seg.segment_index, + exactClauses, columnClauses, ftsClauses, { @@ -1254,6 +1358,7 @@ export class StreamReader { if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message }); if (markTimedOutIfNeeded()) return Result.ok(undefined); const familyCandidates = familyCandidatesRes.value; + if (familyCandidates.docIds) candidateDocIds += familyCandidates.docIds.size; if (familyCandidates.docIds && familyCandidates.docIds.size === 0) { indexedSegments += familyCandidates.usedFamilies.size > 0 ? 1 : 0; for (const family of familyCandidates.usedFamilies) indexFamiliesUsed.add(family); @@ -1276,6 +1381,46 @@ export class StreamReader { }; const stopIfPageComplete = (): boolean => hits.length >= request.size; + const scanWalTailResult = ( + startSeq: bigint, + endSeq: bigint, + direction: "asc" | "desc", + stopOnPageComplete: boolean + ): Result => { + const tailStartedAt = Date.now(); + const hotOffsetsRes = this.hotWalExactOffsetsResult(stream, startSeq, endSeq, exactClauses, registry); + if (Result.isError(hotOffsetsRes)) return hotOffsetsRes; + const hotOffsets = hotOffsetsRes.value; + if (hotOffsets) { + candidateDocIds += hotOffsets.length; + const orderedOffsets = direction === "desc" ? [...hotOffsets].reverse() : hotOffsets; + for (const offsetSeq of orderedOffsets) { + const record = this.walRecordAt(stream, offsetSeq); + if (!record) continue; + scannedTailDocs += 1; + const matchRes = collectSearchMatchResult(record.offset, record.payload); + if (Result.isError(matchRes)) return matchRes; + if (markTimedOutIfNeeded()) break; + if (stopOnPageComplete && stopIfPageComplete()) break; + } + scannedTailTimeMs += Date.now() - tailStartedAt; + return Result.ok(undefined); + } + + const rows = + direction === "desc" + ? 
this.db.iterWalRangeDesc(stream, startSeq, endSeq) + : this.db.iterWalRange(stream, startSeq, endSeq); + for (const record of rows) { + scannedTailDocs += 1; + const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload); + if (Result.isError(matchRes)) return matchRes; + if (markTimedOutIfNeeded()) break; + if (stopOnPageComplete && stopIfPageComplete()) break; + } + scannedTailTimeMs += Date.now() - tailStartedAt; + return Result.ok(undefined); + }; if (leadingSort?.kind === "offset") { const descending = leadingSort.direction === "desc"; @@ -1290,17 +1435,10 @@ export class StreamReader { const walStart = rangeStartSeq > tailStart ? rangeStartSeq : tailStart; const walEnd = rangeEndSeq; if (walStart <= walEnd) { - const tailStartedAt = Date.now(); - for (const record of this.db.iterWalRangeDesc(stream, walStart, walEnd)) { - scannedTailDocs += 1; - const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload); - if (Result.isError(matchRes)) return matchRes; - if (markTimedOutIfNeeded()) break; - if (stopIfPageComplete()) break; + const tailRes = scanWalTailResult(walStart, walEnd, "desc", true); + if (Result.isError(tailRes)) return tailRes; } - scannedTailTimeMs += Date.now() - tailStartedAt; } - } if (!timedOut && !stopIfPageComplete()) { const sealedEnd = rangeEndSeq < visibleSealedThrough ? rangeEndSeq : visibleSealedThrough; if (sealedEnd >= rangeStartSeq) { @@ -1319,6 +1457,7 @@ export class StreamReader { seg, exactCandidateInfo, cursorFieldBound, + exactClauses, columnClauses, ftsClauses, rangeStartSeq, @@ -1353,6 +1492,15 @@ export class StreamReader { addScannedSegmentTimeMs: (deltaMs) => { scannedSegmentTimeMs += deltaMs; }, + addCandidateDocIds: (count) => { + candidateDocIds += count; + }, + addDecodedRecords: (count) => { + decodedRecords += count; + }, + addSegmentPayloadBytesFetched: (count) => { + segmentPayloadBytesFetched += count; + }, } ); if (Result.isError(scanRes)) return scanRes; @@ -1377,6 +1525,7 @@ export class StreamReader { seg, exactCandidateInfo, cursorFieldBound, + exactClauses, columnClauses, ftsClauses, rangeStartSeq, @@ -1411,6 +1560,15 @@ export class StreamReader { addScannedSegmentTimeMs: (deltaMs) => { scannedSegmentTimeMs += deltaMs; }, + addCandidateDocIds: (count) => { + candidateDocIds += count; + }, + addDecodedRecords: (count) => { + decodedRecords += count; + }, + addSegmentPayloadBytesFetched: (count) => { + segmentPayloadBytesFetched += count; + }, } ); if (Result.isError(scanRes)) return scanRes; @@ -1450,15 +1608,8 @@ export class StreamReader { } } if (!timedOut && !stopIfPageComplete() && coverageState.canSearchWalTail && seq <= rangeEndSeq) { - const tailStartedAt = Date.now(); - for (const record of this.db.iterWalRange(stream, seq, rangeEndSeq)) { - scannedTailDocs += 1; - const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload); - if (Result.isError(matchRes)) return matchRes; - if (markTimedOutIfNeeded()) break; - if (stopIfPageComplete()) break; - } - scannedTailTimeMs += Date.now() - tailStartedAt; + const tailRes = scanWalTailResult(seq, rangeEndSeq, "asc", true); + if (Result.isError(tailRes)) return tailRes; } } } @@ -1493,6 +1644,12 @@ export class StreamReader { scannedTailDocs, scannedTailTimeMs, exactCandidateTimeMs, + candidateDocIds, + decodedRecords, + jsonParseTimeMs, + segmentPayloadBytesFetched, + sortTimeMs, + peakHitsHeld, indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(), }, total: { @@ -1520,18 +1677,27 @@ export class StreamReader { 
exactCandidateInfo.indexedThrough, "asc" ); - if (plannedSealedSegments) { - for (const seg of plannedSealedSegments.segments) { + const allSealedSegments = + primaryTimestampTopKSort && !plannedSealedSegments ? this.planAllSealedReadSegments(stream, 0n, sealedEnd, "asc") : null; + const sealedSegmentPlan = plannedSealedSegments ?? allSealedSegments; + if (sealedSegmentPlan) { + const sealedSegments = + primaryTimestampTopKSort && primaryTimestampRowsBySegment + ? orderSegmentsByPrimaryTimestampBounds(sealedSegmentPlan.segments, primaryTimestampRowsBySegment, primaryTimestampTopKSort.direction) + : sealedSegmentPlan.segments; + for (const seg of sealedSegments) { + if (!primaryTimestampSegmentMayBeatTopK(seg)) break; const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq); if (Result.isError(scanRes)) return scanRes; - seq = seg.end_offset + 1n; + if (seg.end_offset >= seq) seq = seg.end_offset + 1n; if (timedOut) break; } - if (seq <= plannedSealedSegments.sealedEndSeq) seq = plannedSealedSegments.sealedEndSeq + 1n; + if (seq <= sealedSegmentPlan.sealedEndSeq) seq = sealedSegmentPlan.sealedEndSeq + 1n; } else { while (seq <= visibleSnapshotEndSeq && seq <= visibleSealedThrough) { const seg = this.db.findSegmentForOffset(stream, seq); if (!seg) break; + if (!primaryTimestampSegmentMayBeatTopK(seg)) break; const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq); if (Result.isError(scanRes)) return scanRes; seq = seg.end_offset + 1n; @@ -1540,17 +1706,13 @@ export class StreamReader { } if (!timedOut && coverageState.canSearchWalTail && seq <= snapshotEndSeq) { - const tailStartedAt = Date.now(); - for (const record of this.db.iterWalRange(stream, seq, snapshotEndSeq)) { - scannedTailDocs += 1; - const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload); - if (Result.isError(matchRes)) return matchRes; - if (markTimedOutIfNeeded()) break; - } - scannedTailTimeMs += Date.now() - tailStartedAt; + const tailRes = scanWalTailResult(seq, snapshotEndSeq, "asc", false); + if (Result.isError(tailRes)) return tailRes; } + const sortStartedAt = Date.now(); hits.sort((left, right) => compareSearchHits(left, right, request.sort)); + sortTimeMs += Date.now() - sortStartedAt; const pageHits = hits.slice(0, request.size); const nextSearchAfter = pageHits.length === request.size ? 
pageHits[pageHits.length - 1].sortResponse : null; const exactTotalKnown = !timedOut && coverageState.complete && nextSearchAfter == null; @@ -1582,6 +1744,12 @@ export class StreamReader { scannedTailDocs, scannedTailTimeMs, exactCandidateTimeMs, + candidateDocIds, + decodedRecords, + jsonParseTimeMs, + segmentPayloadBytesFetched, + sortTimeMs, + peakHitsHeld, indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(), }, total: { @@ -1903,11 +2071,53 @@ export class StreamReader { return res.value; } + private async loadSegmentRangeBlockReaderResult(seg: SegmentRow): Promise> { + const objectKey = segmentObjectKey(streamHash16Hex(seg.stream), seg.segment_index); + let fetchedBytes = 0; + const readRange = async (start: number, end: number): Promise> => { + const bytes = await retry( + async () => { + const res = await this.os.get(objectKey, { range: { start, end } }); + if (!res) throw dsError(`object store missing segment: ${objectKey}`); + return res; + }, + this.retryOpts() + ); + fetchedBytes += bytes.byteLength; + return Result.ok(bytes); + }; + + if (seg.size_bytes < 8) return Result.ok(null); + const tailRes = await readRange(seg.size_bytes - 8, seg.size_bytes - 1); + if (Result.isError(tailRes)) return tailRes; + const tail = tailRes.value; + if (tail.byteLength < 8) return Result.ok(null); + const magic = String.fromCharCode(tail[4], tail[5], tail[6], tail[7]); + if (magic !== "DSF1") return Result.ok(null); + const footerLen = readU32BE(tail, 0); + const footerStart = seg.size_bytes - 8 - footerLen; + if (footerStart < 0) return Result.ok(null); + const footerRes = await readRange(footerStart, footerStart + footerLen - 1); + if (Result.isError(footerRes)) return footerRes; + const footer = parseFooterBytes(footerRes.value); + if (!footer?.blocks) return Result.ok(null); + + return Result.ok({ + blocks: footer.blocks, + readBlock: async (block) => { + const totalLen = DSB3_HEADER_BYTES + block.compressedLen; + return readRange(block.blockOffset, block.blockOffset + totalLen - 1); + }, + fetchedBytes: () => fetchedBytes, + }); + } + private async scanSegmentReverseForSearchResult( stream: string, seg: SegmentRow, exactCandidateInfo: SegmentCandidateInfo, cursorFieldBound: SearchCursorFieldBound | null, + exactClauses: SearchExactClause[], columnClauses: SearchColumnClause[], ftsClauses: SearchFtsClause[], rangeStartSeq: bigint, @@ -1926,6 +2136,9 @@ export class StreamReader { addFtsDecodeMs: (deltaMs: number) => void; addFtsClauseEstimateMs: (deltaMs: number) => void; addScannedSegmentTimeMs: (deltaMs: number) => void; + addCandidateDocIds: (count: number) => void; + addDecodedRecords: (count: number) => void; + addSegmentPayloadBytesFetched: (count: number) => void; } ): Promise> { const segmentStartedAt = Date.now(); @@ -1956,6 +2169,7 @@ export class StreamReader { const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult( stream, seg.segment_index, + exactClauses, columnClauses, ftsClauses, { @@ -1967,6 +2181,7 @@ export class StreamReader { if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message }); if (markTimedOutIfNeeded()) return Result.ok(undefined); const familyCandidates = familyCandidatesRes.value; + if (familyCandidates.docIds) state.addCandidateDocIds(familyCandidates.docIds.size); if (familyCandidates.docIds && familyCandidates.docIds.size === 0) { if (familyCandidates.usedFamilies.size > 0) state.addIndexedSegment(); for (const family of familyCandidates.usedFamilies) 
state.indexFamiliesUsed.add(family); @@ -1981,16 +2196,122 @@ export class StreamReader { state.addScannedSegment(); } + const addSegmentTime = (): void => { + if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt); + else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt); + }; + const scanCandidateDocIdsWithBlocksResult = async ( + blocks: BlockIndexEntry[], + readBlock: (block: BlockIndexEntry) => Promise> + ): Promise> => { + const candidateDocIds = Array.from(familyCandidates.docIds!) + .filter((docId) => { + const offsetSeq = seg.start_offset + BigInt(docId); + return offsetSeq >= rangeStartSeq && offsetSeq <= rangeEndSeq; + }) + .sort((left, right) => right - left); + let currentBlockIndex = -1; + let currentBlockStartOffset = 0n; + let currentRecords: Array<{ payload: Uint8Array }> = []; + for (const docId of candidateDocIds) { + const offsetSeq = seg.start_offset + BigInt(docId); + const blockIndex = findFirstRelevantBlockIndex(blocks, offsetSeq); + const block = blocks[blockIndex]!; + const blockStartOffset = block.firstOffset; + const blockEndOffset = blockStartOffset + BigInt(block.recordCount) - 1n; + if (offsetSeq < blockStartOffset || offsetSeq > blockEndOffset) continue; + if (blockIndex !== currentBlockIndex) { + const blockBytesRes = await readBlock(block); + if (Result.isError(blockBytesRes)) return blockBytesRes; + const decodedRes = decodeBlockResult(blockBytesRes.value); + if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message }); + currentBlockIndex = blockIndex; + currentBlockStartOffset = blockStartOffset; + currentRecords = decodedRes.value.records; + state.addDecodedRecords(decodedRes.value.recordCount); + } + const recordIndex = Number(offsetSeq - currentBlockStartOffset); + const record = currentRecords[recordIndex]; + if (!record) continue; + const matchRes = state.collectSearchMatchResult(offsetSeq, record.payload); + if (Result.isError(matchRes)) return matchRes; + if (markTimedOutIfNeeded()) return Result.ok(undefined); + if (state.stopIfPageComplete()) return Result.ok(undefined); + } + return Result.ok(undefined); + }; + if (markTimedOutIfNeeded()) return Result.ok(undefined); - const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts()); + if (familyCandidates.docIds) { + const rangeReaderRes = await this.loadSegmentRangeBlockReaderResult(seg); + if (Result.isError(rangeReaderRes)) return rangeReaderRes; + if (rangeReaderRes.value) { + const rangeReader = rangeReaderRes.value; + const scanRes = await scanCandidateDocIdsWithBlocksResult(rangeReader.blocks, rangeReader.readBlock); + state.addSegmentPayloadBytesFetched(rangeReader.fetchedBytes()); + addSegmentTime(); + return scanRes; + } + } + + const source = await loadSegmentSource(this.os, seg, this.diskCache, this.retryOpts()); + state.addSegmentPayloadBytesFetched(seg.size_bytes); if (markTimedOutIfNeeded()) return Result.ok(undefined); + const footerBlocks = loadSegmentFooterBlocksFromSource(seg, source); + if (footerBlocks) { + if (familyCandidates.docIds) { + const scanRes = await scanCandidateDocIdsWithBlocksResult(footerBlocks, async (block) => { + const totalLen = DSB3_HEADER_BYTES + block.compressedLen; + return Result.ok(readRangeFromSource(source, block.blockOffset, block.blockOffset + totalLen - 1)); + }); + addSegmentTime(); + return scanRes; + } + + for (let blockIndex = findFirstRelevantBlockIndex(footerBlocks, rangeEndSeq); blockIndex >= 0; blockIndex--) { + const block = 
footerBlocks[blockIndex]!; + const blockStartOffset = block.firstOffset; + const blockEndOffset = blockStartOffset + BigInt(block.recordCount) - 1n; + if (blockStartOffset > rangeEndSeq) continue; + if (blockEndOffset < rangeStartSeq) break; + + const totalLen = DSB3_HEADER_BYTES + block.compressedLen; + const blockBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + totalLen - 1); + const decodedRes = decodeBlockResult(blockBytes); + if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message }); + const decoded = decodedRes.value; + state.addDecodedRecords(decoded.recordCount); + for (let recordIndex = decoded.records.length - 1; recordIndex >= 0; recordIndex--) { + const offsetSeq = blockStartOffset + BigInt(recordIndex); + if (offsetSeq > rangeEndSeq) continue; + if (offsetSeq < rangeStartSeq) { + addSegmentTime(); + return Result.ok(undefined); + } + const matchRes = state.collectSearchMatchResult(offsetSeq, decoded.records[recordIndex]!.payload); + if (Result.isError(matchRes)) return matchRes; + if (markTimedOutIfNeeded()) { + addSegmentTime(); + return Result.ok(undefined); + } + if (state.stopIfPageComplete()) { + addSegmentTime(); + return Result.ok(undefined); + } + } + } + + addSegmentTime(); + return Result.ok(undefined); + } + const decodedBlocks: Array<{ records: Array<{ payload: Uint8Array }> }> = []; - for (const blockRes of iterateBlocksResult(segBytes)) { + for (const blockRes of iterateBlocksResult(source.bytes)) { if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message }); decodedBlocks.push({ records: blockRes.value.decoded.records }); + state.addDecodedRecords(blockRes.value.decoded.recordCount); if (markTimedOutIfNeeded()) { - if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt); - else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt); + addSegmentTime(); return Result.ok(undefined); } } @@ -2003,8 +2324,7 @@ export class StreamReader { const offsetSeq = blockStartOffset + BigInt(recordIndex); if (offsetSeq > rangeEndSeq) continue; if (offsetSeq < rangeStartSeq) { - if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt); - else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt); + addSegmentTime(); return Result.ok(undefined); } const localDocId = Number(offsetSeq - seg.start_offset); @@ -2013,24 +2333,94 @@ export class StreamReader { if (Result.isError(matchRes)) return matchRes; } if (markTimedOutIfNeeded()) { - if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt); - else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt); + addSegmentTime(); return Result.ok(undefined); } if (state.stopIfPageComplete()) { - if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt); - else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt); + addSegmentTime(); return Result.ok(undefined); } } blockEndOffset = blockStartOffset - 1n; } - if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt); - else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt); + addSegmentTime(); return Result.ok(undefined); } + private searchSchemaKey(registry: SchemaRegistry): string { + return `${registry.currentVersion}:${JSON.stringify(registry.search ?? 
null)}`; + } + + private buildHotWalExactCacheResult( + stream: string, + startSeq: bigint, + endSeq: bigint, + registry: SchemaRegistry + ): Result { + const schemaKey = this.searchSchemaKey(registry); + const cached = this.hotWalExact.get(stream); + if (cached && cached.startSeq === startSeq && cached.endSeq === endSeq && cached.schemaKey === schemaKey) { + return Result.ok(cached); + } + + const values = new Map>(); + if (startSeq <= endSeq) { + for (const record of this.db.iterWalRange(stream, startSeq, endSeq)) { + const offsetSeq = BigInt(record.offset); + const parsedRes = decodeJsonPayloadResult(this.registry, stream, offsetSeq, record.payload); + if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message }); + const docRes = buildSearchDocumentResult(registry, offsetSeq, parsedRes.value); + if (Result.isError(docRes)) return Result.err({ kind: "internal", message: docRes.error.message }); + for (const [field, fieldValues] of docRes.value.exactValues) { + let byValue = values.get(field); + if (!byValue) { + byValue = new Map(); + values.set(field, byValue); + } + for (const value of fieldValues) { + let offsets = byValue.get(value); + if (!offsets) { + offsets = []; + byValue.set(value, offsets); + } + offsets.push(offsetSeq); + } + } + } + } + + const next: HotWalExactCache = { startSeq, endSeq, schemaKey, values }; + this.hotWalExact.set(stream, next); + return Result.ok(next); + } + + private hotWalExactOffsetsResult( + stream: string, + startSeq: bigint, + endSeq: bigint, + clauses: SearchExactClause[], + registry: SchemaRegistry + ): Result { + if (clauses.length === 0 || startSeq > endSeq) return Result.ok(null); + const cacheRes = this.buildHotWalExactCacheResult(stream, startSeq, endSeq, registry); + if (Result.isError(cacheRes)) return cacheRes; + + const postings = clauses.map((clause) => cacheRes.value.values.get(clause.field)?.get(clause.canonicalValue) ?? 
[]); + if (postings.some((offsets) => offsets.length === 0)) return Result.ok([]); + postings.sort((left, right) => left.length - right.length); + const [smallest, ...rest] = postings; + const restSets = rest.map((offsets) => new Set(offsets)); + return Result.ok(smallest!.filter((offset) => restSets.every((set) => set.has(offset)))); + } + + private walRecordAt(stream: string, offsetSeq: bigint): { offset: bigint; payload: Uint8Array } | null { + for (const record of this.db.iterWalRange(stream, offsetSeq, offsetSeq)) { + return { offset: BigInt(record.offset), payload: record.payload }; + } + return null; + } + private async segmentMayOverlapSearchCursor( stream: string, segmentIndex: number, @@ -2253,6 +2643,7 @@ export class StreamReader { private async resolveSearchFamilyCandidatesResult( stream: string, segmentIndex: number, + exactClauses: SearchExactClause[], columnClauses: SearchColumnClause[], ftsClauses: SearchFtsClause[], stats?: { @@ -2264,11 +2655,26 @@ export class StreamReader { let intersection: Set | null = null; const usedFamilies = new Set(); + if (exactClauses.length > 0) { + const exactCompanion = await this.index?.getExactSegmentCompanion(stream, segmentIndex); + if (exactCompanion) { + const exactRes = filterDocIdsByExactClausesResult({ companion: exactCompanion, clauses: exactClauses }); + if (Result.isError(exactRes)) return exactRes; + intersection = exactRes.value; + usedFamilies.add("exact"); + } + } + if (columnClauses.length > 0) { const columnRes = await this.resolveSearchColumnCandidateDocIdsResult(stream, segmentIndex, columnClauses); if (Result.isError(columnRes)) return columnRes; if (columnRes.value) { - intersection = columnRes.value; + if (intersection == null) intersection = columnRes.value; + else { + for (const docId of Array.from(intersection)) { + if (!columnRes.value.has(docId)) intersection.delete(docId); + } + } usedFamilies.add("col"); } } @@ -2439,3 +2845,48 @@ function normalizeSearchAfterValue(sort: SearchSortSpec, raw: unknown): bigint | function compareSearchAfter(hit: SearchHitInternal, sorts: SearchSortSpec[], searchAfter: unknown[]): number { return compareSearchAfterValues(hit.sortInternal, sorts, searchAfter); } + +function resolvePrimaryTimestampTopKSort(registry: SchemaRegistry, request: SearchRequest): PrimaryTimestampTopKSort | null { + const leadingSort = request.sort[0]; + if (!leadingSort || leadingSort.kind !== "field") return null; + if (registry.search?.primaryTimestampField !== leadingSort.field) return null; + if (leadingSort.config.kind !== "date") return null; + return leadingSort; +} + +function worstSearchHitIndex(hits: SearchHitInternal[], sorts: SearchSortSpec[]): number { + let worstIndex = 0; + for (let index = 1; index < hits.length; index++) { + if (compareSearchHits(hits[index]!, hits[worstIndex]!, sorts) > 0) worstIndex = index; + } + return worstIndex; +} + +function orderSegmentsByPrimaryTimestampBounds( + segments: SegmentRow[], + rowsBySegment: Map, + direction: "asc" | "desc" +): SegmentRow[] { + const unknown: SegmentRow[] = []; + const known: SegmentRow[] = []; + for (const seg of segments) { + const row = rowsBySegment.get(seg.segment_index); + if (row?.primary_timestamp_min_ms == null || row.primary_timestamp_max_ms == null) unknown.push(seg); + else known.push(seg); + } + known.sort((left, right) => { + const leftRow = rowsBySegment.get(left.segment_index)!; + const rightRow = rowsBySegment.get(right.segment_index)!; + if (direction === "desc") { + if (leftRow.primary_timestamp_max_ms !== 
rightRow.primary_timestamp_max_ms) {
+        return leftRow.primary_timestamp_max_ms! > rightRow.primary_timestamp_max_ms! ? -1 : 1;
+      }
+      return right.segment_index - left.segment_index;
+    }
+    if (leftRow.primary_timestamp_min_ms !== rightRow.primary_timestamp_min_ms) {
+      return leftRow.primary_timestamp_min_ms! < rightRow.primary_timestamp_min_ms! ? -1 : 1;
+    }
+    return left.segment_index - right.segment_index;
+  });
+  return [...unknown, ...known];
+}
diff --git a/src/runtime_memory_sampler.ts b/src/runtime_memory_sampler.ts
index f6341ed..f77dc41 100644
--- a/src/runtime_memory_sampler.ts
+++ b/src/runtime_memory_sampler.ts
@@ -1,6 +1,7 @@
 import { mkdirSync, createWriteStream, type WriteStream } from "node:fs";
 import { dirname, extname } from "node:path";
 import { isMainThread, threadId } from "node:worker_threads";
+import { readLinuxStatusRssBreakdown } from "./runtime_memory";
 
 type BunJscModule = {
   heapStats?: () => unknown;
@@ -174,6 +175,7 @@ export class RuntimeMemorySampler {
       reason,
       data,
       process_memory_usage: process.memoryUsage(),
+      linux_status_rss: readLinuxStatusRssBreakdown(0),
       jsc_heap_stats: await this.readJscHeapStats(),
       jsc_memory_usage: await this.readJscMemoryUsage(),
       memory_subsystems: this.readSubsystems(),
diff --git a/src/search/companion_format.ts b/src/search/companion_format.ts
index 648eb58..a954a21 100644
--- a/src/search/companion_format.ts
+++ b/src/search/companion_format.ts
@@ -1,6 +1,7 @@
 import { Result } from "better-result";
 import { decodeAggSegmentCompanionResult, encodeAggSegmentCompanion, type AggSectionInput, type AggSectionView } from "./agg_format";
 import { decodeColSegmentCompanionResult, encodeColSegmentCompanion, type ColSectionInput, type ColSectionView } from "./col_format";
+import { decodeExactSegmentCompanionResult, encodeExactSegmentCompanion, type ExactSectionInput, type ExactSectionView } from "./exact_format";
 import { decodeFtsSegmentCompanionResult, encodeFtsSegmentCompanion, type FtsSectionInput, type FtsSectionView } from "./fts_format";
 import {
   decodeMetricsBlockSegmentCompanionResult,
@@ -16,26 +17,29 @@ const MAGIC = new TextEncoder().encode("PSCIX2");
 const MAJOR_VERSION = 2;
 const HEADER_BYTES = 58;
 const SECTION_ENTRY_BYTES = 28;
-export const PSCIX2_MAX_SECTION_COUNT = 4;
+export const PSCIX2_MAX_SECTION_COUNT = 5;
 export const PSCIX2_MAX_TOC_BYTES = HEADER_BYTES + SECTION_ENTRY_BYTES * PSCIX2_MAX_SECTION_COUNT;
 
 const SECTION_KIND_CODE = {
-  col: 1,
-  fts: 2,
-  agg: 3,
-  mblk: 4,
+  exact: 1,
+  col: 2,
+  fts: 3,
+  agg: 4,
+  mblk: 5,
 } as const;
 
 const CODE_SECTION_KIND = {
-  1: "col",
-  2: "fts",
-  3: "agg",
-  4: "mblk",
+  1: "exact",
+  2: "col",
+  3: "fts",
+  4: "agg",
+  5: "mblk",
 } as const;
 
-export type CompanionSectionKind = "col" | "fts" | "agg" | "mblk";
+export type CompanionSectionKind = "exact" | "col" | "fts" | "agg" | "mblk";
 
 export type CompanionSectionInputMap = {
+  exact?: ExactSectionInput;
   col?: ColSectionInput;
   fts?: FtsSectionInput;
   agg?: AggSectionInput;
@@ -43,6 +47,7 @@ export type CompanionSectionInputMap = {
 };
 
 export type CompanionSectionMap = {
+  exact?: ExactSectionView;
   col?: ColSectionView;
   fts?: FtsSectionView;
   agg?: AggSectionView;
@@ -98,6 +103,10 @@ function encodeSectionPayload(
   section: CompanionSectionInputMap[CompanionSectionKind],
   plan: SearchCompanionPlan
 ): EncodedCompanionSectionPayload {
+  if (kind === "exact") {
+    const payload = encodeExactSegmentCompanion(section as ExactSectionInput, plan);
+    return { kind, version: 2, compression: 0, flags: 0, dirLength: 8, logicalLength:
payload.byteLength, payload }; + } if (kind === "col") { const payload = encodeColSegmentCompanion(section as ColSectionInput, plan); return { kind, version: 2, compression: 0, flags: 0, dirLength: 8, logicalLength: payload.byteLength, payload }; @@ -127,6 +136,11 @@ function decodeSectionResult( bytes: Uint8Array, plan: SearchCompanionPlan ): Result { + if (kind === "exact") { + const decoded = decodeExactSegmentCompanionResult(bytes, plan); + if (Result.isError(decoded)) return invalidCompanion(decoded.error.message); + return Result.ok(decoded.value as CompanionSectionMap[CompanionSectionKind]); + } if (kind === "col") { const decoded = decodeColSegmentCompanionResult(bytes, plan); if (Result.isError(decoded)) return invalidCompanion(decoded.error.message); @@ -155,7 +169,7 @@ export function encodeBundledSegmentCompanion(companion: { sections: CompanionSectionInputMap; }): Uint8Array { const sectionPayloads: EncodedCompanionSectionPayload[] = []; - for (const kind of ["col", "fts", "agg", "mblk"] as CompanionSectionKind[]) { + for (const kind of ["exact", "col", "fts", "agg", "mblk"] as CompanionSectionKind[]) { const section = companion.sections[kind]; if (!section) continue; sectionPayloads.push(encodeSectionPayload(kind, section, companion.plan)); diff --git a/src/search/companion_manager.ts b/src/search/companion_manager.ts index 857bf7c..956717d 100644 --- a/src/search/companion_manager.ts +++ b/src/search/companion_manager.ts @@ -17,6 +17,7 @@ import { dsError } from "../util/ds_error.ts"; import { RuntimeMemorySampler } from "../runtime_memory_sampler"; import { ConcurrencyGate } from "../concurrency_gate"; import type { ForegroundActivityTracker } from "../foreground_activity"; +import { LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS, shouldDeferEnqueuedIndexWork, shouldWaitForLowMemoryIndexQuiet } from "../index/schedule"; import { retry } from "../util/retry"; import { yieldToEventLoop } from "../util/yield"; import { searchCompanionObjectKey, streamHash16Hex } from "../util/stream_paths"; @@ -39,10 +40,12 @@ import { CompanionFileCache } from "./companion_file_cache"; import type { ColFieldInput, ColScalar, ColSectionInput, ColSectionView } from "./col_format"; import { analyzeTextValue, + canonicalizeExactValue, canonicalizeColumnValue, extractRawSearchValuesForFieldsResult, normalizeKeywordValue, } from "./schema"; +import type { ExactFieldInput, ExactSectionInput, ExactSectionView } from "./exact_format"; import type { FtsFieldInput, FtsSectionInput, FtsSectionView, FtsTermInput } from "./fts_format"; import { buildMetricsBlockRecord } from "../profiles/metrics/normalize"; import type { MetricsBlockSectionInput, MetricsBlockSectionView } from "../profiles/metrics/block_format"; @@ -76,6 +79,11 @@ type FtsFieldBuilder = { companion: FtsFieldInput; }; +type ExactFieldBuilder = { + config: SearchFieldConfig; + companion: ExactFieldInput; +}; + type GroupBuilder = { key: string; measures: Record; @@ -99,6 +107,9 @@ type CompanionBuildProgress = { docCount: number; colFields: number; colValues: number; + exactFields: number; + exactTerms: number; + exactPostings: number; ftsFields: number; ftsTerms: number; ftsPostings: number; @@ -166,24 +177,47 @@ function parseSectionKinds(row: SearchSegmentCompanionRow): Set value === "col" || value === "fts" || value === "agg" || value === "mblk") + parsed.filter( + (value): value is CompanionSectionKind => value === "exact" || value === "col" || value === "fts" || value === "agg" || value === "mblk" + ) ); } catch { return new Set(); } } +function 
parseSectionSizes(row: SearchSegmentCompanionRow): Record { + try { + const parsed = JSON.parse(row.section_sizes_json); + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return {}; + const out: Record = {}; + for (const [kind, size] of Object.entries(parsed)) { + if (typeof size === "number" && Number.isFinite(size) && size > 0) out[kind] = size; + } + return out; + } catch { + return {}; + } +} + export class SearchCompanionManager { private readonly queue = new Set(); private readonly building = new Set(); private readonly fileCache: CompanionFileCache; + private readonly decodedSectionCache = new Map< + string, + { bytes: number; companion: CompanionSectionMap[CompanionSectionKind] } + >(); + private decodedSectionCacheBytes = 0; private readonly segmentCache?: SegmentDiskCache; private readonly yieldBlocks: number; private readonly memorySampler?: RuntimeMemorySampler; private readonly asyncGate: ConcurrencyGate; private readonly foregroundActivity?: ForegroundActivityTracker; private timer: any | null = null; + private wakeTimer: any | null = null; private running = false; + private firstQueuedAtMs: number | null = null; constructor( private readonly cfg: Config, @@ -228,18 +262,53 @@ export class SearchCompanionManager { stop(): void { if (this.timer) clearInterval(this.timer); + if (this.wakeTimer) clearTimeout(this.wakeTimer); this.timer = null; + this.wakeTimer = null; this.fileCache.clearMapped(); } enqueue(stream: string): void { + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); this.queue.add(stream); + if (shouldDeferEnqueuedIndexWork(this.cfg)) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + this.scheduleTick(); + } + + private scheduleTick(delayMs = 0): void { + if (!this.timer || this.wakeTimer) return; + this.wakeTimer = setTimeout(() => { + this.wakeTimer = null; + if ( + shouldWaitForLowMemoryIndexQuiet( + this.cfg, + this.firstQueuedAtMs, + this.foregroundActivity?.wasActiveWithin(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS) ?? false + ) + ) { + this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS); + return; + } + if (this.running) { + this.scheduleTick(250); + return; + } + void this.tick(); + }, delayMs); + (this.wakeTimer as { unref?: () => void }).unref?.(); } async getColSegmentCompanion(stream: string, segmentIndex: number): Promise { return (await this.getSectionCompanion(stream, segmentIndex, "col")) ?? null; } + async getExactSegmentCompanion(stream: string, segmentIndex: number): Promise { + return (await this.getSectionCompanion(stream, segmentIndex, "exact")) ?? 
null; + } + async getFtsSegmentCompanion(stream: string, segmentIndex: number): Promise { return (await this.getFtsSegmentCompanionWithStats(stream, segmentIndex)).companion; } @@ -303,6 +372,9 @@ export class SearchCompanionManager { const row = this.db.getSearchSegmentCompanion(stream, segmentIndex); if (!row || row.plan_generation !== planRow.generation) return { companion: null, stats: { sectionGetMs, decodeMs } }; if (!parseSectionKinds(row).has(kind)) return { companion: null, stats: { sectionGetMs, decodeMs } }; + const cacheKey = this.decodedSectionCacheKey(row, kind); + const cached = this.getDecodedSectionCache(cacheKey); + if (cached) return { companion: cached as CompanionSectionMap[K], stats: { sectionGetMs, decodeMs } }; const sectionStartedAt = Date.now(); const bundle = await this.loadBundleResult(row); if (Result.isError(bundle)) throw dsError(bundle.error.message); @@ -315,12 +387,50 @@ export class SearchCompanionManager { const decoded = decodeCompanionSectionPayloadResult(kind, sectionBytes.value, plan.value); if (Result.isError(decoded)) throw dsError(decoded.error.message); decodeMs = Date.now() - decodeStartedAt; + this.setDecodedSectionCache(cacheKey, decoded.value ?? null, parseSectionSizes(row)[kind] ?? sectionBytes.value.byteLength); return { companion: decoded.value ?? null, stats: { sectionGetMs, decodeMs } }; } finally { leave?.(); } } + private decodedSectionCacheKey(row: SearchSegmentCompanionRow, kind: CompanionSectionKind): string { + return `${row.object_key}:${row.plan_generation}:${kind}`; + } + + private getDecodedSectionCache(key: string): CompanionSectionMap[CompanionSectionKind] | null { + const entry = this.decodedSectionCache.get(key); + if (!entry) return null; + this.decodedSectionCache.delete(key); + this.decodedSectionCache.set(key, entry); + return entry.companion; + } + + private setDecodedSectionCache( + key: string, + companion: CompanionSectionMap[CompanionSectionKind] | null, + bytes: number + ): void { + const budget = Math.max(0, this.cfg.searchCompanionSectionCacheBytes); + if (budget <= 0 || companion == null) return; + const safeBytes = Math.max(1, Math.ceil(bytes)); + if (safeBytes > budget) return; + const existing = this.decodedSectionCache.get(key); + if (existing) { + this.decodedSectionCacheBytes -= existing.bytes; + this.decodedSectionCache.delete(key); + } + this.decodedSectionCache.set(key, { bytes: safeBytes, companion }); + this.decodedSectionCacheBytes += safeBytes; + while (this.decodedSectionCacheBytes > budget) { + const oldestKey = this.decodedSectionCache.keys().next().value; + if (oldestKey == null) break; + const oldest = this.decodedSectionCache.get(oldestKey); + this.decodedSectionCache.delete(oldestKey); + this.decodedSectionCacheBytes -= oldest?.bytes ?? 0; + } + } + private getCurrentPlanRow(stream: string): SearchCompanionPlanRow | null { const regRes = this.registry.getRegistryResult(stream); if (Result.isError(regRes)) return null; @@ -412,6 +522,12 @@ export class SearchCompanionManager { } } finally { this.running = false; + if (this.queue.size > 0) { + if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now(); + this.scheduleTick(shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0); + } else { + this.firstQueuedAtMs = null; + } } } @@ -618,6 +734,7 @@ export class SearchCompanionManager { leaveLoad?.(); if (Result.isError(bytesRes)) return bytesRes; const segmentBytes = bytesRes.value; + const exactBuilders = plan.families.exact ? 
this.createExactBuilders(registry) : new Map(); const colBuilders = plan.families.col ? this.createColBuilders(registry) : new Map(); const ftsBuilders = plan.families.fts ? this.createFtsBuilders(registry) : new Map(); const aggBuildersRes = plan.families.agg ? this.createAggRollupBuildersResult(registry) : Result.ok(new Map()); @@ -627,6 +744,7 @@ export class SearchCompanionManager { ? { records: [], minWindowStartMs: undefined, maxWindowEndMs: undefined } : null; const requiredFieldNames = new Set(); + for (const fieldName of exactBuilders.keys()) requiredFieldNames.add(fieldName); for (const fieldName of colBuilders.keys()) requiredFieldNames.add(fieldName); for (const fieldName of ftsBuilders.keys()) requiredFieldNames.add(fieldName); for (const builder of aggBuilders.values()) { @@ -647,6 +765,9 @@ export class SearchCompanionManager { rawSearchValues = rawValuesRes.value; } if (rawSearchValues) { + const leaveExact = this.memorySampler?.enter("companion_record_exact", { doc_count: docCount }); + this.recordExactBuilders(exactBuilders, rawSearchValues, docCount); + leaveExact?.(); const leaveCol = this.memorySampler?.enter("companion_record_col", { doc_count: docCount }); this.recordColBuilders(colBuilders, rawSearchValues, docCount); leaveCol?.(); @@ -680,7 +801,7 @@ export class SearchCompanionManager { this.memorySampler.capture("companion_progress", { stream: seg.stream, segment_index: seg.segment_index, - ...this.summarizeCompanionBuildProgress(colBuilders, ftsBuilders, aggBuilders, metricsBuilder, docCount + 1), + ...this.summarizeCompanionBuildProgress(exactBuilders, colBuilders, ftsBuilders, aggBuilders, metricsBuilder, docCount + 1), }); } return Result.ok(undefined); @@ -700,6 +821,16 @@ export class SearchCompanionManager { sectionSizes[kind] = payload.payload.byteLength; }; + if (plan.families.exact) { + const leaveExactEncode = this.memorySampler?.enter("companion_encode_exact", { + stream: seg.stream, + segment_index: seg.segment_index, + doc_count: docCountRes.value, + }); + addSection(encodeCompanionSectionPayload("exact", this.finalizeExactSection(exactBuilders, docCountRes.value), plan)); + exactBuilders.clear(); + leaveExactEncode?.(); + } if (plan.families.col) { const leaveColEncode = this.memorySampler?.enter("companion_encode_col", { stream: seg.stream, @@ -780,6 +911,49 @@ export class SearchCompanionManager { return builders; } + private createExactBuilders(registry: SchemaRegistry): Map { + const builders = new Map(); + for (const [fieldName, field] of Object.entries(registry.search?.fields ?? {}).sort((a, b) => a[0].localeCompare(b[0]))) { + if (field.exact !== true || field.kind === "text") continue; + builders.set(fieldName, { + config: field, + companion: { + kind: field.kind, + exists_docs: [], + terms: Object.create(null) as Record, + }, + }); + } + return builders; + } + + private recordExactBuilders(builders: Map, rawSearchValues: Map, docCount: number): void { + for (const [fieldName, builder] of builders) { + const fieldCompanion = builder.companion; + let hasValue = false; + for (const rawValue of rawSearchValues.get(fieldName) ?? []) { + const canonical = canonicalizeExactValue(builder.config, rawValue); + if (canonical == null) continue; + hasValue = true; + const postings = fieldCompanion.terms[canonical] ?? 
[]; + if (postings.length === 0 || postings[postings.length - 1] !== docCount) postings.push(docCount); + fieldCompanion.terms[canonical] = postings; + } + if (hasValue) fieldCompanion.exists_docs.push(docCount); + } + } + + private finalizeExactSection(builders: Map, docCount: number): ExactSectionInput { + const orderedFields = Object.create(null) as Record; + for (const [fieldName, builder] of Array.from(builders.entries()).sort((a, b) => a[0].localeCompare(b[0]))) { + orderedFields[fieldName] = builder.companion; + } + return { + doc_count: docCount, + fields: orderedFields, + }; + } + private recordColBuilders(builders: Map, rawSearchValues: Map, docCount: number): void { for (const [fieldName, builder] of builders) { if (builder.invalid) continue; @@ -948,12 +1122,22 @@ export class SearchCompanionManager { } private summarizeCompanionBuildProgress( + exactBuilders: Map, colBuilders: Map, ftsBuilders: Map, aggBuilders: Map, metricsBuilder: MetricsBlockBuilder | null, docCount: number ): CompanionBuildProgress { + let exactTerms = 0; + let exactPostings = 0; + for (const builder of exactBuilders.values()) { + for (const postings of Object.values(builder.companion.terms)) { + exactTerms += 1; + exactPostings += postings.length; + } + } + let colValues = 0; for (const builder of colBuilders.values()) colValues += builder.values.length; @@ -979,6 +1163,9 @@ export class SearchCompanionManager { return { docCount, + exactFields: exactBuilders.size, + exactTerms, + exactPostings, colFields: colBuilders.size, colValues, ftsFields: ftsBuilders.size, diff --git a/src/search/companion_plan.ts b/src/search/companion_plan.ts index 4cdb155..a742e34 100644 --- a/src/search/companion_plan.ts +++ b/src/search/companion_plan.ts @@ -10,7 +10,7 @@ import type { import { parseDurationMsResult } from "../util/duration"; import { dsError } from "../util/ds_error"; -export type SearchCompanionFamily = "col" | "fts" | "agg" | "mblk"; +export type SearchCompanionFamily = "exact" | "col" | "fts" | "agg" | "mblk"; export type SearchCompanionPlanField = { ordinal: number; @@ -62,6 +62,7 @@ export type SearchCompanionPlan = { export function buildDesiredSearchCompanionPlan(registry: SchemaRegistry): SearchCompanionPlan { const search = registry.search; const families: Record = { + exact: false, col: false, fts: false, agg: false, @@ -73,6 +74,7 @@ export function buildDesiredSearchCompanionPlan(registry: SchemaRegistry): Searc const wantedFieldNames = new Set(); for (const [name, field] of Object.entries(search.fields)) { + if (field.exact === true && field.kind !== "text") wantedFieldNames.add(name); if (field.column === true) wantedFieldNames.add(name); if (field.kind === "text" || (field.kind === "keyword" && field.prefix === true)) wantedFieldNames.add(name); } @@ -109,6 +111,7 @@ export function buildDesiredSearchCompanionPlan(registry: SchemaRegistry): Searc }); const colFields = fields.filter((field) => field.column); + const exactFields = fields.filter((field) => field.exact && field.kind !== "text"); const ftsFields = fields.filter((field) => field.kind === "text" || (field.kind === "keyword" && field.prefix)); const rollups = Object.entries(search.rollups ?? 
{}) .sort((a, b) => a[0].localeCompare(b[0])) @@ -148,6 +151,7 @@ export function buildDesiredSearchCompanionPlan(registry: SchemaRegistry): Searc } satisfies SearchCompanionPlanRollup; }); + families.exact = exactFields.length > 0; families.col = colFields.length > 0; families.fts = ftsFields.length > 0; families.agg = rollups.length > 0; @@ -160,6 +164,13 @@ export function buildDesiredSearchCompanionPlan(registry: SchemaRegistry): Searc primaryTimestampField: search.primaryTimestampField ?? null, primaryTimestampFieldOrdinal: fieldOrdinalByName.get(search.primaryTimestampField) ?? null, profile: search.profile ?? null, + exactFields: exactFields.map((field) => ({ + ordinal: field.ordinal, + name: field.name, + kind: field.kind, + bindings: field.bindings, + normalizer: field.normalizer, + })), colFields: colFields.map((field) => ({ ordinal: field.ordinal, name: field.name, diff --git a/src/search/exact_format.ts b/src/search/exact_format.ts new file mode 100644 index 0000000..92c41e9 --- /dev/null +++ b/src/search/exact_format.ts @@ -0,0 +1,281 @@ +import { Result } from "better-result"; +import type { SearchFieldKind } from "../schema/registry"; +import { decodeDocIds, encodeDocSet } from "./binary/docset"; +import { BinaryCursor, BinaryPayloadError, BinaryWriter, concatBytes, readU16, readU32 } from "./binary/codec"; +import { RestartStringTableView, encodeRestartStringTable } from "./binary/restart_strings"; +import type { SearchCompanionPlan } from "./companion_plan"; + +export type ExactFieldInput = { + kind: SearchFieldKind; + exists_docs: number[]; + terms: Record; +}; + +export type ExactSectionInput = { + doc_count: number; + fields: Record; +}; + +const KIND_CODE: Record = { + keyword: 0, + text: 1, + integer: 2, + float: 3, + date: 4, + bool: 5, +}; + +const CODE_KIND: Record = { + 0: "keyword", + 1: "text", + 2: "integer", + 3: "float", + 4: "date", + 5: "bool", +}; + +const FIELD_DIR_ENTRY_BYTES = 52; + +export type ExactFormatError = { kind: "invalid_exact_segment"; message: string }; + +function invalidExact(message: string): Result { + return Result.err({ kind: "invalid_exact_segment", message }); +} + +class U32LeView { + private readonly view: DataView; + readonly length: number; + + constructor(private readonly bytes: Uint8Array) { + this.view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength); + this.length = Math.floor(bytes.byteLength / 4); + } + + get(index: number): number { + if (index < 0 || index >= this.length) return 0; + return this.view.getUint32(index * 4, true); + } +} + +export class ExactFieldView { + private readonly termsView: RestartStringTableView; + private readonly docFreqs: U32LeView; + private readonly postingOffsets: U32LeView; + private existsDocIdsCache: number[] | null = null; + private readonly termDocIdsCache = new Map(); + + constructor( + readonly name: string, + readonly kind: SearchFieldKind, + private readonly docCount: number, + private readonly existsCodec: number, + private readonly existsPayload: Uint8Array, + dictPayload: Uint8Array, + docFreqPayload: Uint8Array, + postingOffsetsPayload: Uint8Array, + private readonly postingsPayload: Uint8Array + ) { + this.termsView = new RestartStringTableView(dictPayload); + this.docFreqs = new U32LeView(docFreqPayload); + this.postingOffsets = new U32LeView(postingOffsetsPayload); + } + + existsDocIds(): number[] { + if (!this.existsDocIdsCache) { + this.existsDocIdsCache = decodeDocIds(this.docCount, this.existsCodec, this.existsPayload); + } + return this.existsDocIdsCache; 
+ } + + lookupTerm(term: string): number | null { + return this.termsView.lookup(term); + } + + docFreq(termOrdinal: number): number { + return this.docFreqs.get(termOrdinal); + } + + docIds(termOrdinal: number): number[] { + const cached = this.termDocIdsCache.get(termOrdinal); + if (cached) return cached; + const start = this.postingOffsets.get(termOrdinal); + const end = this.postingOffsets.get(termOrdinal + 1) || start; + const bytes = this.postingsPayload.subarray(start, end); + const docIds = decodeDocIds(this.docCount, 0xff & (bytes[0] ?? 0), bytes.subarray(1)); + this.termDocIdsCache.set(termOrdinal, docIds); + return docIds; + } +} + +export class ExactSectionView { + private readonly fieldByName = new Map(); + + constructor(readonly docCount: number, readonly fields: ExactFieldView[]) { + for (const field of fields) this.fieldByName.set(field.name, field); + } + + getField(fieldName: string): ExactFieldView | null { + return this.fieldByName.get(fieldName) ?? null; + } +} + +export function encodeExactSegmentCompanion(input: ExactSectionInput, plan: SearchCompanionPlan): Uint8Array { + const orderedFields = plan.fields + .filter((field) => input.fields[field.name] && field.exact && field.kind !== "text") + .sort((a, b) => a.ordinal - b.ordinal); + const fieldPayloads: Array<{ + entry: { + fieldOrdinal: number; + kind: SearchFieldKind; + termCount: number; + existsOffset: number; + existsLength: number; + existsCodec: number; + dictOffset: number; + dictLength: number; + dfOffset: number; + dfLength: number; + postingsOffsetTableOffset: number; + postingsOffsetTableLength: number; + postingsDataOffset: number; + postingsDataLength: number; + }; + exists: Uint8Array; + dict: Uint8Array; + dfs: Uint8Array; + postingOffsets: Uint8Array; + postings: Uint8Array; + }> = []; + + for (const planField of orderedFields) { + const field = input.fields[planField.name]!; + const terms = Object.keys(field.terms).sort((a, b) => a.localeCompare(b)); + const dict = encodeRestartStringTable(terms); + const encodedExists = encodeDocSet(input.doc_count, field.exists_docs); + const dfWriter = new BinaryWriter(); + const postingOffsetWriter = new BinaryWriter(); + const postingsWriter = new BinaryWriter(); + let postingOffset = 0; + for (const term of terms) { + const encodedPostings = encodeDocSet(input.doc_count, field.terms[term] ?? 
[]); + dfWriter.writeU32(encodedPostings.docIds.length); + postingOffsetWriter.writeU32(postingOffset); + postingsWriter.writeU8(encodedPostings.codec); + postingsWriter.writeBytes(encodedPostings.payload); + postingOffset += 1 + encodedPostings.payload.byteLength; + } + postingOffsetWriter.writeU32(postingOffset); + fieldPayloads.push({ + entry: { + fieldOrdinal: planField.ordinal, + kind: field.kind, + termCount: terms.length, + existsOffset: 0, + existsLength: encodedExists.payload.byteLength, + existsCodec: encodedExists.codec, + dictOffset: 0, + dictLength: dict.byteLength, + dfOffset: 0, + dfLength: dfWriter.length, + postingsOffsetTableOffset: 0, + postingsOffsetTableLength: postingOffsetWriter.length, + postingsDataOffset: 0, + postingsDataLength: postingsWriter.length, + }, + exists: encodedExists.payload, + dict, + dfs: dfWriter.finish(), + postingOffsets: postingOffsetWriter.finish(), + postings: postingsWriter.finish(), + }); + } + + const header = new BinaryWriter(); + header.writeU32(input.doc_count); + header.writeU16(fieldPayloads.length); + header.writeU16(0); + + let payloadOffset = header.length + FIELD_DIR_ENTRY_BYTES * fieldPayloads.length; + for (const payload of fieldPayloads) { + payload.entry.existsOffset = payloadOffset; + payloadOffset += payload.exists.byteLength; + payload.entry.dictOffset = payloadOffset; + payloadOffset += payload.dict.byteLength; + payload.entry.dfOffset = payloadOffset; + payloadOffset += payload.dfs.byteLength; + payload.entry.postingsOffsetTableOffset = payloadOffset; + payloadOffset += payload.postingOffsets.byteLength; + payload.entry.postingsDataOffset = payloadOffset; + payloadOffset += payload.postings.byteLength; + } + + const directory = new BinaryWriter(); + for (const payload of fieldPayloads) { + directory.writeU16(payload.entry.fieldOrdinal); + directory.writeU8(KIND_CODE[payload.entry.kind] ?? 
0); + directory.writeU8(0); + directory.writeU32(payload.entry.termCount); + directory.writeU32(payload.entry.existsOffset); + directory.writeU32(payload.entry.existsLength); + directory.writeU32((payload.entry.existsCodec << 24) | 0); + directory.writeU32(payload.entry.dictOffset); + directory.writeU32(payload.entry.dictLength); + directory.writeU32(payload.entry.dfOffset); + directory.writeU32(payload.entry.dfLength); + directory.writeU32(payload.entry.postingsOffsetTableOffset); + directory.writeU32(payload.entry.postingsOffsetTableLength); + directory.writeU32(payload.entry.postingsDataOffset); + directory.writeU32(payload.entry.postingsDataLength); + } + + return concatBytes([ + header.finish(), + directory.finish(), + ...fieldPayloads.flatMap((payload) => [payload.exists, payload.dict, payload.dfs, payload.postingOffsets, payload.postings]), + ]); +} + +export function decodeExactSegmentCompanionResult(bytes: Uint8Array, plan: SearchCompanionPlan): Result { + try { + const cursor = new BinaryCursor(bytes); + const docCount = cursor.readU32(); + const fieldCount = cursor.readU16(); + cursor.readU16(); + const directoryOffset = cursor.offset; + const fields: ExactFieldView[] = []; + for (let index = 0; index < fieldCount; index++) { + const entryOffset = directoryOffset + index * FIELD_DIR_ENTRY_BYTES; + if (entryOffset + FIELD_DIR_ENTRY_BYTES > bytes.byteLength) return invalidExact("invalid .exact2 directory"); + const fieldOrdinal = readU16(bytes, entryOffset); + const kindCode = bytes[entryOffset + 2]!; + const existsCodec = readU32(bytes, entryOffset + 16) >>> 24; + const planField = plan.fields.find((field) => field.ordinal === fieldOrdinal); + if (!planField) return invalidExact(`missing .exact2 plan field ordinal ${fieldOrdinal}`); + const kind = CODE_KIND[kindCode]; + if (!kind) return invalidExact("invalid .exact2 field kind"); + fields.push( + new ExactFieldView( + planField.name, + kind, + docCount, + existsCodec, + slicePayload(bytes, readU32(bytes, entryOffset + 8), readU32(bytes, entryOffset + 12), "invalid .exact2 exists payload"), + slicePayload(bytes, readU32(bytes, entryOffset + 20), readU32(bytes, entryOffset + 24), "invalid .exact2 dict payload"), + slicePayload(bytes, readU32(bytes, entryOffset + 28), readU32(bytes, entryOffset + 32), "invalid .exact2 docfreq payload"), + slicePayload(bytes, readU32(bytes, entryOffset + 36), readU32(bytes, entryOffset + 40), "invalid .exact2 posting-offset payload"), + slicePayload(bytes, readU32(bytes, entryOffset + 44), readU32(bytes, entryOffset + 48), "invalid .exact2 postings payload") + ) + ); + } + return Result.ok(new ExactSectionView(docCount, fields)); + } catch (e: unknown) { + return invalidExact(String((e as any)?.message ?? 
e));
+  }
+}
+
+function slicePayload(bytes: Uint8Array, offset: number, length: number, message: string): Uint8Array {
+  if (offset < 0 || length < 0 || offset + length > bytes.byteLength) {
+    throw new BinaryPayloadError(message);
+  }
+  return bytes.subarray(offset, offset + length);
+}
diff --git a/src/search/exact_runtime.ts b/src/search/exact_runtime.ts
new file mode 100644
index 0000000..43ad024
--- /dev/null
+++ b/src/search/exact_runtime.ts
@@ -0,0 +1,55 @@
+import { Result } from "better-result";
+import { ExactSectionView } from "./exact_format";
+import type { SearchExactClause } from "./query";
+
+type CandidateDocIds = ReadonlySet<number> | null;
+
+function intersectInto(target: Set<number> | null, next: Set<number>): Set<number> {
+  if (target == null) return next;
+  for (const docId of Array.from(target)) {
+    if (!next.has(docId)) target.delete(docId);
+  }
+  return target;
+}
+
+function docsForClauseResult(
+  companion: ExactSectionView,
+  clause: SearchExactClause,
+  candidateDocIds: CandidateDocIds = null
+): Result<Set<number>, { message: string; docFreq: number }> {
+  const field = companion.getField(clause.field);
+  if (!field) return Result.err({ message: `missing .exact2 field ${clause.field}`, docFreq: Number.MAX_SAFE_INTEGER });
+  const termOrdinal = field.lookupTerm(clause.canonicalValue);
+  if (termOrdinal == null) return Result.ok(new Set());
+  const docs = new Set();
+  for (const docId of field.docIds(termOrdinal)) {
+    if (!candidateDocIds || candidateDocIds.has(docId)) docs.add(docId);
+  }
+  return Result.ok(docs);
+}
+
+export function filterDocIdsByExactClausesResult(args: {
+  companion: ExactSectionView;
+  clauses: SearchExactClause[];
+}): Result<Set<number>, { message: string }> {
+  if (args.clauses.length === 0) return Result.ok(new Set());
+
+  const planned: Array<{ clause: SearchExactClause; docFreq: number }> = [];
+  for (const clause of args.clauses) {
+    const field = args.companion.getField(clause.field);
+    if (!field) return Result.err({ message: `missing .exact2 field ${clause.field}` });
+    const termOrdinal = field.lookupTerm(clause.canonicalValue);
+    planned.push({ clause, docFreq: termOrdinal == null ? 0 : field.docFreq(termOrdinal) });
+  }
+
+  planned.sort((left, right) => left.docFreq - right.docFreq);
+  let intersection: Set<number> | null = null;
+  for (const plan of planned) {
+    const clauseRes = docsForClauseResult(args.companion, plan.clause, intersection);
+    if (Result.isError(clauseRes)) return Result.err({ message: clauseRes.error.message });
+    intersection = intersectInto(intersection, clauseRes.value);
+    if (intersection.size === 0) break;
+  }
+
+  return Result.ok(intersection ?? new Set());
+}
diff --git a/src/search/query.ts b/src/search/query.ts
index 3544551..a660396 100644
--- a/src/search/query.ts
+++ b/src/search/query.ts
@@ -498,7 +498,7 @@ function normalizeSortWithTieBreaker(search: SearchConfig, query: CompiledSearch
     ? [...explicit]
     : hasScoringTextClause(query)
       ?
[{ kind: "score", direction: "desc" }, timestampSort, { kind: "offset", direction: "desc" }]
-      : [timestampSort, { kind: "offset", direction: "desc" }];
+      : [{ kind: "offset", direction: "desc" }];
   if (!sorts.some((sort) => sort.kind === "offset")) {
     sorts.push({ kind: "offset", direction: "desc" });
   }
diff --git a/src/segment/segmenter.ts b/src/segment/segmenter.ts
index 44389f5..070150f 100644
--- a/src/segment/segmenter.ts
+++ b/src/segment/segmenter.ts
@@ -31,6 +31,7 @@ export type SegmenterMemoryStats = {
 
 const SEGMENT_COMPRESSION_WINDOW = 8;
 const MIN_COMPRESSED_FILL_RATIO = 0.5;
+const MAX_COMPRESSION_BOOST_MULTIPLIER = 5;
 
 export class Segmenter {
   private readonly config: Config;
@@ -171,6 +172,8 @@ export class Segmenter {
     }
 
     const desiredCompressedBytes = Math.ceil(this.config.segmentMaxBytes * MIN_COMPRESSED_FILL_RATIO);
     const boosted = BigInt(Math.ceil(desiredCompressedBytes / ratio));
+    const maxBoosted = baseTarget * BigInt(MAX_COMPRESSION_BOOST_MULTIPLIER);
+    if (boosted > maxBoosted) return maxBoosted;
     return boosted > baseTarget ? boosted : baseTarget;
   }
diff --git a/src/segment/segmenter_workers.ts b/src/segment/segmenter_workers.ts
index f05f71d..d3d272f 100644
--- a/src/segment/segmenter_workers.ts
+++ b/src/segment/segmenter_workers.ts
@@ -1,7 +1,7 @@
-import { fileURLToPath } from "node:url";
 import { Worker } from "node:worker_threads";
 import type { Config } from "../config";
 import { detectHostRuntime } from "../runtime/host_runtime.ts";
+import { resolveWorkerModuleUrl } from "../compute/worker_module_url";
 import type { SegmenterHooks, SegmenterMemoryStats, SegmenterOptions } from "./segmenter";
 
 export type SegmenterController = {
@@ -82,7 +82,7 @@ export class SegmenterWorkerPool implements SegmenterController {
   }
 
   private spawnWorker(idx: number): void {
-    const workerSpec = fileURLToPath(new URL("./segmenter_worker.ts", import.meta.url));
+    const workerSpec = resolveWorkerModuleUrl(import.meta.url, "./segmenter_worker.ts", "../segment/segmenter_worker.js");
     const worker = new Worker(workerSpec, {
       workerData: {
         config: this.config,
diff --git a/src/server.ts b/src/server.ts
index 7bd709e..fdde646 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -6,283 +6,24 @@ import { MockR2Store } from "./objectstore/mock_r2";
 import { R2ObjectStore } from "./objectstore/r2";
 import { bootstrapFromR2 } from "./bootstrap";
 import { initConsoleLogging } from "./util/log";
-import { AUTO_TUNE_PRESETS, memoryLimitForPreset, tuneForPreset, type AutoTuneConfig } from "./auto_tune";
+import { applyAutoTune, AutoTuneApplyError, parseAutoTuneArg } from "./server_auto_tune";
 
 initConsoleLogging();
 
 const args = process.argv.slice(2);
 
-let autoTuneEnabled = false;
-let autoTuneValueMb: number | null = null;
-for (let i = 0; i < args.length; i++) {
-  const arg = args[i];
-  if (arg === "--auto-tune") {
-    autoTuneEnabled = true;
-    const next = args[i + 1];
-    if (next && !next.startsWith("--") && /^[0-9]+$/.test(next)) {
-      autoTuneValueMb = Number(next);
-    }
-  } else if (arg.startsWith("--auto-tune=")) {
-    autoTuneEnabled = true;
-    const raw = arg.split("=", 2)[1] ?? "";
-    if (raw.trim() !== "") autoTuneValueMb = Number(raw);
-  }
-}
-
-function formatPresetList<T>(presets: number[], selected: number, map: (preset: number) => T, fmt: (val: T) => string): string {
-  return presets
-    .map((preset) => {
-      const value = fmt(map(preset));
-      return preset === selected ?
`[${value}]` : value; - }) - .join(", "); -} - -function applyAutoTune(overrideMb: number | null): void { - const envMemRaw = process.env.DS_MEMORY_LIMIT_MB; - if (overrideMb != null) { - if (envMemRaw) { - console.error("--auto-tune with a value cannot be used with DS_MEMORY_LIMIT_MB"); +const autoTune = parseAutoTuneArg(args); +if (autoTune.enabled) { + try { + applyAutoTune(autoTune.valueMb); + } catch (error) { + if (error instanceof AutoTuneApplyError) { + console.error(error.message); process.exit(1); } - } else if (!envMemRaw) { - console.error("--auto-tune requires DS_MEMORY_LIMIT_MB to be set (or pass a value)"); - process.exit(1); - } - const memMb = overrideMb != null ? overrideMb : Number(envMemRaw); - if (!Number.isFinite(memMb) || memMb <= 0) { - const bad = overrideMb != null ? String(overrideMb) : String(envMemRaw); - console.error(`invalid DS_MEMORY_LIMIT_MB: ${bad}`); - process.exit(1); - } - if (process.env.DS_MEMORY_LIMIT_BYTES) { - console.error("--auto-tune does not allow DS_MEMORY_LIMIT_BYTES; use DS_MEMORY_LIMIT_MB"); - process.exit(1); - } - - const conflictVars = [ - "DS_SEGMENT_MAX_BYTES", - "DS_SEGMENT_TARGET_ROWS", - "DS_SQLITE_CACHE_MB", - "DS_SQLITE_CACHE_BYTES", - "DS_WORKER_SQLITE_CACHE_MB", - "DS_WORKER_SQLITE_CACHE_BYTES", - "DS_INDEX_RUN_MEM_CACHE_BYTES", - "DS_LEXICON_INDEX_CACHE_MAX_BYTES", - "DS_INGEST_MAX_BATCH_BYTES", - "DS_INGEST_MAX_QUEUE_BYTES", - "DS_INGEST_CONCURRENCY", - "DS_READ_CONCURRENCY", - "DS_SEARCH_CONCURRENCY", - "DS_ASYNC_INDEX_CONCURRENCY", - "DS_SEARCH_COMPANION_TOC_CACHE_BYTES", - "DS_SEARCH_COMPANION_SECTION_CACHE_BYTES", - "DS_SEARCH_COMPANION_BATCH_SEGMENTS", - "DS_SEARCH_COMPANION_YIELD_BLOCKS", - ]; - const conflicts = conflictVars.filter((v) => process.env[v] != null); - if (conflicts.length > 0) { - console.error(`--auto-tune cannot be used with manual memory settings: ${conflicts.join(", ")}`); - process.exit(1); + throw error; } - - const presets = [...AUTO_TUNE_PRESETS]; - const preset = [...presets].reverse().find((v) => v <= memMb); - if (!preset) { - console.error(`DS_MEMORY_LIMIT_MB=${memMb} is below the minimum preset (256)`); - process.exit(1); - } - const tune: AutoTuneConfig = tuneForPreset(preset); - - const memoryLimitMb = memoryLimitForPreset(preset); - process.env.DS_AUTO_TUNE_REQUESTED_MB = String(memMb); - process.env.DS_AUTO_TUNE_PRESET_MB = String(preset); - process.env.DS_AUTO_TUNE_EFFECTIVE_MEMORY_LIMIT_MB = String(memoryLimitMb); - process.env.DS_MEMORY_LIMIT_MB = String(memoryLimitMb); - process.env.DS_SEGMENT_MAX_BYTES = String(tune.segmentMaxMiB * 1024 * 1024); - process.env.DS_SEGMENT_TARGET_ROWS = String(tune.segmentTargetRows); - process.env.DS_SQLITE_CACHE_MB = String(tune.sqliteCacheMb); - process.env.DS_WORKER_SQLITE_CACHE_MB = String(tune.workerSqliteCacheMb); - process.env.DS_INDEX_RUN_MEM_CACHE_BYTES = String(tune.indexMemMb * 1024 * 1024); - process.env.DS_LEXICON_INDEX_CACHE_MAX_BYTES = String(tune.lexiconIndexCacheMb * 1024 * 1024); - process.env.DS_SEARCH_COMPANION_TOC_CACHE_BYTES = String(tune.searchCompanionTocCacheMb * 1024 * 1024); - process.env.DS_SEARCH_COMPANION_SECTION_CACHE_BYTES = String(tune.searchCompanionSectionCacheMb * 1024 * 1024); - process.env.DS_INGEST_MAX_BATCH_BYTES = String(tune.ingestBatchMb * 1024 * 1024); - process.env.DS_INGEST_MAX_QUEUE_BYTES = String(tune.ingestQueueMb * 1024 * 1024); - process.env.DS_INGEST_CONCURRENCY = String(tune.ingestConcurrency); - process.env.DS_READ_CONCURRENCY = String(tune.readConcurrency); - process.env.DS_SEARCH_CONCURRENCY = 
String(tune.searchConcurrency); - process.env.DS_ASYNC_INDEX_CONCURRENCY = String(tune.asyncIndexConcurrency); - process.env.DS_INDEX_BUILD_CONCURRENCY = String(tune.indexBuildConcurrency); - process.env.DS_INDEX_COMPACT_CONCURRENCY = String(tune.indexCompactConcurrency); - process.env.DS_SEGMENTER_WORKERS = String(tune.segmenterWorkers); - process.env.DS_UPLOAD_CONCURRENCY = String(tune.uploadConcurrency); - process.env.DS_SEARCH_COMPANION_BATCH_SEGMENTS = String(tune.searchCompanionBatchSegments); - process.env.DS_SEARCH_COMPANION_YIELD_BLOCKS = String(tune.searchCompanionYieldBlocks); - - const presetLine = formatPresetList(presets, preset, (v) => v, (v) => String(v)); - console.log(`Auto-tuning for memory preset ${presetLine}`); - console.log( - `DS_MEMORY_LIMIT_MB presets: ${formatPresetList(presets, preset, (p) => memoryLimitForPreset(p), (v) => String(v))}` - ); - console.log( - `DS_SEGMENT_MAX_MIB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).segmentMaxMiB, - (v) => String(v) - )}` - ); - console.log( - `DS_SEGMENT_TARGET_ROWS presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).segmentTargetRows, - (v) => String(v) - )}` - ); - console.log( - `DS_SQLITE_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).sqliteCacheMb, (v) => String(v))}` - ); - console.log( - `DS_WORKER_SQLITE_CACHE_MB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).workerSqliteCacheMb, - (v) => String(v) - )}` - ); - console.log( - `DS_INDEX_RUN_MEM_CACHE_MB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).indexMemMb, - (v) => String(v) - )}` - ); - console.log( - `DS_LEXICON_INDEX_CACHE_MB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).lexiconIndexCacheMb, - (v) => String(v) - )}` - ); - console.log( - `DS_SEARCH_COMPANION_TOC_CACHE_MB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).searchCompanionTocCacheMb, - (v) => String(v) - )}` - ); - console.log( - `DS_SEARCH_COMPANION_SECTION_CACHE_MB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).searchCompanionSectionCacheMb, - (v) => String(v) - )}` - ); - console.log( - `DS_INGEST_MAX_BATCH_MB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).ingestBatchMb, - (v) => String(v) - )}` - ); - console.log( - `DS_INGEST_MAX_QUEUE_MB presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).ingestQueueMb, - (v) => String(v) - )}` - ); - console.log( - `DS_INGEST_CONCURRENCY presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).ingestConcurrency, - (v) => String(v) - )}` - ); - console.log( - `DS_READ_CONCURRENCY presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).readConcurrency, - (v) => String(v) - )}` - ); - console.log( - `DS_SEARCH_CONCURRENCY presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).searchConcurrency, - (v) => String(v) - )}` - ); - console.log( - `DS_ASYNC_INDEX_CONCURRENCY presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).asyncIndexConcurrency, - (v) => String(v) - )}` - ); - console.log( - `DS_INDEX_BUILD_CONCURRENCY presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).indexBuildConcurrency, - (v) => String(v) - )}` - ); - console.log( - `DS_INDEX_COMPACT_CONCURRENCY presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).indexCompactConcurrency, - (v) => 
String(v) - )}` - ); - console.log( - `DS_SEGMENTER_WORKERS presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).segmenterWorkers, - (v) => String(v) - )}` - ); - console.log( - `DS_UPLOAD_CONCURRENCY presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).uploadConcurrency, - (v) => String(v) - )}` - ); - console.log( - `DS_SEARCH_COMPANION_BATCH_SEGMENTS presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).searchCompanionBatchSegments, - (v) => String(v) - )}` - ); - console.log( - `DS_SEARCH_COMPANION_YIELD_BLOCKS presets: ${formatPresetList( - presets, - preset, - (p) => tuneForPreset(p).searchCompanionYieldBlocks, - (v) => String(v) - )}` - ); } -if (autoTuneEnabled) applyAutoTune(autoTuneValueMb); - const cfg = loadConfig(); const statsEnabled = args.includes("--stats"); @@ -310,7 +51,15 @@ let store; if (storeChoice === "local") { const memBytesRaw = process.env.DS_MOCK_R2_MAX_INMEM_BYTES; const memMbRaw = process.env.DS_MOCK_R2_MAX_INMEM_MB; + const putDelayRaw = process.env.DS_MOCK_R2_PUT_DELAY_MS; + const getDelayRaw = process.env.DS_MOCK_R2_GET_DELAY_MS; + const headDelayRaw = process.env.DS_MOCK_R2_HEAD_DELAY_MS; + const listDelayRaw = process.env.DS_MOCK_R2_LIST_DELAY_MS; const memBytes = memBytesRaw ? Number(memBytesRaw) : memMbRaw ? Number(memMbRaw) * 1024 * 1024 : null; + const putDelayMs = putDelayRaw ? Number(putDelayRaw) : 0; + const getDelayMs = getDelayRaw ? Number(getDelayRaw) : 0; + const headDelayMs = headDelayRaw ? Number(headDelayRaw) : 0; + const listDelayMs = listDelayRaw ? Number(listDelayRaw) : 0; if (memBytesRaw && !Number.isFinite(memBytes)) { // eslint-disable-next-line no-console console.error(`invalid DS_MOCK_R2_MAX_INMEM_BYTES: ${memBytesRaw}`); @@ -321,13 +70,36 @@ if (storeChoice === "local") { console.error(`invalid DS_MOCK_R2_MAX_INMEM_MB: ${memMbRaw}`); process.exit(1); } + for (const [name, value] of [ + ["DS_MOCK_R2_PUT_DELAY_MS", putDelayMs], + ["DS_MOCK_R2_GET_DELAY_MS", getDelayMs], + ["DS_MOCK_R2_HEAD_DELAY_MS", headDelayMs], + ["DS_MOCK_R2_LIST_DELAY_MS", listDelayMs], + ] as const) { + if (!Number.isFinite(value) || value < 0) { + // eslint-disable-next-line no-console + console.error(`invalid ${name}: ${process.env[name]}`); + process.exit(1); + } + } const spillDir = process.env.DS_MOCK_R2_SPILL_DIR; - store = memBytes != null || spillDir ? new MockR2Store({ maxInMemoryBytes: memBytes ?? undefined, spillDir }) : new MockR2Store(); + store = new MockR2Store({ + maxInMemoryBytes: memBytes ?? 
undefined, + spillDir, + faults: { + putDelayMs, + getDelayMs, + headDelayMs, + listDelayMs, + }, + }); } else { const bucket = process.env.DURABLE_STREAMS_R2_BUCKET; const accountId = process.env.DURABLE_STREAMS_R2_ACCOUNT_ID; const accessKeyId = process.env.DURABLE_STREAMS_R2_ACCESS_KEY_ID; const secretAccessKey = process.env.DURABLE_STREAMS_R2_SECRET_ACCESS_KEY; + const endpoint = process.env.DURABLE_STREAMS_R2_ENDPOINT; + const region = process.env.DURABLE_STREAMS_R2_REGION; if (!bucket || !accountId || !accessKeyId || !secretAccessKey) { // eslint-disable-next-line no-console console.error("missing R2 env vars: DURABLE_STREAMS_R2_BUCKET, DURABLE_STREAMS_R2_ACCOUNT_ID, DURABLE_STREAMS_R2_ACCESS_KEY_ID, DURABLE_STREAMS_R2_SECRET_ACCESS_KEY"); @@ -338,6 +110,8 @@ if (storeChoice === "local") { accountId, accessKeyId, secretAccessKey, + endpoint, + region, }); } diff --git a/src/server_auto_tune.ts b/src/server_auto_tune.ts new file mode 100644 index 0000000..fa581f1 --- /dev/null +++ b/src/server_auto_tune.ts @@ -0,0 +1,158 @@ +import { AUTO_TUNE_PRESETS, memoryLimitForPreset, tuneForPreset, type AutoTuneConfig } from "./auto_tune"; + +export class AutoTuneApplyError extends Error { + constructor(message: string) { + super(message); + this.name = "AutoTuneApplyError"; + } +} + +export function parseAutoTuneArg(args: string[]): { enabled: boolean; valueMb: number | null } { + let enabled = false; + let valueMb: number | null = null; + for (let i = 0; i < args.length; i++) { + const arg = args[i]; + if (arg === "--auto-tune") { + enabled = true; + const next = args[i + 1]; + if (next && !next.startsWith("--") && /^[0-9]+$/.test(next)) { + valueMb = Number(next); + } + } else if (arg.startsWith("--auto-tune=")) { + enabled = true; + const raw = arg.split("=", 2)[1] ?? ""; + if (raw.trim() !== "") valueMb = Number(raw); + } + } + return { enabled, valueMb }; +} + +function formatPresetList(presets: number[], selected: number, map: (preset: number) => T, fmt: (val: T) => string): string { + return presets + .map((preset) => { + const value = fmt(map(preset)); + return preset === selected ? `[${value}]` : value; + }) + .join(", "); +} + +export function applyAutoTune( + overrideMb: number | null, + opts: { + env?: NodeJS.ProcessEnv; + log?: (message: string) => void; + } = {} +): void { + const env = opts.env ?? process.env; + const log = opts.log ?? console.log; + const envMemRaw = env.DS_MEMORY_LIMIT_MB; + if (overrideMb != null) { + if (envMemRaw) { + throw new AutoTuneApplyError("--auto-tune with a value cannot be used with DS_MEMORY_LIMIT_MB"); + } + } else if (!envMemRaw) { + throw new AutoTuneApplyError("--auto-tune requires DS_MEMORY_LIMIT_MB to be set (or pass a value)"); + } + const memMb = overrideMb != null ? overrideMb : Number(envMemRaw); + if (!Number.isFinite(memMb) || memMb <= 0) { + const bad = overrideMb != null ? 
String(overrideMb) : String(envMemRaw); + throw new AutoTuneApplyError(`invalid DS_MEMORY_LIMIT_MB: ${bad}`); + } + if (env.DS_MEMORY_LIMIT_BYTES) { + throw new AutoTuneApplyError("--auto-tune does not allow DS_MEMORY_LIMIT_BYTES; use DS_MEMORY_LIMIT_MB"); + } + + const conflictVars = [ + "DS_SEGMENT_MAX_BYTES", + "DS_SEGMENT_TARGET_ROWS", + "DS_SEGMENT_CACHE_MAX_BYTES", + "DS_INDEX_CHECK_MS", + "DS_SQLITE_CACHE_MB", + "DS_SQLITE_CACHE_BYTES", + "DS_WORKER_SQLITE_CACHE_MB", + "DS_WORKER_SQLITE_CACHE_BYTES", + "DS_INDEX_RUN_MEM_CACHE_BYTES", + "DS_LEXICON_INDEX_CACHE_MAX_BYTES", + "DS_INGEST_MAX_BATCH_BYTES", + "DS_INGEST_MAX_QUEUE_BYTES", + "DS_INGEST_CONCURRENCY", + "DS_READ_CONCURRENCY", + "DS_SEARCH_CONCURRENCY", + "DS_ASYNC_INDEX_CONCURRENCY", + "DS_INDEX_BUILD_CONCURRENCY", + "DS_INDEX_COMPACT_CONCURRENCY", + "DS_SEGMENTER_WORKERS", + "DS_UPLOAD_CONCURRENCY", + "DS_SEARCH_COMPANION_TOC_CACHE_BYTES", + "DS_SEARCH_COMPANION_SECTION_CACHE_BYTES", + "DS_SEARCH_COMPANION_BATCH_SEGMENTS", + "DS_SEARCH_COMPANION_YIELD_BLOCKS", + ]; + const conflicts = conflictVars.filter((v) => env[v] != null); + if (conflicts.length > 0) { + throw new AutoTuneApplyError(`--auto-tune cannot be used with manual memory settings: ${conflicts.join(", ")}`); + } + + const presets = [...AUTO_TUNE_PRESETS]; + const preset = [...presets].reverse().find((v) => v <= memMb); + if (!preset) { + throw new AutoTuneApplyError(`DS_MEMORY_LIMIT_MB=${memMb} is below the minimum preset (256)`); + } + const tune: AutoTuneConfig = tuneForPreset(preset); + + const memoryLimitMb = memoryLimitForPreset(preset); + env.DS_AUTO_TUNE_REQUESTED_MB = String(memMb); + env.DS_AUTO_TUNE_PRESET_MB = String(preset); + env.DS_AUTO_TUNE_EFFECTIVE_MEMORY_LIMIT_MB = String(memoryLimitMb); + env.DS_MEMORY_LIMIT_MB = String(memoryLimitMb); + env.DS_SEGMENT_MAX_BYTES = String(tune.segmentMaxMiB * 1024 * 1024); + env.DS_SEGMENT_TARGET_ROWS = String(tune.segmentTargetRows); + env.DS_SEGMENT_CACHE_MAX_BYTES = String(tune.segmentCacheMb * 1024 * 1024); + env.DS_INDEX_CHECK_MS = String(tune.indexCheckMs); + env.DS_SQLITE_CACHE_MB = String(tune.sqliteCacheMb); + env.DS_WORKER_SQLITE_CACHE_MB = String(tune.workerSqliteCacheMb); + env.DS_INDEX_RUN_MEM_CACHE_BYTES = String(tune.indexMemMb * 1024 * 1024); + env.DS_LEXICON_INDEX_CACHE_MAX_BYTES = String(tune.lexiconIndexCacheMb * 1024 * 1024); + env.DS_SEARCH_COMPANION_TOC_CACHE_BYTES = String(tune.searchCompanionTocCacheMb * 1024 * 1024); + env.DS_SEARCH_COMPANION_SECTION_CACHE_BYTES = String(tune.searchCompanionSectionCacheMb * 1024 * 1024); + env.DS_INGEST_MAX_BATCH_BYTES = String(tune.ingestBatchMb * 1024 * 1024); + env.DS_INGEST_MAX_QUEUE_BYTES = String(tune.ingestQueueMb * 1024 * 1024); + env.DS_INGEST_CONCURRENCY = String(tune.ingestConcurrency); + env.DS_READ_CONCURRENCY = String(tune.readConcurrency); + env.DS_SEARCH_CONCURRENCY = String(tune.searchConcurrency); + env.DS_ASYNC_INDEX_CONCURRENCY = String(tune.asyncIndexConcurrency); + env.DS_INDEX_BUILD_CONCURRENCY = String(tune.indexBuildConcurrency); + env.DS_INDEX_COMPACT_CONCURRENCY = String(tune.indexCompactConcurrency); + env.DS_SEGMENTER_WORKERS = String(tune.segmenterWorkers); + env.DS_UPLOAD_CONCURRENCY = String(tune.uploadConcurrency); + env.DS_SEARCH_COMPANION_BATCH_SEGMENTS = String(tune.searchCompanionBatchSegments); + env.DS_SEARCH_COMPANION_YIELD_BLOCKS = String(tune.searchCompanionYieldBlocks); + + const presetLine = formatPresetList(presets, preset, (v) => v, (v) => String(v)); + log(`Auto-tuning for memory preset ${presetLine}`); + 
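
Because applyAutoTune now accepts an injected env and log and reports failures as AutoTuneApplyError instead of exiting, it can be exercised against a scratch environment. A hedged usage sketch, not part of the patch; the import path assumes a sibling module under src/, and the concrete derived values depend on AUTO_TUNE_PRESETS.

```ts
import { AutoTuneApplyError, applyAutoTune, parseAutoTuneArg } from "./server_auto_tune";

const scratchEnv: NodeJS.ProcessEnv = { DS_MEMORY_LIMIT_MB: "1024" };
const lines: string[] = [];
const parsed = parseAutoTuneArg(["--auto-tune"]);

try {
  if (parsed.enabled) {
    // Writes the derived knobs (DS_AUTO_TUNE_PRESET_MB, DS_SEGMENT_MAX_BYTES, ...) into
    // scratchEnv and logs the preset tables through the injected logger; process.env is untouched.
    applyAutoTune(parsed.valueMb, { env: scratchEnv, log: (message) => lines.push(message) });
  }
} catch (error) {
  if (error instanceof AutoTuneApplyError) console.error(error.message);
  else throw error;
}
```
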
log( + `DS_MEMORY_LIMIT_MB presets: ${formatPresetList(presets, preset, (p) => memoryLimitForPreset(p), (v) => String(v))}` + ); + log(`DS_SEGMENT_MAX_MIB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).segmentMaxMiB, (v) => String(v))}`); + log(`DS_SEGMENT_TARGET_ROWS presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).segmentTargetRows, (v) => String(v))}`); + log(`DS_SEGMENT_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).segmentCacheMb, (v) => String(v))}`); + log(`DS_INDEX_CHECK_MS presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).indexCheckMs, (v) => String(v))}`); + log(`DS_SQLITE_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).sqliteCacheMb, (v) => String(v))}`); + log(`DS_WORKER_SQLITE_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).workerSqliteCacheMb, (v) => String(v))}`); + log(`DS_INDEX_RUN_MEM_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).indexMemMb, (v) => String(v))}`); + log(`DS_LEXICON_INDEX_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).lexiconIndexCacheMb, (v) => String(v))}`); + log(`DS_SEARCH_COMPANION_TOC_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).searchCompanionTocCacheMb, (v) => String(v))}`); + log(`DS_SEARCH_COMPANION_SECTION_CACHE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).searchCompanionSectionCacheMb, (v) => String(v))}`); + log(`DS_INGEST_MAX_BATCH_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).ingestBatchMb, (v) => String(v))}`); + log(`DS_INGEST_MAX_QUEUE_MB presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).ingestQueueMb, (v) => String(v))}`); + log(`DS_INGEST_CONCURRENCY presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).ingestConcurrency, (v) => String(v))}`); + log(`DS_READ_CONCURRENCY presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).readConcurrency, (v) => String(v))}`); + log(`DS_SEARCH_CONCURRENCY presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).searchConcurrency, (v) => String(v))}`); + log(`DS_ASYNC_INDEX_CONCURRENCY presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).asyncIndexConcurrency, (v) => String(v))}`); + log(`DS_INDEX_BUILD_CONCURRENCY presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).indexBuildConcurrency, (v) => String(v))}`); + log(`DS_INDEX_COMPACT_CONCURRENCY presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).indexCompactConcurrency, (v) => String(v))}`); + log(`DS_SEGMENTER_WORKERS presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).segmenterWorkers, (v) => String(v))}`); + log(`DS_UPLOAD_CONCURRENCY presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).uploadConcurrency, (v) => String(v))}`); + log(`DS_SEARCH_COMPANION_BATCH_SEGMENTS presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).searchCompanionBatchSegments, (v) => String(v))}`); + log(`DS_SEARCH_COMPANION_YIELD_BLOCKS presets: ${formatPresetList(presets, preset, (p) => tuneForPreset(p).searchCompanionYieldBlocks, (v) => String(v))}`); +} diff --git a/src/touch/worker_pool.ts b/src/touch/worker_pool.ts index 8eb89e0..fccc7e8 100644 --- a/src/touch/worker_pool.ts +++ b/src/touch/worker_pool.ts @@ -1,8 +1,8 @@ -import { fileURLToPath } from "node:url"; import { Worker } from "node:worker_threads"; import { 
Result } from "better-result"; import type { Config } from "../config"; import { detectHostRuntime } from "../runtime/host_runtime.ts"; +import { resolveWorkerModuleUrl } from "../compute/worker_module_url"; import type { ProcessRequest, ProcessResult, WorkerMessage } from "./worker_protocol"; import { dsError } from "../util/ds_error.ts"; @@ -114,7 +114,7 @@ export class TouchProcessorWorkerPool { } private spawnWorker(idx: number, generation: number = this.generation): void { - const workerSpec = fileURLToPath(new URL("./processor_worker.ts", import.meta.url)); + const workerSpec = resolveWorkerModuleUrl(import.meta.url, "./processor_worker.ts", "../touch/processor_worker.js"); const worker = new Worker(workerSpec, { workerData: { config: this.cfg, hostRuntime: detectHostRuntime() }, diff --git a/test/aggregate_http.test.ts b/test/aggregate_http.test.ts index 21959a3..b7fdf7c 100644 --- a/test/aggregate_http.test.ts +++ b/test/aggregate_http.test.ts @@ -120,8 +120,10 @@ async function waitForUploadedWithoutCompanions( const deadline = Date.now() + timeoutMs; while (Date.now() < deadline) { const srow = app.deps.db.getStream(STREAM); - const companionSegments = app.deps.db.listSearchSegmentCompanions(STREAM); - if (srow && srow.uploaded_through >= 0n && companionSegments.length === 0) return; + if (srow && srow.uploaded_through >= 0n) { + app.deps.db.deleteSearchSegmentCompanions(STREAM); + if (app.deps.db.listSearchSegmentCompanions(STREAM).length === 0) return; + } await sleep(50); } throw new Error("timeout waiting for uploaded uncompanioned prefix"); @@ -208,7 +210,7 @@ describe("_aggregate http", () => { ); expect(res.status).toBe(200); let indexStatus = await res.json(); - expect(indexStatus.search_families.map((family: any) => family.family).sort()).toEqual(["agg", "col", "fts"]); + expect(indexStatus.search_families.map((family: any) => family.family).sort()).toEqual(["agg", "col", "exact", "fts"]); res = await app.fetch( new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}/_aggregate`, { @@ -357,7 +359,7 @@ describe("_aggregate http", () => { ); expect(res.status).toBe(200); const details = await res.json(); - expect(details.index_status.search_families.map((family: any) => family.family).sort()).toEqual(["agg", "col", "fts"]); + expect(details.index_status.search_families.map((family: any) => family.family).sort()).toEqual(["agg", "col", "exact", "fts"]); res = await app.fetch( new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}/_aggregate`, { diff --git a/test/auto_tune.test.ts b/test/auto_tune.test.ts index 857693c..1dafafe 100644 --- a/test/auto_tune.test.ts +++ b/test/auto_tune.test.ts @@ -4,24 +4,28 @@ import { memoryLimitForPreset, tuneForPreset } from "../src/auto_tune"; describe("auto tune presets", () => { test("keeps ingest batch and queue budgets conservative on small presets", () => { expect(memoryLimitForPreset(256)).toBe(300); - expect(tuneForPreset(256).segmentMaxMiB).toBe(16); - expect(tuneForPreset(256).segmentTargetRows).toBe(100_000); - expect(tuneForPreset(1024).segmentMaxMiB).toBe(16); - expect(tuneForPreset(1024).segmentTargetRows).toBe(100_000); + expect(tuneForPreset(256).segmentMaxMiB).toBe(8); + expect(tuneForPreset(256).segmentTargetRows).toBe(50_000); + expect(tuneForPreset(1024).segmentMaxMiB).toBe(8); + expect(tuneForPreset(1024).segmentTargetRows).toBe(50_000); + expect(tuneForPreset(1024).segmentCacheMb).toBe(0); + expect(tuneForPreset(1024).indexCheckMs).toBe(3_600_000); 
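
The worker pool change above swaps fileURLToPath for resolveWorkerModuleUrl so the same call works from TypeScript sources and from a built bundle. A short sketch of the two resolutions, mirroring the call in worker_pool.ts and the expectations in test/compute/worker_module_url.test.ts; the /repo paths are illustrative.

```ts
import { resolveWorkerModuleUrl } from "../compute/worker_module_url"; // path as used from src/touch/

// Running from sources: a .ts caller keeps the sibling .ts worker module.
const fromSources = resolveWorkerModuleUrl(
  "file:///repo/src/touch/worker_pool.ts",
  "./processor_worker.ts",
  "../touch/processor_worker.js"
); // -> file:///repo/src/touch/processor_worker.ts

// Running from a bundle: a .js caller falls back to the built worker path.
const fromBundle = resolveWorkerModuleUrl(
  "file:///repo/bundle/compute/entry.js",
  "./processor_worker.ts",
  "../touch/processor_worker.js"
); // -> file:///repo/bundle/touch/processor_worker.js

// Both results expose .href and are handed straight to new Worker(...) by the pool.
```
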
expect(tuneForPreset(1024).ingestBatchMb).toBe(4); expect(tuneForPreset(1024).ingestQueueMb).toBe(16); expect(tuneForPreset(1024).ingestConcurrency).toBe(2); expect(tuneForPreset(1024).readConcurrency).toBe(4); expect(tuneForPreset(1024).searchConcurrency).toBe(2); expect(tuneForPreset(1024).asyncIndexConcurrency).toBe(1); - expect(tuneForPreset(1024).segmenterWorkers).toBe(1); - expect(tuneForPreset(1024).uploadConcurrency).toBe(2); + expect(tuneForPreset(1024).segmenterWorkers).toBe(0); + expect(tuneForPreset(1024).uploadConcurrency).toBe(1); expect(tuneForPreset(1024).indexBuildConcurrency).toBe(1); expect(tuneForPreset(1024).lexiconIndexCacheMb).toBe(32); expect(tuneForPreset(1024).searchCompanionBatchSegments).toBe(1); expect(tuneForPreset(1024).searchCompanionYieldBlocks).toBe(1); expect(tuneForPreset(2048).segmentMaxMiB).toBe(16); expect(tuneForPreset(2048).segmentTargetRows).toBe(100_000); + expect(tuneForPreset(2048).segmentCacheMb).toBe(256); + expect(tuneForPreset(2048).indexCheckMs).toBe(1_000); expect(tuneForPreset(2048).ingestBatchMb).toBe(8); expect(tuneForPreset(2048).ingestQueueMb).toBe(32); expect(tuneForPreset(2048).ingestConcurrency).toBe(2); @@ -39,6 +43,8 @@ describe("auto tune presets", () => { test("preserves larger ingest presets on bigger hosts", () => { expect(tuneForPreset(4096).segmentMaxMiB).toBe(16); expect(tuneForPreset(4096).segmentTargetRows).toBe(100_000); + expect(tuneForPreset(4096).segmentCacheMb).toBe(256); + expect(tuneForPreset(4096).indexCheckMs).toBe(1_000); expect(tuneForPreset(4096).ingestBatchMb).toBe(16); expect(tuneForPreset(4096).ingestQueueMb).toBe(64); expect(tuneForPreset(4096).ingestConcurrency).toBe(4); diff --git a/test/companion_backfill.test.ts b/test/companion_backfill.test.ts index aa9572d..ca79e13 100644 --- a/test/companion_backfill.test.ts +++ b/test/companion_backfill.test.ts @@ -9,6 +9,7 @@ import { MockR2Store } from "../src/objectstore/mock_r2"; import { buildDesiredSearchCompanionPlan } from "../src/search/companion_plan"; import { decodeBundledSegmentCompanionTocResult } from "../src/search/companion_format"; import { streamHash16Hex } from "../src/util/stream_paths"; +import { LOW_MEMORY_INDEX_ENQUEUE_MAX_DEFER_MS, shouldWaitForLowMemoryIndexQuiet } from "../src/index/schedule"; const STREAM = "backfill"; @@ -126,7 +127,91 @@ async function waitForSegment( throw new Error(`timeout waiting for segment ${segmentIndex}`); } +async function waitForUploadedSegments(app: ReturnType, timeoutMs = 10_000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + const row = app.deps.db.getStream(STREAM); + const uploaded = app.deps.db.countUploadedSegments(STREAM); + if (row && uploaded > 0 && row.uploaded_through >= row.sealed_through) return uploaded; + await sleep(25); + } + throw new Error("timeout waiting for uploaded segments"); +} + describe("bundled companions and backfill", () => { + test("low-memory enqueue quiet waits are capped so trickle ingest cannot starve backfill", () => { + const cfg = { + memoryLimitBytes: 1024 * 1024 * 1024, + indexCheckIntervalMs: 60_000, + }; + const now = 10_000_000; + + expect(shouldWaitForLowMemoryIndexQuiet(cfg, now - 1_000, true, now)).toBe(true); + expect(shouldWaitForLowMemoryIndexQuiet(cfg, now - LOW_MEMORY_INDEX_ENQUEUE_MAX_DEFER_MS, true, now)).toBe(false); + expect(shouldWaitForLowMemoryIndexQuiet(cfg, now - 1_000, false, now)).toBe(false); + expect(shouldWaitForLowMemoryIndexQuiet({ ...cfg, memoryLimitBytes: 2048 * 1024 * 1024 }, now - 1_000, true, 
now)).toBe(false); + }); + + test("low-memory presets defer explicit companion enqueue wakeups to the periodic index tick", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-companion-low-memory-defer-")); + const cfg = makeConfig(root, { + memoryLimitBytes: 1024 * 1024 * 1024, + segmentMaxBytes: 180, + segmentCheckIntervalMs: 10, + uploadIntervalMs: 10, + uploadConcurrency: 1, + indexL0SpanSegments: 2, + indexCheckIntervalMs: 60_000, + segmentCacheMaxBytes: 0, + segmentFooterCacheEntries: 0, + }); + const app = createApp(cfg); + try { + let res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}`, { + method: "PUT", + headers: { "content-type": "application/json" }, + }) + ); + expect([200, 201]).toContain(res.status); + + res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}/_schema`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(SCHEMA_V1), + }) + ); + expect(res.status).toBe(200); + + for (let i = 0; i < 6; i++) { + res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + eventTime: `2026-03-25T10:0${i}:00.000Z`, + service: i % 2 === 0 ? "api" : "worker", + status: 500 + i, + why: i % 2 === 0 ? "retry later" : "issuer timeout", + pad: "x".repeat(256), + }), + }) + ); + expect(res.status).toBe(204); + } + + await waitForUploadedSegments(app); + app.deps.indexer?.enqueue(STREAM); + await sleep(200); + + expect(app.deps.db.listSearchSegmentCompanions(STREAM)).toHaveLength(0); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + test( "stores one .cix per sealed segment and backfills existing streams after search config changes", async () => { @@ -889,6 +974,13 @@ describe("bundled companions and backfill", () => { const registry = registryRes.value; const plan = buildDesiredSearchCompanionPlan(registry); const companionIndex = (app.deps.indexer as any).companionIndex; + let yieldCount = 0; + companionIndex.foregroundActivity = { + yieldBackgroundWork: async () => { + yieldCount += 1; + await sleep(0); + }, + }; let timerFiredAt = 0; const startedAt = Date.now(); @@ -900,6 +992,7 @@ describe("bundled companions and backfill", () => { const finishedAt = Date.now() - startedAt; expect(Result.isError(buildRes)).toBeFalse(); + expect(yieldCount).toBeGreaterThan(0); expect(timerFiredAt).toBeGreaterThan(0); expect(timerFiredAt).toBeLessThan(finishedAt); } finally { diff --git a/test/compute/bundle_build.test.ts b/test/compute/bundle_build.test.ts new file mode 100644 index 0000000..77182bd --- /dev/null +++ b/test/compute/bundle_build.test.ts @@ -0,0 +1,37 @@ +import { describe, expect, test } from "bun:test"; +import { fileURLToPath } from "node:url"; +import { mkdtemp, readFile, readdir, rm } from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { buildComputeBundle } from "../../scripts/compute/build-bundle.mjs"; + +describe("compute bundle build", () => { + test("emits the server entrypoint and worker entrypoints together", async () => { + const tmpRoot = await mkdtemp(path.join(os.tmpdir(), "streams-compute-bundle-")); + const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", ".."); + try { + const outDir = path.join(tmpRoot, "bundle"); + const { bundleDir, entrypoint } = await buildComputeBundle({ cwd: repoRoot, outDir }); + 
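
The quiet-wait test above pins down when low-memory presets may defer explicit index enqueues. A hedged sketch of how that gate composes in an enqueue path; the argument names (first-deferred timestamp, append-activity flag) are inferred from the call order in the test, not confirmed by this diff.

```ts
import { shouldWaitForLowMemoryIndexQuiet } from "../src/index/schedule"; // path as imported by the test

function maybeDeferEnqueue(
  cfg: { memoryLimitBytes: number; indexCheckIntervalMs: number },
  firstDeferredAtMs: number,
  appendActive: boolean,
  nowMs: number
): "defer" | "enqueue" {
  // Defers only on low-memory presets while appends are active, and only up to
  // LOW_MEMORY_INDEX_ENQUEUE_MAX_DEFER_MS, so trickle ingest cannot starve backfill.
  return shouldWaitForLowMemoryIndexQuiet(cfg, firstDeferredAtMs, appendActive, nowMs)
    ? "defer"
    : "enqueue";
}
```
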
expect(entrypoint).toBe("compute/entry.js"); + + const files = await readdir(bundleDir); + const computeFiles = await readdir(path.join(bundleDir, "compute")); + const segmentFiles = await readdir(path.join(bundleDir, "segment")); + const touchFiles = await readdir(path.join(bundleDir, "touch")); + expect(files).toContain("compute"); + expect(computeFiles).toContain("entry.js"); + expect(segmentFiles).toContain("segmenter_worker.js"); + expect(touchFiles).toContain("processor_worker.js"); + + const runtimeEntrypoint = await readFile(path.join(bundleDir, "compute", "entry.js"), "utf8"); + expect(runtimeEntrypoint).toContain( + 'resolveWorkerModuleUrl(import.meta.url, "./segmenter_worker.ts", "../segment/segmenter_worker.js")' + ); + expect(runtimeEntrypoint).toContain( + 'resolveWorkerModuleUrl(import.meta.url, "./processor_worker.ts", "../touch/processor_worker.js")' + ); + } finally { + await rm(tmpRoot, { recursive: true, force: true }); + } + }); +}); diff --git a/test/compute/demo_entry.test.ts b/test/compute/demo_entry.test.ts new file mode 100644 index 0000000..76c1ffe --- /dev/null +++ b/test/compute/demo_entry.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, test } from "bun:test"; +import { + applyColocatedComputeDemoArgv, + createExternalStreamsTarget, + resolveExternalStreamsServerUrl, +} from "../../src/compute/demo_entry"; + +describe("compute demo entrypoint", () => { + test("applies colocated Compute auto-tune before config loading", () => { + const env: NodeJS.ProcessEnv = { DS_MEMORY_LIMIT_MB: "1024" }; + const logs: string[] = []; + + const argv = applyColocatedComputeDemoArgv(["bun", "src/compute/demo_entry.ts"], env, { + log: (message) => logs.push(message), + }); + expect(argv).toEqual([ + "bun", + "src/compute/demo_entry.ts", + "--object-store", + "r2", + "--auto-tune", + ]); + expect(env.DS_AUTO_TUNE_PRESET_MB).toBe("1024"); + expect(env.DS_SEGMENT_MAX_BYTES).toBe(String(8 * 1024 * 1024)); + expect(env.DS_SEGMENT_TARGET_ROWS).toBe("50000"); + expect(env.DS_SEGMENT_CACHE_MAX_BYTES).toBe("0"); + expect(env.DS_SEGMENTER_WORKERS).toBe("0"); + expect(env.DS_UPLOAD_CONCURRENCY).toBe("1"); + expect(logs.some((line) => line.includes("Auto-tuning for memory preset"))).toBe(true); + }); + + test("resolves and normalizes external Streams server URLs", () => { + expect( + resolveExternalStreamsServerUrl({ + COMPUTE_DEMO_STREAMS_SERVER_URL: "cmoa45nql0u6bzycn7dwdpxe0.cdg.prisma.build/", + }), + ).toBe("https://cmoa45nql0u6bzycn7dwdpxe0.cdg.prisma.build"); + }); + + test("external target rewrites incoming requests to the configured Streams server", async () => { + const originalFetch = globalThis.fetch; + const calls: Array<{ body: string; method: string; url: string }> = []; + + globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => { + calls.push({ + body: + init?.body instanceof ArrayBuffer + ? new TextDecoder().decode(init.body) + : "", + method: init?.method ?? 
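
Outside the test harness, the same bundle build can be invoked from a one-off script. A hedged sketch assuming the buildComputeBundle({ cwd, outDir }) contract asserted above; the .compute-build output directory matches the new .gitignore entry but is otherwise illustrative.

```ts
import path from "node:path";
import { buildComputeBundle } from "./scripts/compute/build-bundle.mjs"; // path from the repo root

const { bundleDir, entrypoint } = await buildComputeBundle({
  cwd: process.cwd(),
  outDir: path.join(process.cwd(), ".compute-build"),
});

// entrypoint is "compute/entry.js"; segment/ and touch/ worker entrypoints are emitted beside it.
console.log(`compute bundle at ${bundleDir}, entrypoint ${entrypoint}`);
```
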
"GET", + url: String(input), + }); + return Response.json({ ok: true }); + }) as typeof fetch; + + try { + const target = createExternalStreamsTarget( + "https://cmoa45nql0u6bzycn7dwdpxe0.cdg.prisma.build/", + ); + const response = await target.fetch( + new Request("http://demo.local/v1/stream/demo?format=json", { + body: JSON.stringify([{ ok: true }]), + headers: { + "content-type": "application/json", + }, + method: "POST", + }), + ); + + expect(response.status).toBe(200); + expect(calls).toEqual([ + { + body: JSON.stringify([{ ok: true }]), + method: "POST", + url: "https://cmoa45nql0u6bzycn7dwdpxe0.cdg.prisma.build/v1/stream/demo?format=json", + }, + ]); + } finally { + globalThis.fetch = originalFetch; + } + }); +}); diff --git a/test/compute/demo_site.test.ts b/test/compute/demo_site.test.ts new file mode 100644 index 0000000..cda7855 --- /dev/null +++ b/test/compute/demo_site.test.ts @@ -0,0 +1,231 @@ +import { afterEach, describe, expect, test } from "bun:test"; +import { mkdtempSync, rmSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { createApp } from "../../src/app"; +import { createComputeDemoSite, type PrebuiltStudioAssets } from "../../src/compute/demo_site"; +import { loadConfig } from "../../src/config"; +import { MockR2Store } from "../../src/objectstore/mock_r2"; + +function createDemoTestApp(rootDir: string) { + const base = loadConfig(); + const app = createApp( + { + ...base, + dbPath: `${rootDir}/wal.sqlite`, + port: 0, + rootDir, + searchWalOverlayQuietPeriodMs: 0, + segmentCheckIntervalMs: 60_000, + uploadIntervalMs: 60_000, + }, + new MockR2Store(), + ); + + return app; +} + +const fakeStudioAssets: PrebuiltStudioAssets = { + appScript: "window.__studioLoaded = true;", + appStyles: "body{background:#000;color:#fff;}", + builtAssets: new Map([ + [ + "/asset.svg", + { + bytes: new TextEncoder().encode(""), + contentType: "image/svg+xml; charset=utf-8", + }, + ], + ]), +}; + +const roots: string[] = []; + +afterEach(() => { + while (roots.length > 0) { + const root = roots.pop(); + if (root) rmSync(root, { force: true, recursive: true }); + } +}); + +async function readJson(response: Response): Promise { + const text = await response.text(); + return text === "" ? 
null : JSON.parse(text); +} + +async function waitForJob(site: ReturnType, id: string): Promise { + const deadline = Date.now() + 10_000; + + while (Date.now() < deadline) { + const response = await site.fetch( + new Request(`http://local/api/generate/jobs/${encodeURIComponent(id)}`, { + method: "GET", + }), + ); + expect(response.status).toBe(200); + const payload = await readJson(response); + if (payload.job.status === "succeeded" || payload.job.status === "failed") { + return payload.job; + } + await Bun.sleep(10); + } + + throw new Error("timed out waiting for generate job"); +} + +describe("compute demo site", () => { + test("serves studio shell and proxies streams requests", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-compute-demo-studio-")); + roots.push(root); + const streamsApp = createDemoTestApp(root); + const site = createComputeDemoSite({ + studioAssets: fakeStudioAssets, + streamsApp, + }); + + try { + const studioResponse = await site.fetch(new Request("http://local/studio")); + expect(studioResponse.status).toBe(200); + expect(await studioResponse.text()).toContain("/studio/app.js"); + + const generateResponse = await site.fetch(new Request("http://local/generate")); + expect(generateResponse.status).toBe(200); + const generateHtml = await generateResponse.text(); + expect(generateHtml).toContain('value="demo-app"'); + expect(generateHtml).toContain("Insert 100k"); + + const configResponse = await site.fetch( + new Request("http://local/api/config"), + ); + expect(configResponse.status).toBe(200); + expect(await readJson(configResponse)).toEqual({ + ai: { enabled: false }, + bootId: expect.any(String), + database: { enabled: false }, + streams: { url: "/studio/api/streams" }, + }); + + const createResponse = await site.fetch( + new Request("http://local/studio/api/streams/v1/stream/proxy-demo", { + headers: { + "content-type": "application/json", + }, + method: "PUT", + }), + ); + expect([201, 204]).toContain(createResponse.status); + + const listResponse = await site.fetch( + new Request("http://local/studio/api/streams/v1/streams"), + ); + expect(listResponse.status).toBe(200); + const body = await readJson(listResponse); + expect(body.some((stream: { name: string }) => stream.name === "proxy-demo")).toBe(true); + + const assetResponse = await site.fetch( + new Request("http://local/studio/asset.svg"), + ); + expect(assetResponse.status).toBe(200); + expect(assetResponse.headers.get("content-type")).toContain("image/svg+xml"); + } finally { + site.close(); + streamsApp.close(); + } + }); + + test("uses the requested evlog stream and appends events through the generate API", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-compute-demo-generate-")); + roots.push(root); + const streamsApp = createDemoTestApp(root); + const site = createComputeDemoSite({ + studioAssets: fakeStudioAssets, + streamsApp, + }); + + try { + const startResponse = await site.fetch( + new Request("http://local/api/generate/jobs", { + body: JSON.stringify({ count: 1_000, stream: "demo-app" }), + headers: { + "content-type": "application/json", + }, + method: "POST", + }), + ); + expect(startResponse.status).toBe(202); + const startPayload = await readJson(startResponse); + expect(startPayload.job.total).toBe(1_000); + expect(startPayload.job.stream).toBe("demo-app"); + + const job = await waitForJob(site, startPayload.job.id); + expect(job.status).toBe("succeeded"); + expect(job.inserted).toBe(1_000); + expect(job.batchSize).toBeGreaterThan(0); + + const secondStartResponse 
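
The generate API used in these tests can also be driven as a plain client against the demo site. A hedged sketch that relies only on the request/response shapes asserted here ({ count, stream } in, { job: { id, status, inserted } } out); error handling is minimal on purpose.

```ts
import { createComputeDemoSite } from "../../src/compute/demo_site";

async function generateEvents(site: ReturnType<typeof createComputeDemoSite>, stream: string, count: number) {
  const start = await site.fetch(
    new Request("http://local/api/generate/jobs", {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ count, stream }),
    })
  );
  if (start.status !== 202) throw new Error(`unexpected status ${start.status}`);
  const { job } = await start.json();

  // Poll until the job reaches a terminal state.
  for (;;) {
    const res = await site.fetch(new Request(`http://local/api/generate/jobs/${encodeURIComponent(job.id)}`));
    const payload = await res.json();
    if (payload.job.status === "succeeded" || payload.job.status === "failed") return payload.job;
    await Bun.sleep(25);
  }
}
```
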
= await site.fetch( + new Request("http://local/api/generate/jobs", { + body: JSON.stringify({ count: 1_000, stream: "demo-app" }), + headers: { + "content-type": "application/json", + }, + method: "POST", + }), + ); + expect(secondStartResponse.status).toBe(202); + const secondStartPayload = await readJson(secondStartResponse); + expect(secondStartPayload.job.stream).toBe("demo-app"); + + const secondJob = await waitForJob(site, secondStartPayload.job.id); + expect(secondJob.status).toBe("succeeded"); + expect(secondJob.inserted).toBe(1_000); + + const detailsResponse = await streamsApp.fetch( + new Request( + `http://streams.internal/v1/stream/${encodeURIComponent(job.stream)}/_details`, + { method: "GET" }, + ), + ); + expect(detailsResponse.status).toBe(200); + const detailsPayload = await readJson(detailsResponse); + expect(detailsPayload.stream.name).toBe(job.stream); + expect(detailsPayload.stream.profile).toBe("evlog"); + expect(detailsPayload.stream.next_offset).toBe("2000"); + expect(detailsPayload.profile.profile.kind).toBe("evlog"); + + const readResponse = await streamsApp.fetch( + new Request( + `http://streams.internal/v1/stream/${encodeURIComponent(job.stream)}?format=json`, + { method: "GET" }, + ), + ); + expect(readResponse.status).toBe(200); + const readPayload = await readJson(readResponse); + expect(readPayload.length).toBeGreaterThan(0); + const timestamps = readPayload + .map((event: { timestamp?: unknown }) => + typeof event.timestamp === "string" + ? Date.parse(event.timestamp) + : Number.NaN, + ) + .filter((timestamp: number) => Number.isFinite(timestamp)); + expect(timestamps.length).toBeGreaterThan(0); + expect(Math.max(...timestamps)).toBeLessThanOrEqual(Date.now() + 1_000); + expect(readPayload[0]).toEqual( + expect.objectContaining({ + context: expect.objectContaining({ + fingerprint: expect.any(String), + }), + environment: expect.any(String), + message: expect.any(String), + path: expect.any(String), + requestId: expect.any(String), + service: expect.any(String), + traceId: expect.any(String), + }), + ); + } finally { + site.close(); + streamsApp.close(); + } + }); +}); diff --git a/test/compute/entry.test.ts b/test/compute/entry.test.ts new file mode 100644 index 0000000..54ee18f --- /dev/null +++ b/test/compute/entry.test.ts @@ -0,0 +1,43 @@ +import { describe, expect, test } from "bun:test"; +import { ensureComputeArgv } from "../../src/compute/entry"; + +describe("compute entrypoint", () => { + test("adds r2 object-store args when missing", () => { + expect(ensureComputeArgv(["bun", "src/compute/entry.ts", "--stats"])).toEqual([ + "bun", + "src/compute/entry.ts", + "--stats", + "--object-store", + "r2", + ]); + }); + + test("preserves an explicit object-store choice", () => { + expect(ensureComputeArgv(["bun", "src/compute/entry.ts", "--object-store", "local"])).toEqual([ + "bun", + "src/compute/entry.ts", + "--object-store", + "local", + ]); + }); + + test("adds auto-tune when DS_MEMORY_LIMIT_MB is set", () => { + expect(ensureComputeArgv(["bun", "src/compute/entry.ts"], { DS_MEMORY_LIMIT_MB: "1024" })).toEqual([ + "bun", + "src/compute/entry.ts", + "--object-store", + "r2", + "--auto-tune", + ]); + }); + + test("preserves an explicit auto-tune choice", () => { + expect(ensureComputeArgv(["bun", "src/compute/entry.ts", "--auto-tune=2048"], { DS_MEMORY_LIMIT_MB: "1024" })).toEqual([ + "bun", + "src/compute/entry.ts", + "--auto-tune=2048", + "--object-store", + "r2", + ]); + }); +}); diff --git a/test/compute/verify_payload.test.ts 
b/test/compute/verify_payload.test.ts new file mode 100644 index 0000000..1adfd4b --- /dev/null +++ b/test/compute/verify_payload.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, test } from "bun:test"; +import { encodeBlock } from "../../src/segment/format"; +import { readU32BE } from "../../src/util/endian"; +import { buildComputeVerifyPayload } from "../../experiments/demo/compute/verify_payload"; + +function compressionRatio(records: Uint8Array[]): number { + const block = encodeBlock( + records.map((payload, idx) => ({ + appendNs: BigInt(idx + 1), + routingKey: new Uint8Array(0), + payload, + })) + ); + const compressedLen = readU32BE(block, 8); + const payloadBytes = records.reduce((sum, payload) => sum + payload.byteLength, 0); + return compressedLen / payloadBytes; +} + +describe("compute verify payload", () => { + test("stays above the 2:1 compression threshold while remaining compressible", () => { + const records = Array.from({ length: 8 }, (_, seq) => buildComputeVerifyPayload(256 * 1024, seq)); + const ratio = compressionRatio(records); + expect(ratio).toBeGreaterThanOrEqual(0.5); + expect(ratio).toBeLessThan(0.8); + }); + + test("varies by sequence so producer retries can resend the same payload deterministically", () => { + const first = buildComputeVerifyPayload(1024, 1); + const second = buildComputeVerifyPayload(1024, 2); + const retry = buildComputeVerifyPayload(1024, 1); + expect(Array.from(first)).not.toEqual(Array.from(second)); + expect(Array.from(first)).toEqual(Array.from(retry)); + }); +}); diff --git a/test/compute/worker_module_url.test.ts b/test/compute/worker_module_url.test.ts new file mode 100644 index 0000000..f5cd72d --- /dev/null +++ b/test/compute/worker_module_url.test.ts @@ -0,0 +1,33 @@ +import { describe, expect, test } from "bun:test"; +import { resolveWorkerModuleUrl } from "../../src/compute/worker_module_url"; + +describe("resolveWorkerModuleUrl", () => { + test("keeps source-worker paths on ts modules", () => { + expect(resolveWorkerModuleUrl("file:///repo/src/segment/segmenter_workers.ts", "./segmenter_worker.ts").href).toBe( + "file:///repo/src/segment/segmenter_worker.ts" + ); + }); + + test("switches to built worker paths on js modules", () => { + expect( + resolveWorkerModuleUrl("file:///repo/bundle/compute/entry.js", "./segmenter_worker.ts", "../segment/segmenter_worker.js") + .href + ).toBe("file:///repo/bundle/segment/segmenter_worker.js"); + }); + + test("resolves bundled segment workers from the compute entrypoint", () => { + expect( + resolveWorkerModuleUrl("file:///repo/bundle/compute/entry.js", "./segmenter_worker.ts", "../segment/segmenter_worker.js").href + ).toBe( + "file:///repo/bundle/segment/segmenter_worker.js" + ); + }); + + test("resolves bundled touch workers beside the local bundle", () => { + expect( + resolveWorkerModuleUrl("file:///repo/dist/local/index-hash.js", "./processor_worker.ts", "../touch/processor_worker.js").href + ).toBe( + "file:///repo/dist/touch/processor_worker.js" + ); + }); +}); diff --git a/test/gharchive_demo.test.ts b/test/gharchive_demo.test.ts index 16df71f..3a6c1f5 100644 --- a/test/gharchive_demo.test.ts +++ b/test/gharchive_demo.test.ts @@ -408,7 +408,7 @@ describe("gharchive demo", () => { expect(Object.keys(exactDetails.schema.search.fields).sort()).toEqual(["actorLogin", "eventTime"]); expect(exactDetails.index_status.exact_indexes).toHaveLength(1); expect(exactDetails.index_status.exact_indexes[0].name).toBe("actorLogin"); - 
expect(exactDetails.index_status.search_families).toEqual([]); + expect(exactDetails.index_status.search_families.map((entry: { family: string }) => entry.family)).toEqual(["exact"]); const ftsDetailsRes = await app.fetch( new Request(`http://local/v1/stream/${encodeURIComponent(ftsStream)}/_details`, { method: "GET" }) diff --git a/test/http_behavior.test.ts b/test/http_behavior.test.ts index 74732bc..c207506 100644 --- a/test/http_behavior.test.ts +++ b/test/http_behavior.test.ts @@ -1036,6 +1036,19 @@ describe("http behavior", () => { }); }); + test("low-memory append responses close HTTP connections", async () => { + await withServer({ memoryLimitBytes: 1024 * 1024 * 1024 }, async ({ baseUrl }) => { + await fetch(`${baseUrl}/v1/stream/json-close`, { method: "PUT", headers: { "content-type": "application/json" } }); + const r = await fetch(`${baseUrl}/v1/stream/json-close`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify([{ x: 1 }, { y: 2 }]), + }); + expect(r.status).toBe(204); + expect(r.headers.get("connection")).toBe("close"); + }); + }); + test("schema routing key batch append and read by key", async () => { await withServer({}, async ({ baseUrl }) => { await fetch(`${baseUrl}/v1/stream/keys`, { method: "PUT", headers: { "content-type": "application/json" } }); diff --git a/test/objectstore_accounting.test.ts b/test/objectstore_accounting.test.ts new file mode 100644 index 0000000..46d7aa8 --- /dev/null +++ b/test/objectstore_accounting.test.ts @@ -0,0 +1,131 @@ +import { describe, expect, test } from "bun:test"; + +import { Metrics } from "../src/metrics"; +import { AccountingObjectStore } from "../src/objectstore/accounting"; +import type { ObjectStore, PutResult } from "../src/objectstore/interface"; + +const STREAM_HASH = "0123456789abcdef0123456789abcdef"; + +function findMetric( + events: Array>, + metric: string, + tags: Record +): Record | undefined { + return events.find((event) => { + if (event.metric !== metric) return false; + const eventTags = event.tags as Record | undefined; + if (!eventTags) return false; + return Object.entries(tags).every(([key, value]) => eventTags[key] === value); + }); +} + +describe("AccountingObjectStore", () => { + test("records latency metrics and request counters for classified put/get operations", async () => { + const requestCounts: Array<[string, string, string, number]> = []; + const metrics = new Metrics(); + const inner: ObjectStore = { + async put(): Promise { + return { etag: "etag-put" }; + }, + async putFile(): Promise { + return { etag: "etag-file" }; + }, + async get(): Promise { + return new Uint8Array([1, 2, 3]); + }, + async head() { + return null; + }, + async delete() {}, + async list() { + return []; + }, + }; + + const store = new AccountingObjectStore( + inner, + { + recordObjectStoreRequestByHash(streamHash: string, artifact: string, op: string, size: number) { + requestCounts.push([streamHash, artifact, op, size]); + }, + } as any, + metrics + ); + + await store.put(`streams/${STREAM_HASH}/manifest.json`, new Uint8Array([7, 8, 9])); + await store.get(`streams/${STREAM_HASH}/segments/0000000000000000.bin`); + + const events = metrics.flushInterval(); + expect(findMetric(events, "tieredstore.objectstore.put.latency", { artifact: "manifest", outcome: "ok" })).toEqual( + expect.objectContaining({ + count: 1, + metric: "tieredstore.objectstore.put.latency", + unit: "ns", + }) + ); + expect(findMetric(events, "tieredstore.objectstore.get.latency", { artifact: "segment", outcome: 
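
The new accounting wrapper decorates any ObjectStore so put/get/head/delete/list latency is classified per artifact and per-stream request counters are recorded. A hedged wiring sketch; the tests pass a stub for the accounting sink, and this diff does not show the production wiring, so the stub below is illustrative.

```ts
import { Metrics } from "../src/metrics";
import { AccountingObjectStore } from "../src/objectstore/accounting";
import type { ObjectStore } from "../src/objectstore/interface";
import { MockR2Store } from "../src/objectstore/mock_r2";

const metrics = new Metrics();
const inner: ObjectStore = new MockR2Store();
const store = new AccountingObjectStore(
  inner,
  { recordObjectStoreRequestByHash() {} } as any, // stub sink, as in the tests above
  metrics
);

// A manifest put under a stream prefix is classified as artifact "manifest".
await store.put("streams/0123456789abcdef0123456789abcdef/manifest.json", new Uint8Array([1]));
for (const event of metrics.flushInterval()) {
  if (String(event.metric).startsWith("tieredstore.objectstore.")) console.log(event.metric, event.tags);
}
```
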
"ok" })).toEqual( + expect.objectContaining({ + count: 1, + metric: "tieredstore.objectstore.get.latency", + unit: "ns", + }) + ); + expect(requestCounts).toEqual([ + [STREAM_HASH, "manifest", "put", 3], + [STREAM_HASH, "segment", "get", 3], + ]); + }); + + test("records miss, error, and stream-catalog list outcomes", async () => { + const metrics = new Metrics(); + const inner: ObjectStore = { + async put(): Promise { + return { etag: "etag-put" }; + }, + async get() { + return null; + }, + async head() { + return null; + }, + async delete() { + throw new Error("delete failed"); + }, + async list() { + return ["streams/example"]; + }, + }; + + const store = new AccountingObjectStore( + inner, + { + recordObjectStoreRequestByHash() {}, + } as any, + metrics + ); + + await expect(store.get(`streams/${STREAM_HASH}/segments/0000000000000001.bin`)).resolves.toBeNull(); + await expect(store.list("streams/")).resolves.toEqual(["streams/example"]); + await expect(store.delete(`streams/${STREAM_HASH}/segments/0000000000000001.bin`)).rejects.toThrow("delete failed"); + + const events = metrics.flushInterval(); + expect(findMetric(events, "tieredstore.objectstore.get.latency", { artifact: "segment", outcome: "miss" })).toEqual( + expect.objectContaining({ + count: 1, + metric: "tieredstore.objectstore.get.latency", + }) + ); + expect(findMetric(events, "tieredstore.objectstore.list.latency", { artifact: "stream_catalog", outcome: "ok" })).toEqual( + expect.objectContaining({ + count: 1, + metric: "tieredstore.objectstore.list.latency", + }) + ); + expect(findMetric(events, "tieredstore.objectstore.delete.latency", { artifact: "segment", outcome: "error" })).toEqual( + expect.objectContaining({ + count: 1, + metric: "tieredstore.objectstore.delete.latency", + }) + ); + }); +}); diff --git a/test/profile_metrics.test.ts b/test/profile_metrics.test.ts index 12be388..ce9c00b 100644 --- a/test/profile_metrics.test.ts +++ b/test/profile_metrics.test.ts @@ -334,7 +334,7 @@ describe("metrics profile", () => { const indexStatusRes = await fetchJsonApp(app, `http://local/v1/stream/${encodeURIComponent(stream)}/_index_status`, { method: "GET" }); expect(indexStatusRes.status).toBe(200); - expect(indexStatusRes.body?.search_families.map((family: any) => family.family).sort()).toEqual(["agg", "col", "fts", "mblk"]); + expect(indexStatusRes.body?.search_families.map((family: any) => family.family).sort()).toEqual(["agg", "col", "exact", "fts", "mblk"]); expect(indexStatusRes.body?.search_families.find((family: any) => family.family === "mblk")?.fully_indexed_uploaded_segments).toBe(true); const alignedRes = await fetchJsonApp(app, `http://local/v1/stream/${encodeURIComponent(stream)}/_aggregate`, { diff --git a/test/r2_objectstore.test.ts b/test/r2_objectstore.test.ts index 1bf7de5..b12df59 100644 --- a/test/r2_objectstore.test.ts +++ b/test/r2_objectstore.test.ts @@ -7,105 +7,103 @@ type Entry = { type: string; }; -class FakeS3File { - constructor( - private readonly client: FakeS3Client, - private readonly key: string, - private readonly range?: { begin?: number; end?: number } - ) {} - - async exists(): Promise { - this.client.existsCalls += 1; - return this.client.entries.has(this.key); - } +const TEXT_ENCODER = new TextEncoder(); +const originalFetch = globalThis.fetch; +const entries = new Map(); +const requests: Request[] = []; + +function xmlEscape(value: string): string { + return value + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"); +} - slice(begin?: number, 
end?: number): FakeS3File { - return new FakeS3File(this.client, this.key, { begin, end }); - } +function objectKeyFromUrl(url: URL): string { + const parts = url.pathname.split("/").filter(Boolean); + return parts.slice(1).map(decodeURIComponent).join("/"); +} - async arrayBuffer(): Promise { - this.client.arrayBufferCalls += 1; - const entry = this.client.entries.get(this.key); - if (!entry) throw new Error("missing"); - const start = this.range?.begin ?? 0; - const end = this.range?.end ?? entry.data.byteLength; - const sliced = entry.data.slice(start, end); - return sliced.buffer.slice(sliced.byteOffset, sliced.byteOffset + sliced.byteLength); - } +function etagFor(key: string): string { + return `"etag-${key}"`; +} - async write(data: Uint8Array | Blob, opts?: { type?: string }): Promise { - const bytes = data instanceof Uint8Array ? data : new Uint8Array(await data.arrayBuffer()); - this.client.entries.set(this.key, { +async function fakeFetch(input: RequestInfo | URL, init?: RequestInit): Promise { + const req = new Request(input, init); + requests.push(req); + expect(req.headers.get("authorization")).toStartWith("AWS4-HMAC-SHA256 "); + expect(req.headers.get("x-amz-date")).not.toBeNull(); + expect(req.headers.get("x-amz-content-sha256")).not.toBeNull(); + + const url = new URL(req.url); + const key = objectKeyFromUrl(url); + if (req.method === "PUT") { + const bytes = new Uint8Array(await req.arrayBuffer()); + entries.set(key, { data: bytes, - etag: `"etag-${this.key}"`, - type: opts?.type ?? "", + etag: etagFor(key), + type: req.headers.get("content-type") ?? "", }); - return bytes.byteLength; - } - - async stat(): Promise<{ size: number; etag: string; type: string; lastModified: Date }> { - const entry = this.client.entries.get(this.key); - if (!entry) throw new Error("missing"); - return { - size: entry.data.byteLength, - etag: entry.etag, - type: entry.type, - lastModified: new Date(), - }; + return new Response(null, { status: 200, headers: { etag: etagFor(key) } }); } - - async delete(): Promise { - this.client.entries.delete(this.key); + if (req.method === "HEAD") { + const entry = entries.get(key); + if (!entry) return new Response(null, { status: 404 }); + return new Response(null, { + status: 200, + headers: { + etag: entry.etag, + "content-length": String(entry.data.byteLength), + "content-type": entry.type, + }, + }); } -} - -class FakeS3Client { - static instances: FakeS3Client[] = []; - - readonly entries = new Map(); - readonly options: Record | undefined; - existsCalls = 0; - arrayBufferCalls = 0; - - constructor(options?: Record) { - this.options = options; - FakeS3Client.instances.push(this); + if (req.method === "GET" && url.searchParams.get("list-type") === "2") { + const prefix = url.searchParams.get("prefix") ?? ""; + const keys = [...entries.keys()].filter((k) => k.startsWith(prefix)).sort(); + const body = [ + '', + "", + "false", + ...keys.map((k) => `${xmlEscape(k)}`), + "", + ].join(""); + return new Response(TEXT_ENCODER.encode(body), { status: 200 }); } - - file(path: string): FakeS3File { - return new FakeS3File(this, path); + if (req.method === "GET") { + const entry = entries.get(key); + if (!entry) return new Response(null, { status: 404 }); + const range = req.headers.get("range"); + if (range) { + const match = range.match(/^bytes=(\d+)-(\d*)$/); + if (!match) return new Response(null, { status: 416 }); + const start = Number(match[1]); + const end = match[2] ? 
Number(match[2]) : entry.data.byteLength - 1; + return new Response(entry.data.slice(start, Math.min(end + 1, entry.data.byteLength)), { status: 206 }); + } + return new Response(entry.data.slice(), { status: 200 }); } - - async list(input?: { prefix?: string; continuationToken?: string }): Promise<{ - contents: Array<{ key: string }>; - isTruncated: boolean; - nextContinuationToken?: string; - }> { - const filtered = [...this.entries.keys()].filter((key) => (input?.prefix ? key.startsWith(input.prefix) : true)).sort(); - const start = input?.continuationToken ? Number(input.continuationToken) : 0; - const page = filtered.slice(start, start + 2); - const next = start + page.length; - return { - contents: page.map((key) => ({ key })), - isTruncated: next < filtered.length, - nextContinuationToken: next < filtered.length ? String(next) : undefined, - }; + if (req.method === "DELETE") { + entries.delete(key); + return new Response(null, { status: 204 }); } + return new Response(null, { status: 405 }); } -const originalS3Client = Bun.S3Client; - describe("R2ObjectStore", () => { beforeEach(() => { - FakeS3Client.instances = []; - (Bun as any).S3Client = FakeS3Client; + entries.clear(); + requests.length = 0; + globalThis.fetch = fakeFetch; }); afterEach(() => { - (Bun as any).S3Client = originalS3Client; + globalThis.fetch = originalFetch; }); - test("uses Bun.S3Client for R2 reads and writes", async () => { + test("uses signed fetch requests for R2 reads and writes", async () => { const store = new R2ObjectStore({ accountId: "acct", bucket: "bucket", @@ -113,16 +111,8 @@ describe("R2ObjectStore", () => { secretAccessKey: "secret", }); - const client = FakeS3Client.instances[0]; - expect(client?.options).toMatchObject({ - bucket: "bucket", - accessKeyId: "key", - secretAccessKey: "secret", - region: "auto", - endpoint: "https://acct.r2.cloudflarestorage.com", - }); - await store.put("streams/a", new Uint8Array([1, 2, 3]), { contentType: "application/octet-stream" }); + expect(requests[0]?.url).toBe("https://acct.r2.cloudflarestorage.com/bucket/streams/a"); expect(await store.head("streams/a")).toEqual({ etag: "etag-streams/a", @@ -136,7 +126,21 @@ describe("R2ObjectStore", () => { expect(await store.get("streams/a")).toBeNull(); }); - test("get handles missing objects from the GET itself without an exists preflight", async () => { + test("supports custom S3-compatible endpoints for local R2-path stress tests", async () => { + const store = new R2ObjectStore({ + accountId: "acct", + bucket: "bucket", + accessKeyId: "key", + secretAccessKey: "secret", + endpoint: "http://127.0.0.1:9000", + region: "us-east-1", + }); + + await store.put("streams/a", new Uint8Array([1])); + expect(requests[0]?.url).toBe("http://127.0.0.1:9000/bucket/streams/a"); + }); + + test("get handles missing objects", async () => { const store = new R2ObjectStore({ accountId: "acct", bucket: "bucket", @@ -144,10 +148,7 @@ describe("R2ObjectStore", () => { secretAccessKey: "secret", }); - const client = FakeS3Client.instances[0]!; expect(await store.get("streams/missing")).toBeNull(); - expect(client.existsCalls).toBe(0); - expect(client.arrayBufferCalls).toBe(1); }); test("paginates list results", async () => { @@ -159,11 +160,10 @@ describe("R2ObjectStore", () => { region: "auto", }); - const client = FakeS3Client.instances[0]!; - client.entries.set("streams/a", { data: new Uint8Array([1]), etag: '"a"', type: "application/octet-stream" }); - client.entries.set("streams/b", { data: new Uint8Array([2]), etag: '"b"', type: 
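
The endpoint/region options covered by this test back the new DURABLE_STREAMS_R2_ENDPOINT and DURABLE_STREAMS_R2_REGION env vars read in the server. A hedged sketch of pointing R2ObjectStore at a local S3-compatible server for R2-path stress tests; the credentials, bucket name, and 127.0.0.1:9000 endpoint are placeholders.

```ts
import { R2ObjectStore } from "../src/objectstore/r2"; // path assumes a caller under test/ or scripts/

const store = new R2ObjectStore({
  accountId: "acct",
  bucket: "streams-stress",
  accessKeyId: process.env.DURABLE_STREAMS_R2_ACCESS_KEY_ID ?? "minioadmin",
  secretAccessKey: process.env.DURABLE_STREAMS_R2_SECRET_ACCESS_KEY ?? "minioadmin",
  endpoint: process.env.DURABLE_STREAMS_R2_ENDPOINT ?? "http://127.0.0.1:9000",
  region: process.env.DURABLE_STREAMS_R2_REGION ?? "us-east-1",
});

// Per the custom-endpoint test above, requests use path-style URLs on the configured endpoint,
// e.g. PUT streams/a -> http://127.0.0.1:9000/streams-stress/streams/a.
await store.put("streams/a", new Uint8Array([1]));
```
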
"application/octet-stream" }); - client.entries.set("streams/c", { data: new Uint8Array([3]), etag: '"c"', type: "application/octet-stream" }); - client.entries.set("other/d", { data: new Uint8Array([4]), etag: '"d"', type: "application/octet-stream" }); + entries.set("streams/a", { data: new Uint8Array([1]), etag: '"a"', type: "application/octet-stream" }); + entries.set("streams/b", { data: new Uint8Array([2]), etag: '"b"', type: "application/octet-stream" }); + entries.set("streams/c", { data: new Uint8Array([3]), etag: '"c"', type: "application/octet-stream" }); + entries.set("other/d", { data: new Uint8Array([4]), etag: '"d"', type: "application/octet-stream" }); expect(await store.list("streams/")).toEqual(["streams/a", "streams/b", "streams/c"]); }); diff --git a/test/routing_key_lexicon.test.ts b/test/routing_key_lexicon.test.ts index a1c9a99..a926b66 100644 --- a/test/routing_key_lexicon.test.ts +++ b/test/routing_key_lexicon.test.ts @@ -75,6 +75,26 @@ async function appendRepoBatchEvents( } describe("routing key lexicon", () => { + test("rejects routing key list limits above the documented maximum", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-routing-lexicon-limit-")); + const { app } = createProfileTestApp(root, { + metricsFlushIntervalMs: 0, + }); + try { + const stream = "routing-lexicon-limit"; + await createRoutedJsonStream(app, stream); + + const res = await fetchJsonApp(app, `http://local/v1/stream/${encodeURIComponent(stream)}/_routing_keys?limit=501`, { + method: "GET", + }); + expect(res.status).toBe(400); + expect(res.body?.error?.message).toBe("invalid limit"); + } finally { + app.close(); + rmSync(root, { recursive: true, force: true }); + } + }); + test("lists routing keys completely before the first lexicon run exists", async () => { const root = mkdtempSync(join(tmpdir(), "ds-routing-lexicon-fallback-")); const { app, store } = createProfileTestApp(root, { diff --git a/test/search_http.test.ts b/test/search_http.test.ts index 660041e..4c8bff1 100644 --- a/test/search_http.test.ts +++ b/test/search_http.test.ts @@ -134,14 +134,90 @@ async function waitForUploadedWithoutCompanions( const deadline = Date.now() + timeoutMs; while (Date.now() < deadline) { const srow = app.deps.db.getStream(STREAM); - const companionSegments = app.deps.db.listSearchSegmentCompanions(STREAM); - if (srow && srow.uploaded_through >= 0n && companionSegments.length === 0) return; + if (srow && srow.uploaded_through >= 0n) { + app.deps.db.deleteSearchSegmentCompanions(STREAM); + if (app.deps.db.listSearchSegmentCompanions(STREAM).length === 0) return; + } await sleep(50); } throw new Error("timeout waiting for uploaded uncompanioned prefix"); } describe("_search http", () => { + test("processes explicitly queued search companion work before the next periodic sweep", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-search-http-enqueue-wake-")); + const cfg = makeConfig(root, { + segmentMaxBytes: 200, + segmentCheckIntervalMs: 10, + uploadIntervalMs: 10, + indexL0SpanSegments: 2, + indexCheckIntervalMs: 60_000, + segmentCacheMaxBytes: 0, + segmentFooterCacheEntries: 0, + searchWalOverlayQuietPeriodMs: 0, + }); + const app = createApp(cfg); + try { + let res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}`, { + method: "PUT", + headers: { "content-type": "application/json" }, + }) + ); + expect([200, 201]).toContain(res.status); + + res = await app.fetch( + new 
Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}/_schema`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(SEARCH_SCHEMA), + }) + ); + expect(res.status).toBe(200); + + const events = Array.from({ length: 24 }, (_, index) => ({ + eventTime: new Date(Date.UTC(2026, 2, 25, 10, 0, index)).toISOString(), + service: index % 2 === 0 ? "billing-api" : "identity", + status: index % 5 === 0 ? 503 : 200, + duration: index + 1, + requestId: `req_${index}`, + region: index % 2 === 0 ? "ap-southeast-1" : "eu-west-1", + message: index % 2 === 0 ? "queued companion wake match" : "other event", + why: "background indexing", + })); + + res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(events), + }) + ); + expect([201, 204]).toContain(res.status); + + await waitForSearchFamilies(app, 5_000); + + res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(STREAM)}/_search`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + q: 'region:"ap-southeast-1" message:"queued companion wake"', + size: 5, + sort: ["offset:asc"], + }), + }) + ); + expect(res.status).toBe(200); + const body = await res.json(); + expect(body.coverage.indexed_segments).toBeGreaterThan(0); + expect(body.hits.length).toBeGreaterThan(0); + } finally { + app.close(); + rmSync(root, { force: true, recursive: true }); + } + }); + test( "supports exact, range, prefix, bare text, phrase, and search_after pagination", async () => { @@ -243,10 +319,19 @@ describe("_search http", () => { }) ); expect(res.status).toBe(200); + expect(Number(res.headers.get("search-candidate-doc-ids"))).toBeGreaterThan(0); + expect(Number(res.headers.get("search-decoded-records"))).toBeGreaterThan(0); + expect(Number(res.headers.get("search-segment-payload-bytes-fetched"))).toBeGreaterThan(0); let body = await res.json(); expect(body.total).toEqual({ value: 1, relation: "eq" }); expect(body.coverage.index_families_used).toEqual(expect.arrayContaining(["col"])); expect(body.coverage.index_families_used).toEqual(expect.arrayContaining(["fts"])); + expect(body.coverage.candidate_doc_ids).toBeGreaterThan(0); + expect(body.coverage.decoded_records).toBeGreaterThan(0); + expect(body.coverage.json_parse_time_ms).toEqual(expect.any(Number)); + expect(body.coverage.segment_payload_bytes_fetched).toBeGreaterThan(0); + expect(body.coverage.sort_time_ms).toEqual(expect.any(Number)); + expect(body.coverage.peak_hits_held).toBeGreaterThan(0); expect(body.hits).toHaveLength(1); expect(body.hits[0].fields.requestId).toBe("req_2"); @@ -263,6 +348,7 @@ describe("_search http", () => { expect(res.status).toBe(200); body = await res.json(); expect(body.total).toEqual({ value: 2, relation: "eq" }); + expect(body.coverage.index_families_used).toEqual(expect.arrayContaining(["exact"])); expect(body.coverage.index_families_used).toEqual(expect.not.arrayContaining(["fts"])); expect(body.hits.map((hit: any) => hit.fields.requestId)).toEqual(["job_1", "req_1"]); @@ -516,6 +602,7 @@ describe("_search http", () => { indexCheckIntervalMs: 10, segmentCacheMaxBytes: 0, segmentFooterCacheEntries: 0, + searchWalOverlayQuietPeriodMs: 0, }); const app = createApp(cfg); try { @@ -634,6 +721,10 @@ describe("_search http", () => { ); expect(res.status).toBe(200); + // Keep this test focused on search behavior while companions are not + // caught 
up. Enqueued work normally wakes the managers promptly. + app.deps.indexer?.stop(); + for (const event of [ { eventTime: "2026-03-25T10:15:23.123Z", @@ -809,11 +900,12 @@ describe("_search http", () => { ); test( - "omits the newest uploaded and WAL suffix while companions are still catching up", + "reports incomplete WAL coverage while returning visible uploaded matches", async () => { const root = mkdtempSync(join(tmpdir(), "ds-search-omit-suffix-")); const cfg = makeConfig(root, { segmentMaxBytes: 140, + segmentTargetRows: 1, segmentCheckIntervalMs: 10, uploadIntervalMs: 10, uploadConcurrency: 2, @@ -905,15 +997,15 @@ describe("_search http", () => { ); expect(res.status).toBe(200); const body = await res.json(); - expect(body.hits).toEqual([]); + expect(body.hits.length).toBeGreaterThan(0); expect(body.coverage.complete).toBe(false); expect(body.coverage.mode).toBe("published"); - expect(body.coverage.scanned_segments).toBe(0); + expect(body.coverage.indexed_segments + body.coverage.scanned_segments).toBeGreaterThan(0); expect(body.coverage.scanned_tail_docs).toBe(0); - expect(body.coverage.possible_missing_uploaded_segments).toBeGreaterThan(0); expect(body.coverage.possible_missing_wal_rows).toBeGreaterThan(0); expect(body.coverage.possible_missing_events_upper_bound).toBeGreaterThan(0); - expect(body.total).toEqual({ value: 0, relation: "gte" }); + expect(body.total.value).toBeGreaterThan(0); + expect(["eq", "gte"]).toContain(body.total.relation); } finally { app.close(); rmSync(root, { recursive: true, force: true }); diff --git a/test/search_perf_repro.test.ts b/test/search_perf_repro.test.ts new file mode 100644 index 0000000..dcc4d90 --- /dev/null +++ b/test/search_perf_repro.test.ts @@ -0,0 +1,681 @@ +import { describe, expect, test } from "bun:test"; +import { mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { Result } from "better-result"; +import { createApp } from "../src/app"; +import { loadConfig, type Config } from "../src/config"; +import { MockR2Store } from "../src/objectstore/mock_r2"; + +const RUN = process.env.SEARCH_PERF_REPRO === "1"; +const t = RUN ? 
test : test.skip; +const EXPECT_SLOW = process.env.SEARCH_PERF_EXPECT_SLOW === "1"; + +const TIMEOUT_MS = envNumber("SEARCH_PERF_TIMEOUT_MS", 900_000); +const MIN_CASE_MS = envNumber("SEARCH_PERF_MIN_CASE_MS", 2_000, { allowZero: true }); +const DEFAULT_SORT_SEGMENTS = envNumber("SEARCH_PERF_DEFAULT_SORT_SEGMENTS", 16); +const DEFAULT_SORT_ROWS_PER_SEGMENT = envNumber("SEARCH_PERF_DEFAULT_SORT_ROWS_PER_SEGMENT", 2_048); +const DEFAULT_SORT_PAYLOAD_BYTES = envNumber("SEARCH_PERF_DEFAULT_SORT_PAYLOAD_BYTES", 4 * 1024); +const REVERSE_ROWS = envNumber("SEARCH_PERF_REVERSE_ROWS", 32_768); +const REVERSE_PAYLOAD_BYTES = envNumber("SEARCH_PERF_REVERSE_PAYLOAD_BYTES", 8 * 1024); +const SMALL_STREAM_ROWS_PER_SEGMENT = envNumber("SEARCH_PERF_SMALL_ROWS_PER_SEGMENT", 32_768); +const SMALL_STREAM_PAYLOAD_BYTES = envNumber("SEARCH_PERF_SMALL_PAYLOAD_BYTES", 8 * 1024); +const WAL_TAIL_ROWS = envNumber("SEARCH_PERF_WAL_TAIL_ROWS", 32_768); +const WAL_TAIL_PAYLOAD_BYTES = envNumber("SEARCH_PERF_WAL_TAIL_PAYLOAD_BYTES", 4 * 1024); +const EXACT_ONLY_ROWS = envNumber("SEARCH_PERF_EXACT_ONLY_ROWS", 32_768); +const EXACT_ONLY_PAYLOAD_BYTES = envNumber("SEARCH_PERF_EXACT_ONLY_PAYLOAD_BYTES", 4 * 1024); +const APPEND_BATCH_ROWS = envNumber("SEARCH_PERF_APPEND_BATCH_ROWS", 512); +const BLOCK_MAX_BYTES = envNumber("SEARCH_PERF_BLOCK_MAX_BYTES", 64 * 1024); + +function envNumber(name: string, fallback: number, options: { allowZero?: boolean } = {}): number { + const raw = process.env[name]; + if (raw == null || raw.trim() === "") return fallback; + const parsed = Number(raw); + const valid = Number.isFinite(parsed) && (options.allowZero ? parsed >= 0 : parsed > 0); + if (!valid) throw new Error(`${name} must be ${options.allowZero ? "a non-negative" : "a positive"} number`); + return parsed; +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function makeConfig(rootDir: string, overrides: Partial): Config { + const base = loadConfig(); + return { + ...base, + rootDir, + dbPath: `${rootDir}/wal.sqlite`, + port: 0, + ...overrides, + }; +} + +type PerfFixture = { + app: ReturnType; + root: string; + store: MockR2Store; + stream: string; + rows: number; + segments: number; + payloadBytes: number; +}; + +function padFor(index: number, targetChars: number): string { + const alphabet = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ._-"; + let state = (0x9e3779b9 ^ index) >>> 0; + let out = ""; + while (out.length < targetChars) { + state = (Math.imul(state, 1664525) + 1013904223) >>> 0; + out += alphabet[state % alphabet.length]!; + } + return out.slice(0, targetChars); +} + +function buildEvlogPayload(index: number, targetBytes: number, environment = "staging"): Uint8Array { + const encoder = new TextEncoder(); + const timestamp = new Date(Date.UTC(2026, 0, 1, 0, 0, 0) + index * 1000).toISOString(); + const base = { + timestamp, + level: index % 11 === 0 ? "error" : "info", + service: `svc-${String(index % 12).padStart(2, "0")}`, + environment, + version: "2026.04.24", + region: index % 2 === 0 ? "us-east-1" : "eu-west-1", + requestId: `req-${String(index).padStart(10, "0")}`, + traceId: `trace-${String(Math.floor(index / 4)).padStart(10, "0")}`, + spanId: `span-${String(index).padStart(10, "0")}`, + method: index % 3 === 0 ? "POST" : "GET", + path: `/api/resource/${index % 64}`, + status: index % 17 === 0 ? 
500 : 200, + duration: 10 + (index % 500) / 10, + message: `seed event ${index} staging request list candidate`, + why: null, + fix: null, + link: null, + sampling: null, + redaction: { keys: [] }, + context: { seed: index, pad: "" }, + }; + const baseBytes = encoder.encode(JSON.stringify(base)).byteLength; + const padChars = targetBytes - baseBytes; + if (padChars < 0) throw new Error(`target payload ${targetBytes} is too small for evlog seed row (${baseBytes})`); + const bytes = encoder.encode(JSON.stringify({ ...base, context: { seed: index, pad: padFor(index, padChars) } })); + if (bytes.byteLength !== targetBytes) { + throw new Error(`expected ${targetBytes} bytes, got ${bytes.byteLength}`); + } + return bytes; +} + +function buildExactOnlyPayload(index: number, targetBytes: number): Uint8Array { + const encoder = new TextEncoder(); + const timestamp = new Date(Date.UTC(2026, 0, 1, 0, 0, 0) + index * 1000).toISOString(); + const base = { + eventTime: timestamp, + customerId: `cust-${String(index).padStart(10, "0")}`, + message: `exact-only seed event ${index}`, + context: { seed: index, pad: "" }, + }; + const baseBytes = encoder.encode(JSON.stringify(base)).byteLength; + const padChars = targetBytes - baseBytes; + if (padChars < 0) throw new Error(`target payload ${targetBytes} is too small for exact-only seed row (${baseBytes})`); + const bytes = encoder.encode(JSON.stringify({ ...base, context: { seed: index, pad: padFor(index, padChars) } })); + if (bytes.byteLength !== targetBytes) { + throw new Error(`expected ${targetBytes} bytes, got ${bytes.byteLength}`); + } + return bytes; +} + +async function createEvlogStream(app: ReturnType, stream: string): Promise { + let res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(stream)}`, { + method: "PUT", + headers: { "content-type": "application/json" }, + }) + ); + expect([200, 201]).toContain(res.status); + + res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(stream)}/_profile`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + apiVersion: "durable.streams/profile/v1", + profile: { kind: "evlog" }, + }), + }) + ); + expect(res.status).toBe(200); +} + +async function createExactOnlyStream(app: ReturnType, stream: string): Promise { + let res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(stream)}`, { + method: "PUT", + headers: { "content-type": "application/json" }, + }) + ); + expect([200, 201]).toContain(res.status); + + res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(stream)}/_schema`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify({ + schema: { type: "object", additionalProperties: true }, + search: { + primaryTimestampField: "eventTime", + fields: { + eventTime: { + kind: "date", + bindings: [{ version: 1, jsonPointer: "/eventTime" }], + column: true, + exists: true, + sortable: true, + }, + customerId: { + kind: "keyword", + bindings: [{ version: 1, jsonPointer: "/customerId" }], + exact: true, + exists: true, + sortable: true, + }, + }, + }, + }), + }) + ); + expect(res.status).toBe(200); +} + +function appendSeedRows( + app: ReturnType, + stream: string, + rows: number, + payloadBytes: number, + batchRows: number +): void { + let nextOffset = app.deps.db.getStream(stream)?.next_offset ?? 
0n; + let appendBaseMs = Date.parse("2026-01-01T00:00:00.000Z"); + for (let start = 0; start < rows; start += batchRows) { + const count = Math.min(batchRows, rows - start); + const batch = Array.from({ length: count }, (_, localIndex) => { + const index = start + localIndex; + return { + routingKey: null, + contentType: "application/json", + payload: buildEvlogPayload(index, payloadBytes), + appendMs: BigInt(appendBaseMs + index), + }; + }); + const append = app.deps.db.appendWalRows({ + stream, + startOffset: nextOffset, + expectedOffset: nextOffset, + baseAppendMs: BigInt(appendBaseMs + start), + rows: batch, + }); + expect(Result.isOk(append)).toBe(true); + if (Result.isError(append)) throw new Error(append.error.kind); + nextOffset = append.value.lastOffset + 1n; + } +} + +function appendExactOnlyRows( + app: ReturnType, + stream: string, + rows: number, + payloadBytes: number, + batchRows: number +): void { + let nextOffset = app.deps.db.getStream(stream)?.next_offset ?? 0n; + const appendBaseMs = Date.parse("2026-01-01T00:00:00.000Z"); + for (let start = 0; start < rows; start += batchRows) { + const count = Math.min(batchRows, rows - start); + const batch = Array.from({ length: count }, (_, localIndex) => { + const index = start + localIndex; + return { + routingKey: null, + contentType: "application/json", + payload: buildExactOnlyPayload(index, payloadBytes), + appendMs: BigInt(appendBaseMs + index), + }; + }); + const append = app.deps.db.appendWalRows({ + stream, + startOffset: nextOffset, + expectedOffset: nextOffset, + baseAppendMs: BigInt(appendBaseMs + start), + rows: batch, + }); + expect(Result.isOk(append)).toBe(true); + if (Result.isError(append)) throw new Error(append.error.kind); + nextOffset = append.value.lastOffset + 1n; + } +} + +async function waitForUploadedCompanions( + app: ReturnType, + stream: string, + expectedSegments: number, + timeoutMs: number +): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + const row = app.deps.db.getStream(stream); + const companions = app.deps.db.listSearchSegmentCompanions(stream); + const fullySealed = + !!row && + row.next_offset > 0n && + row.sealed_through === row.next_offset - 1n && + row.pending_bytes === 0n && + row.pending_rows === 0n; + if ( + row && + fullySealed && + row.uploaded_segment_count === expectedSegments && + app.deps.db.countUploadedSegments(stream) === expectedSegments && + row.uploaded_through === row.next_offset - 1n && + app.deps.db.getSearchCompanionPlan(stream) && + companions.length === expectedSegments + ) { + return; + } + app.deps.indexer?.enqueue(stream); + await sleep(50); + } + const row = app.deps.db.getStream(stream); + throw new Error( + `timeout waiting for fixture: expectedSegments=${expectedSegments} uploaded=${row?.uploaded_segment_count ?? 0} ` + + `sealed=${row?.sealed_through?.toString() ?? "missing"} next=${row?.next_offset?.toString() ?? 
"missing"} ` + + `companions=${app.deps.db.listSearchSegmentCompanions(stream).length}` + ); +} + +async function buildFixture(args: { + stream: string; + segments: number; + rowsPerSegment: number; + payloadBytes: number; + indexL0SpanSegments: number; +}): Promise { + const root = mkdtempSync(join(tmpdir(), `ds-search-perf-${args.stream}-`)); + const store = new MockR2Store({ + maxInMemoryBytes: 1 * 1024 * 1024, + spillDir: `${root}/mock-r2`, + }); + const totalRows = args.segments * args.rowsPerSegment; + const commonConfig = { + segmentTargetRows: args.rowsPerSegment, + segmentMaxBytes: args.rowsPerSegment * args.payloadBytes * 4, + blockMaxBytes: BLOCK_MAX_BYTES, + indexL0SpanSegments: args.indexL0SpanSegments, + segmentCacheMaxBytes: 0, + segmentFooterCacheEntries: 0, + searchWalOverlayQuietPeriodMs: 0, + } satisfies Partial; + + let buildApp: ReturnType | null = createApp( + makeConfig(root, { + ...commonConfig, + segmentCheckIntervalMs: 5, + uploadIntervalMs: 5, + uploadConcurrency: 4, + indexCheckIntervalMs: 5, + }), + store + ); + try { + await createEvlogStream(buildApp, args.stream); + appendSeedRows(buildApp, args.stream, totalRows, args.payloadBytes, APPEND_BATCH_ROWS); + await waitForUploadedCompanions(buildApp, args.stream, args.segments, TIMEOUT_MS); + } finally { + buildApp?.close(); + buildApp = null; + } + + const app = createApp( + makeConfig(root, { + ...commonConfig, + segmentCheckIntervalMs: 60_000, + uploadIntervalMs: 60_000, + indexCheckIntervalMs: 60_000, + }), + store + ); + return { + app, + root, + store, + stream: args.stream, + rows: totalRows, + segments: args.segments, + payloadBytes: args.payloadBytes, + }; +} + +async function buildWalTailFixture(args: { stream: string; rows: number; payloadBytes: number }): Promise { + const root = mkdtempSync(join(tmpdir(), `ds-search-perf-${args.stream}-`)); + const store = new MockR2Store({ + maxInMemoryBytes: 1 * 1024 * 1024, + spillDir: `${root}/mock-r2`, + }); + const app = createApp( + makeConfig(root, { + segmentTargetRows: args.rows, + segmentMaxBytes: args.rows * args.payloadBytes * 4, + blockMaxBytes: BLOCK_MAX_BYTES, + searchWalOverlayQuietPeriodMs: 0, + searchWalOverlayMaxBytes: args.rows * args.payloadBytes * 4, + segmentCheckIntervalMs: 60_000, + uploadIntervalMs: 60_000, + indexCheckIntervalMs: 60_000, + }), + store + ); + await createEvlogStream(app, args.stream); + appendSeedRows(app, args.stream, args.rows, args.payloadBytes, APPEND_BATCH_ROWS); + return { + app, + root, + store, + stream: args.stream, + rows: args.rows, + segments: 0, + payloadBytes: args.payloadBytes, + }; +} + +async function buildExactOnlyFixture(args: { stream: string; rows: number; payloadBytes: number }): Promise { + const root = mkdtempSync(join(tmpdir(), `ds-search-perf-${args.stream}-`)); + const store = new MockR2Store({ + maxInMemoryBytes: 1 * 1024 * 1024, + spillDir: `${root}/mock-r2`, + }); + const commonConfig = { + segmentTargetRows: args.rows, + segmentMaxBytes: args.rows * args.payloadBytes * 4, + blockMaxBytes: BLOCK_MAX_BYTES, + indexL0SpanSegments: 16, + segmentCacheMaxBytes: 0, + segmentFooterCacheEntries: 0, + searchWalOverlayQuietPeriodMs: 0, + } satisfies Partial; + let buildApp: ReturnType | null = createApp( + makeConfig(root, { + ...commonConfig, + segmentCheckIntervalMs: 5, + uploadIntervalMs: 5, + indexCheckIntervalMs: 5, + }), + store + ); + try { + await createExactOnlyStream(buildApp, args.stream); + appendExactOnlyRows(buildApp, args.stream, args.rows, args.payloadBytes, APPEND_BATCH_ROWS); + await 
waitForUploadedCompanions(buildApp, args.stream, 1, TIMEOUT_MS); + } finally { + buildApp?.close(); + buildApp = null; + } + + const app = createApp( + makeConfig(root, { + ...commonConfig, + segmentCheckIntervalMs: 60_000, + uploadIntervalMs: 60_000, + indexCheckIntervalMs: 60_000, + }), + store + ); + return { + app, + root, + store, + stream: args.stream, + rows: args.rows, + segments: 1, + payloadBytes: args.payloadBytes, + }; +} + +async function measuredSearch( + app: ReturnType, + stream: string, + requestBody: Record +): Promise<{ elapsedMs: number; parseCalls: number; body: any }> { + const originalParse = JSON.parse; + let parseCalls = 0; + JSON.parse = ((text: string, reviver?: (this: unknown, key: string, value: unknown) => unknown) => { + parseCalls += 1; + return originalParse(text, reviver); + }) as typeof JSON.parse; + + const started = performance.now(); + let res: Response; + let text: string; + try { + res = await app.fetch( + new Request(`http://local/v1/stream/${encodeURIComponent(stream)}/_search`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(requestBody), + }) + ); + text = await res.text(); + } finally { + JSON.parse = originalParse; + } + const elapsedMs = performance.now() - started; + expect(res!.status).toBe(200); + return { elapsedMs, parseCalls, body: originalParse(text!) }; +} + +function expectMultiSecondRuntime(label: string, elapsedMs: number): void { + if (!EXPECT_SLOW || MIN_CASE_MS <= 0) return; + expect( + elapsedMs, + `${label} completed in ${elapsedMs.toFixed(2)}ms; increase fixture size or lower SEARCH_PERF_MIN_CASE_MS for this machine` + ).toBeGreaterThanOrEqual(MIN_CASE_MS); +} + +function logPerfCase(label: string, fixture: PerfFixture, result: { elapsedMs: number; parseCalls: number; body: any }): void { + const coverage = result.body.coverage ?? {}; + // eslint-disable-next-line no-console + console.log( + `[search-perf-repro] ${label} rows=${fixture.rows} segments=${fixture.segments} payloadBytes=${fixture.payloadBytes} ` + + `elapsedMs=${result.elapsedMs.toFixed(2)} tookMs=${result.body.took_ms ?? result.body.tookMs ?? "n/a"} ` + + `parseCalls=${result.parseCalls} indexedSegments=${coverage.indexed_segments ?? coverage.indexedSegments ?? "n/a"} ` + + `indexedSegmentTimeMs=${coverage.indexed_segment_time_ms ?? coverage.indexedSegmentTimeMs ?? "n/a"} ` + + `ftsDecodeMs=${coverage.fts_decode_ms ?? coverage.ftsDecodeMs ?? "n/a"} ` + + `candidateDocIds=${coverage.candidate_doc_ids ?? "n/a"} decodedRecords=${coverage.decoded_records ?? "n/a"} ` + + `jsonParseTimeMs=${coverage.json_parse_time_ms ?? "n/a"} segmentBytesFetched=${coverage.segment_payload_bytes_fetched ?? "n/a"} ` + + `sortTimeMs=${coverage.sort_time_ms ?? "n/a"} peakHitsHeld=${coverage.peak_hits_held ?? "n/a"} ` + + `families=${JSON.stringify(coverage.index_families_used ?? 
[])}` + ); +} + +describe("search performance repro cases", () => { + t( + "default non-scoring evlog filter measures broad event-list query cost", + async () => { + const fixture = await buildFixture({ + stream: "perf-default-sort", + segments: DEFAULT_SORT_SEGMENTS, + rowsPerSegment: DEFAULT_SORT_ROWS_PER_SEGMENT, + payloadBytes: DEFAULT_SORT_PAYLOAD_BYTES, + indexL0SpanSegments: 2, + }); + try { + const result = await measuredSearch(fixture.app, fixture.stream, { + q: 'environment:"staging"', + size: 100, + }); + logPerfCase("default-non-scoring-sort", fixture, result); + + expect(result.body.hits).toHaveLength(100); + expect(result.body.coverage.index_families_used).toContain("fts"); + expectMultiSecondRuntime("default timestamp sort broad filter", result.elapsedMs); + } finally { + fixture.app.close(); + rmSync(fixture.root, { recursive: true, force: true }); + } + }, + TIMEOUT_MS + ); + + t( + "offset-desc broad filter measures newest-segment first-page cost", + async () => { + const fixture = await buildFixture({ + stream: "perf-reverse-offset", + segments: 1, + rowsPerSegment: REVERSE_ROWS, + payloadBytes: REVERSE_PAYLOAD_BYTES, + indexL0SpanSegments: 16, + }); + try { + const result = await measuredSearch(fixture.app, fixture.stream, { + q: 'environment:"staging"', + size: 1, + sort: ["offset:desc"], + }); + logPerfCase("offset-desc-full-segment-decode", fixture, result); + + expect(result.body.hits).toHaveLength(1); + expect(result.parseCalls).toBeLessThanOrEqual(12); + expect(result.body.coverage.index_families_used).toContain("fts"); + expectMultiSecondRuntime("offset-desc newest segment decode", result.elapsedMs); + } finally { + fixture.app.close(); + rmSync(fixture.root, { recursive: true, force: true }); + } + }, + TIMEOUT_MS + ); + + t( + "explicit timestamp-desc broad filter measures event-time global sort cost", + async () => { + const fixture = await buildFixture({ + stream: "perf-timestamp-topk", + segments: DEFAULT_SORT_SEGMENTS, + rowsPerSegment: DEFAULT_SORT_ROWS_PER_SEGMENT, + payloadBytes: DEFAULT_SORT_PAYLOAD_BYTES, + indexL0SpanSegments: 2, + }); + try { + const result = await measuredSearch(fixture.app, fixture.stream, { + q: 'environment:"staging"', + size: 100, + sort: ["timestamp:desc", "offset:desc"], + }); + logPerfCase("timestamp-desc-top-k", fixture, result); + + expect(result.body.hits).toHaveLength(100); + expect(result.body.coverage.index_families_used).toContain("fts"); + expect(result.body.coverage.indexed_segments).toBeLessThanOrEqual(2); + expect(result.body.coverage.peak_hits_held).toBeLessThanOrEqual(100); + expect(result.parseCalls).toBeLessThanOrEqual(DEFAULT_SORT_ROWS_PER_SEGMENT + 128); + expectMultiSecondRuntime("explicit timestamp-desc broad filter", result.elapsedMs); + } finally { + fixture.app.close(); + rmSync(fixture.root, { recursive: true, force: true }); + } + }, + TIMEOUT_MS + ); + + t( + "two uploaded evlog segments below the secondary exact L0 span use companion candidates plus source scan", + async () => { + const fixture = await buildFixture({ + stream: "perf-small-no-l0", + segments: 2, + rowsPerSegment: SMALL_STREAM_ROWS_PER_SEGMENT, + payloadBytes: SMALL_STREAM_PAYLOAD_BYTES, + indexL0SpanSegments: 16, + }); + try { + expect(fixture.app.deps.db.listSecondaryIndexRuns(fixture.stream, "environment")).toHaveLength(0); + + const result = await measuredSearch(fixture.app, fixture.stream, { + q: 'environment:"staging"', + size: 100, + sort: ["offset:desc"], + }); + logPerfCase("small-stream-no-exact-l0", fixture, result); + + 
expect(result.body.hits).toHaveLength(100); + expect(result.body.coverage.index_families_used).toContain("fts"); + expect(result.body.coverage.indexed_segments).toBeGreaterThan(0); + expect(fixture.app.deps.db.listSecondaryIndexRuns(fixture.stream, "environment")).toHaveLength(0); + expectMultiSecondRuntime("small stream below exact L0 span", result.elapsedMs); + } finally { + fixture.app.close(); + rmSync(fixture.root, { recursive: true, force: true }); + } + }, + TIMEOUT_MS + ); + + t( + "quiet WAL-tail rare exact filters use the hot exact cache after the first lookup", + async () => { + const fixture = await buildWalTailFixture({ + stream: "perf-wal-tail-exact", + rows: WAL_TAIL_ROWS, + payloadBytes: WAL_TAIL_PAYLOAD_BYTES, + }); + try { + const requestBody = { + q: 'requestId:"req-0000000000"', + size: 1, + sort: ["offset:desc"], + }; + const cold = await measuredSearch(fixture.app, fixture.stream, requestBody); + const warm = await measuredSearch(fixture.app, fixture.stream, requestBody); + logPerfCase("wal-tail-rare-exact-cold", fixture, cold); + logPerfCase("wal-tail-rare-exact-warm", fixture, warm); + + expect(cold.body.hits).toHaveLength(1); + expect(warm.body.hits).toHaveLength(1); + expect(warm.body.coverage.candidate_doc_ids).toBe(1); + expect(warm.body.coverage.scanned_tail_docs).toBe(1); + expect(warm.elapsedMs).toBeLessThan(cold.elapsedMs); + } finally { + fixture.app.close(); + rmSync(fixture.root, { recursive: true, force: true }); + } + }, + TIMEOUT_MS + ); + + t( + "sealed exact-only rare filters use .exact doc-id postings instead of parsing every candidate record", + async () => { + const fixture = await buildExactOnlyFixture({ + stream: "perf-exact-only-postings", + rows: EXACT_ONLY_ROWS, + payloadBytes: EXACT_ONLY_PAYLOAD_BYTES, + }); + try { + const result = await measuredSearch(fixture.app, fixture.stream, { + q: 'customerId:"cust-0000000000"', + size: 1, + sort: ["offset:desc"], + }); + logPerfCase("sealed-exact-only-postings", fixture, result); + + expect(result.body.hits).toHaveLength(1); + expect(result.body.coverage.index_families_used).toContain("exact"); + expect(result.body.coverage.candidate_doc_ids).toBe(1); + expect(result.parseCalls).toBeLessThanOrEqual(8); + } finally { + fixture.app.close(); + rmSync(fixture.root, { recursive: true, force: true }); + } + }, + TIMEOUT_MS + ); +}); diff --git a/test/search_schema.test.ts b/test/search_schema.test.ts index 6d4b360..af5f27a 100644 --- a/test/search_schema.test.ts +++ b/test/search_schema.test.ts @@ -1,8 +1,52 @@ import { describe, expect, test } from "bun:test"; import { Result } from "better-result"; import { parseSchemaUpdateResult } from "../src/schema/registry"; +import { parseSearchRequestBodyResult } from "../src/search/query"; describe("search schema config", () => { + test("defaults non-scoring filters to offset-desc and scoring text to relevance", () => { + const registry = { + search: { + primaryTimestampField: "eventTime", + defaultFields: [{ field: "message", boost: 1 }], + fields: { + eventTime: { + kind: "date", + bindings: [{ version: 1, jsonPointer: "/eventTime" }], + column: true, + exists: true, + sortable: true, + }, + service: { + kind: "keyword", + bindings: [{ version: 1, jsonPointer: "/service" }], + normalizer: "lowercase_v1", + exact: true, + prefix: true, + exists: true, + sortable: true, + }, + message: { + kind: "text", + bindings: [{ version: 1, jsonPointer: "/message" }], + analyzer: "unicode_word_v1", + exists: true, + }, + }, + }, + } as any; + + const filterRes = 
parseSearchRequestBodyResult(registry, { q: "service:checkout" }); + expect(Result.isOk(filterRes)).toBe(true); + if (Result.isError(filterRes)) return; + expect(filterRes.value.sort).toEqual([{ kind: "offset", direction: "desc" }]); + + const textRes = parseSearchRequestBodyResult(registry, { q: "checkout" }); + expect(Result.isOk(textRes)).toBe(true); + if (Result.isError(textRes)) return; + expect(textRes.value.sort.map((sort) => sort.kind)).toEqual(["score", "field", "offset"]); + }); + test("accepts versioned search bindings, aliases, and default fields", () => { const res = parseSchemaUpdateResult({ schema: { diff --git a/test/segmenter_behavior.test.ts b/test/segmenter_behavior.test.ts index 5572e48..2a60cab 100644 --- a/test/segmenter_behavior.test.ts +++ b/test/segmenter_behavior.test.ts @@ -207,4 +207,65 @@ describe("segmenter behavior", () => { rmSync(root, { recursive: true, force: true }); } }); + + test("clamps the compression-aware byte target at 5x base target under extreme compression", async () => { + const root = mkdtempSync(join(tmpdir(), "ds-segmenter-comp-clamp-")); + try { + const cfg = { + ...loadConfig(), + rootDir: root, + dbPath: `${root}/wal.sqlite`, + port: 0, + segmentCheckIntervalMs: 5, + segmentMaxBytes: 1024, + segmentTargetRows: 10_000, + }; + const db = new SqliteDurableStore(cfg.dbPath); + db.ensureStream("compressed", { contentType: "application/octet-stream" }); + + for (let i = 0; i < 8; i++) { + db.commitSealedSegment({ + segmentId: `hist-${i}`, + stream: "compressed", + segmentIndex: i, + startOffset: BigInt(i), + endOffset: BigInt(i), + blockCount: 1, + lastAppendMs: BigInt(i + 1), + payloadBytes: 10_000n, + sizeBytes: 100, + localPath: join(root, `hist-${i}.bin`), + rowsSealed: 1n, + }); + } + + const payload = new Uint8Array([7]); + const append = db.appendWalRows({ + stream: "compressed", + startOffset: 8n, + baseAppendMs: 100n, + rows: Array.from({ length: 6_000 }, (_, i) => ({ + routingKey: null, + contentType: null, + payload, + appendMs: BigInt(100 + i), + })), + }); + expect(Result.isOk(append)).toBe(true); + + const segmenter = new Segmenter(cfg, db, { candidatesPerTick: 1 }); + segmenter.start(); + await waitFor(() => (db.getStream("compressed")?.sealed_through ?? -1n) >= 5_127n); + segmenter.stop(); + + const seg = db.getSegmentByIndex("compressed", 8); + expect(seg).not.toBeNull(); + expect(seg?.end_offset).toBe(5_127n); + expect(seg?.payload_bytes).toBe(5_120n); + expect(db.getStream("compressed")?.sealed_through).toBe(5_127n); + db.close(); + } finally { + rmSync(root, { recursive: true, force: true }); + } + }); });